Merge remote-tracking branch 'upstream/3.4' into merge-3.4

7 years ago · a8b0db4e5d
parent 29e88e50ff 4b895a4d1f
commit a8b0db4e5d
77 changed files with 584 additions and 441 deletions
--- a/modules/core/include/opencv2/core/ocl.hpp
+++ b/modules/core/include/opencv2/core/ocl.hpp
@ -59,7 +59,7 @@ CV_EXPORTS_W void finish();
 CV_EXPORTS bool haveSVM();

 class CV_EXPORTS Context;
-class CV_EXPORTS Device;
+class CV_EXPORTS_W_SIMPLE Device;
 class CV_EXPORTS Kernel;
 class CV_EXPORTS Program;
 class CV_EXPORTS ProgramSource;
@ -67,14 +67,14 @@ class CV_EXPORTS Queue;
 class CV_EXPORTS PlatformInfo;
 class CV_EXPORTS Image2D;

-class CV_EXPORTS Device
+class CV_EXPORTS_W_SIMPLE Device
 {
 public:
-    Device();
+    CV_WRAP Device();
    explicit Device(void* d);
    Device(const Device& d);
    Device& operator = (const Device& d);
-    ~Device();
+    CV_WRAP ~Device();

    void set(void* d);

@ -89,24 +89,24 @@ public:
        TYPE_ALL         = 0xFFFFFFFF
    };

-    String name() const;
-    String extensions() const;
-    bool isExtensionSupported(const String& extensionName) const;
-    String version() const;
-    String vendorName() const;
-    String OpenCL_C_Version() const;
-    String OpenCLVersion() const;
-    int deviceVersionMajor() const;
-    int deviceVersionMinor() const;
-    String driverVersion() const;
+    CV_WRAP String name() const;
+    CV_WRAP String extensions() const;
+    CV_WRAP bool isExtensionSupported(const String& extensionName) const;
+    CV_WRAP String version() const;
+    CV_WRAP String vendorName() const;
+    CV_WRAP String OpenCL_C_Version() const;
+    CV_WRAP String OpenCLVersion() const;
+    CV_WRAP int deviceVersionMajor() const;
+    CV_WRAP int deviceVersionMinor() const;
+    CV_WRAP String driverVersion() const;
    void* ptr() const;

-    int type() const;
+    CV_WRAP int type() const;

-    int addressBits() const;
-    bool available() const;
-    bool compilerAvailable() const;
-    bool linkerAvailable() const;
+    CV_WRAP int addressBits() const;
+    CV_WRAP bool available() const;
+    CV_WRAP bool compilerAvailable() const;
+    CV_WRAP bool linkerAvailable() const;

    enum
    {
@ -119,21 +119,21 @@ public:
        FP_SOFT_FLOAT=(1 << 6),
        FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7)
    };
-    int doubleFPConfig() const;
-    int singleFPConfig() const;
-    int halfFPConfig() const;
+    CV_WRAP int doubleFPConfig() const;
+    CV_WRAP int singleFPConfig() const;
+    CV_WRAP int halfFPConfig() const;

-    bool endianLittle() const;
-    bool errorCorrectionSupport() const;
+    CV_WRAP bool endianLittle() const;
+    CV_WRAP bool errorCorrectionSupport() const;

    enum
    {
        EXEC_KERNEL=(1 << 0),
        EXEC_NATIVE_KERNEL=(1 << 1)
    };
-    int executionCapabilities() const;
+    CV_WRAP int executionCapabilities() const;

-    size_t globalMemCacheSize() const;
+    CV_WRAP size_t globalMemCacheSize() const;

    enum
    {
@ -141,38 +141,38 @@ public:
        READ_ONLY_CACHE=1,
        READ_WRITE_CACHE=2
    };
-    int globalMemCacheType() const;
-    int globalMemCacheLineSize() const;
-    size_t globalMemSize() const;
+    CV_WRAP int globalMemCacheType() const;
+    CV_WRAP int globalMemCacheLineSize() const;
+    CV_WRAP size_t globalMemSize() const;

-    size_t localMemSize() const;
+    CV_WRAP size_t localMemSize() const;
    enum
    {
        NO_LOCAL_MEM=0,
        LOCAL_IS_LOCAL=1,
        LOCAL_IS_GLOBAL=2
    };
-    int localMemType() const;
-    bool hostUnifiedMemory() const;
+    CV_WRAP int localMemType() const;
+    CV_WRAP bool hostUnifiedMemory() const;

-    bool imageSupport() const;
+    CV_WRAP bool imageSupport() const;

-    bool imageFromBufferSupport() const;
+    CV_WRAP bool imageFromBufferSupport() const;
    uint imagePitchAlignment() const;
    uint imageBaseAddressAlignment() const;

    /// deprecated, use isExtensionSupported() method (probably with "cl_khr_subgroups" value)
-    bool intelSubgroupsSupport() const;
+    CV_WRAP bool intelSubgroupsSupport() const;

-    size_t image2DMaxWidth() const;
-    size_t image2DMaxHeight() const;
+    CV_WRAP size_t image2DMaxWidth() const;
+    CV_WRAP size_t image2DMaxHeight() const;

-    size_t image3DMaxWidth() const;
-    size_t image3DMaxHeight() const;
-    size_t image3DMaxDepth() const;
+    CV_WRAP size_t image3DMaxWidth() const;
+    CV_WRAP size_t image3DMaxHeight() const;
+    CV_WRAP size_t image3DMaxDepth() const;

-    size_t imageMaxBufferSize() const;
-    size_t imageMaxArraySize() const;
+    CV_WRAP size_t imageMaxBufferSize() const;
+    CV_WRAP size_t imageMaxArraySize() const;

    enum
    {
@ -181,53 +181,53 @@ public:
        VENDOR_INTEL=2,
        VENDOR_NVIDIA=3
    };
-    int vendorID() const;
+    CV_WRAP int vendorID() const;
    // FIXIT
    // dev.isAMD() doesn't work for OpenCL CPU devices from AMD OpenCL platform.
    // This method should use platform name instead of vendor name.
    // After fix restore code in arithm.cpp: ocl_compare()
-    inline bool isAMD() const { return vendorID() == VENDOR_AMD; }
-    inline bool isIntel() const { return vendorID() == VENDOR_INTEL; }
-    inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; }
+    CV_WRAP inline bool isAMD() const { return vendorID() == VENDOR_AMD; }
+    CV_WRAP inline bool isIntel() const { return vendorID() == VENDOR_INTEL; }
+    CV_WRAP inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; }

-    int maxClockFrequency() const;
-    int maxComputeUnits() const;
-    int maxConstantArgs() const;
-    size_t maxConstantBufferSize() const;
+    CV_WRAP int maxClockFrequency() const;
+    CV_WRAP int maxComputeUnits() const;
+    CV_WRAP int maxConstantArgs() const;
+    CV_WRAP size_t maxConstantBufferSize() const;

-    size_t maxMemAllocSize() const;
-    size_t maxParameterSize() const;
+    CV_WRAP size_t maxMemAllocSize() const;
+    CV_WRAP size_t maxParameterSize() const;

-    int maxReadImageArgs() const;
-    int maxWriteImageArgs() const;
-    int maxSamplers() const;
+    CV_WRAP int maxReadImageArgs() const;
+    CV_WRAP int maxWriteImageArgs() const;
+    CV_WRAP int maxSamplers() const;

-    size_t maxWorkGroupSize() const;
-    int maxWorkItemDims() const;
+    CV_WRAP size_t maxWorkGroupSize() const;
+    CV_WRAP int maxWorkItemDims() const;
    void maxWorkItemSizes(size_t*) const;

-    int memBaseAddrAlign() const;
+    CV_WRAP int memBaseAddrAlign() const;

-    int nativeVectorWidthChar() const;
-    int nativeVectorWidthShort() const;
-    int nativeVectorWidthInt() const;
-    int nativeVectorWidthLong() const;
-    int nativeVectorWidthFloat() const;
-    int nativeVectorWidthDouble() const;
-    int nativeVectorWidthHalf() const;
+    CV_WRAP int nativeVectorWidthChar() const;
+    CV_WRAP int nativeVectorWidthShort() const;
+    CV_WRAP int nativeVectorWidthInt() const;
+    CV_WRAP int nativeVectorWidthLong() const;
+    CV_WRAP int nativeVectorWidthFloat() const;
+    CV_WRAP int nativeVectorWidthDouble() const;
+    CV_WRAP int nativeVectorWidthHalf() const;

-    int preferredVectorWidthChar() const;
-    int preferredVectorWidthShort() const;
-    int preferredVectorWidthInt() const;
-    int preferredVectorWidthLong() const;
-    int preferredVectorWidthFloat() const;
-    int preferredVectorWidthDouble() const;
-    int preferredVectorWidthHalf() const;
+    CV_WRAP int preferredVectorWidthChar() const;
+    CV_WRAP int preferredVectorWidthShort() const;
+    CV_WRAP int preferredVectorWidthInt() const;
+    CV_WRAP int preferredVectorWidthLong() const;
+    CV_WRAP int preferredVectorWidthFloat() const;
+    CV_WRAP int preferredVectorWidthDouble() const;
+    CV_WRAP int preferredVectorWidthHalf() const;

-    size_t printfBufferSize() const;
-    size_t profilingTimerResolution() const;
+    CV_WRAP size_t printfBufferSize() const;
+    CV_WRAP size_t profilingTimerResolution() const;

-    static const Device& getDefault();
+    CV_WRAP static const Device& getDefault();

 protected:
    struct Impl;
--- a/modules/core/include/opencv2/core/opengl.hpp
+++ b/modules/core/include/opencv2/core/opengl.hpp
@ -558,13 +558,11 @@ by the call to mapGLBuffer() function.
 */
 CV_EXPORTS void unmapGLBuffer(UMat& u);

+//! @}
 }} // namespace cv::ogl

 namespace cv { namespace cuda {

-//! @addtogroup cuda
-//! @{
-
 /** @brief Sets a CUDA device and initializes it for the current thread with OpenGL interoperability.

 This function should be explicitly called after OpenGL context creation and before any CUDA calls.
@ -573,8 +571,6 @@ This function should be explicitly called after OpenGL context creation and befo
 */
 CV_EXPORTS void setGlDevice(int device = 0);

-//! @}
-
 }}

 //! @cond IGNORED
--- a/modules/core/include/opencv2/core/private.hpp
+++ b/modules/core/include/opencv2/core/private.hpp
@ -704,12 +704,12 @@ CV_EXPORTS InstrNode*   getCurrentNode();
    if(::cv::instr::useInstrumentation()){\
        ::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, NULL, false, TYPE, IMPL);\
        try{\
-            auto status = ((FUN)(__VA_ARGS__));\
+            auto instrStatus = ((FUN)(__VA_ARGS__));\
            if(ERROR_COND){\
                ::cv::instr::getCurrentNode()->m_payload.m_funError = true;\
                CV_INSTRUMENT_MARK_META(IMPL, #FUN " - BadExit");\
            }\
-            return status;\
+            return instrStatus;\
        }catch(...){\
            ::cv::instr::getCurrentNode()->m_payload.m_funError = true;\
            CV_INSTRUMENT_MARK_META(IMPL, #FUN " - BadExit");\
@ -750,7 +750,7 @@ CV_EXPORTS InstrNode*   getCurrentNode();
 // Wrapper region instrumentation macro
 #define CV_INSTRUMENT_REGION_IPP();          CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP)
 // Function instrumentation macro
-#define CV_INSTRUMENT_FUN_IPP(FUN, ...)     CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_IPP, status < 0, FUN, __VA_ARGS__)
+#define CV_INSTRUMENT_FUN_IPP(FUN, ...)     CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_IPP, instrStatus < 0, FUN, __VA_ARGS__)
 // Diagnostic markers
 #define CV_INSTRUMENT_MARK_IPP(NAME)        CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_IPP, NAME)

--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@ -3073,7 +3073,7 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
            dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
        CV_Assert( val > 0 );
        total *= _globalsize[i];
-        if (_globalsize[i] == 1)
+        if (_globalsize[i] == 1 && !_localsize)
            val = 1;
        globalsize[i] = divUp(_globalsize[i], (unsigned int)val) * val;
    }
@ -3086,7 +3086,7 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
 bool Kernel::Impl::run(int dims, size_t globalsize[], size_t localsize[],
        bool sync, int64* timeNS, const Queue& q)
 {
-    CV_INSTRUMENT_REGION_OPENCL_RUN(name.c_str(););
+    CV_INSTRUMENT_REGION_OPENCL_RUN(name.c_str());

    if (!handle || isInProgress)
        return false;
--- a/modules/core/src/opengl.cpp
+++ b/modules/core/src/opengl.cpp
@ -1434,14 +1434,14 @@ void cv::ogl::render(const ogl::Texture2D& tex, Rect_<double> wndRect, Rect_<dou
        gl::TexParameteri(gl::TEXTURE_2D, gl::TEXTURE_MIN_FILTER, gl::LINEAR);
        CV_CheckGlError();

-        const float vertex[] =
+        const double vertex[] =
        {
-            wndRect.x, wndRect.y, 0.0f,
-            wndRect.x, (wndRect.y + wndRect.height), 0.0f,
-            wndRect.x + wndRect.width, (wndRect.y + wndRect.height), 0.0f,
-            wndRect.x + wndRect.width, wndRect.y, 0.0f
+            wndRect.x, wndRect.y, 0.0,
+            wndRect.x, (wndRect.y + wndRect.height), 0.0,
+            wndRect.x + wndRect.width, (wndRect.y + wndRect.height), 0.0,
+            wndRect.x + wndRect.width, wndRect.y, 0.0
        };
-        const float texCoords[] =
+        const double texCoords[] =
        {
            texRect.x, texRect.y,
            texRect.x, texRect.y + texRect.height,
@ -1454,7 +1454,7 @@ void cv::ogl::render(const ogl::Texture2D& tex, Rect_<double> wndRect, Rect_<dou
        gl::EnableClientState(gl::TEXTURE_COORD_ARRAY);
        CV_CheckGlError();

-        gl::TexCoordPointer(2, gl::FLOAT, 0, texCoords);
+        gl::TexCoordPointer(2, gl::DOUBLE, 0, texCoords);
        CV_CheckGlError();

        gl::DisableClientState(gl::NORMAL_ARRAY);
@ -1464,7 +1464,7 @@ void cv::ogl::render(const ogl::Texture2D& tex, Rect_<double> wndRect, Rect_<dou
        gl::EnableClientState(gl::VERTEX_ARRAY);
        CV_CheckGlError();

-        gl::VertexPointer(3, gl::FLOAT, 0, vertex);
+        gl::VertexPointer(3, gl::DOUBLE, 0, vertex);
        CV_CheckGlError();

        gl::DrawArrays(gl::QUADS, 0, 4);
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@ -1768,7 +1768,7 @@ FLAGS getFlags()

 NodeData::NodeData(const char* funName, const char* fileName, int lineNum, void* retAddress, bool alwaysExpand, cv::instr::TYPE instrType, cv::instr::IMPL implType)
 {
-    m_funName       = funName;
+    m_funName       = funName ? cv::String(funName) : cv::String();  // std::string doesn't accept NULL
    m_instrType     = instrType;
    m_implType      = implType;
    m_fileName      = fileName;
--- a/modules/core/test/ocl/test_gemm.cpp
+++ b/modules/core/test/ocl/test_gemm.cpp
@ -145,6 +145,21 @@ OCL_INSTANTIATE_TEST_CASE_P(Core, Gemm, ::testing::Combine(
                            testing::Values(CV_32FC1, CV_32FC2, CV_64FC1, CV_64FC2),
                            Bool(), Bool(), Bool(), Bool()));

+// Regression test aimed at non-Intel GPUs: checks for CL_INVALID_WORK_GROUP_SIZE when localsize > globalsize
+OCL_TEST(Gemm, small)
+{
+    UMat A(2, 3, CV_32F), B(4, 3, CV_32F), uC(2, 4, CV_32F);  // inputs A (2x3), B (4x3); uC (2x4) receives the OCL_ON result
+    Mat C(2, 4, CV_32F);  // receives the OCL_OFF result that uC is compared against
+
+    randu(A, -1, 1);  // random contents for both inputs, generated with bounds -1 and 1
+    randu(B, -1, 1);
+
+    OCL_OFF(cv::gemm(A, B, 1, noArray(), 0, C, GEMM_2_T));  // GEMM_2_T: second operand is transposed, i.e. A * B^T (2x3 by 3x4); OCL_OFF presumably runs it without OpenCL
+    OCL_ON(cv::gemm(A, B, 1, noArray(), 0, uC, GEMM_2_T));  // same call, presumably with OpenCL enabled -- confirm against the OCL_ON/OCL_OFF macro definitions
+
+    EXPECT_LE(cvtest::norm(C, uC, cv::NORM_INF), 1e-5);  // NORM_INF distance between the two results must be at most 1e-5
+}
+
 } } // namespace opencv_test::ocl

 #endif // HAVE_OPENCL
--- a/modules/core/test/test_intrin.cpp
+++ b/modules/core/test/test_intrin.cpp
@ -12,6 +12,9 @@
 #include "test_intrin256.simd.hpp"
 #include "test_intrin256.simd_declarations.hpp"

+#ifdef _MSC_VER
+# pragma warning(disable:4702)  // unreachable code
+#endif

 namespace opencv_test { namespace hal {

--- a/modules/dnn/CMakeLists.txt
+++ b/modules/dnn/CMakeLists.txt
@ -36,7 +36,6 @@ else()
                                       -Wunused-parameter -Wunused-local-typedefs -Wsign-compare -Wsign-promo
                                       -Wundef -Wtautological-undefined-compare -Wignored-qualifiers -Wextra
                                       -Wunused-function -Wunused-const-variable -Wdeprecated-declarations
-                                       -Werror=non-virtual-dtor
  )
 endif()

--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@ -528,6 +528,11 @@ CV__DNN_INLINE_NS_BEGIN
        /** @brief Returns indexes of layers with unconnected outputs.
         */
        CV_WRAP std::vector<int> getUnconnectedOutLayers() const;
+
+        /** @brief Returns names of layers with unconnected outputs.
+         */
+        CV_WRAP std::vector<String> getUnconnectedOutLayersNames() const;
+
        /** @brief Returns input and output shapes for all layers in loaded model;
         *  preliminary inferencing isn't necessary.
         *  @param netInputShapes shapes for all input blobs in net input layer.
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@ -1078,12 +1078,22 @@ struct Net::Impl
            }
 #else
            {
-                if (!DNN_OPENCL_ALLOW_ALL_DEVICES
-                    && !(ocl::Device::getDefault().isIntel() && ocl::Device::getDefault().type() == ocl::Device::TYPE_GPU) // Current implementation is only valid for Intel GPU (#11494)
-                    )
+                if (!DNN_OPENCL_ALLOW_ALL_DEVICES)
                {
-                    CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with Intel GPUs only), switching to CPU.");
-                    preferableTarget = DNN_TARGET_CPU;
+                    // Current implementation is only valid for GPU (#11494)
+                    if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU)
+                    {
+                        CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU.");
+                        preferableTarget = DNN_TARGET_CPU;
+                    }
+                    else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
+                    {
+                        CV_LOG_WARNING(NULL,
+                            "DNN: OpenCL target with fp16 precision is not supported "
+                            "with current OpenCL device (tested with Intel GPUs only), "
+                            "switching to OpenCL with fp32 precision.");
+                        preferableTarget = DNN_TARGET_OPENCL;
+                    }
                }
            }
 #endif
@ -2789,6 +2799,18 @@ std::vector<int> Net::getUnconnectedOutLayers() const
    return layersIds;
 }

+// Returns the names of the layers reported by getUnconnectedOutLayers(), in the same order as the ids.
+std::vector<String> Net::getUnconnectedOutLayersNames() const
+{
+    const std::vector<int> outIds = getUnconnectedOutLayers();
+    std::vector<String> outNames;
+    outNames.reserve(outIds.size());
+    // Look each id up in impl->layers and collect that layer's name.
+    for (std::vector<int>::const_iterator it = outIds.begin(); it != outIds.end(); ++it)
+        outNames.push_back(impl->layers[*it].name);
+    return outNames;
+}
+
 void Net::getLayersShapes(const ShapesVec& netInputShapes,
                          std::vector<int>& layersIds,
                          std::vector<ShapesVec>& inLayersShapes,
--- a/modules/dnn/src/layers/batch_norm_layer.cpp
+++ b/modules/dnn/src/layers/batch_norm_layer.cpp
@ -230,8 +230,7 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
--- a/modules/dnn/src/layers/blank_layer.cpp
+++ b/modules/dnn/src/layers/blank_layer.cpp
@ -95,16 +95,9 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

-        if (inputs_arr.depth() == CV_16S)
-        {
-            forward_fallback(inputs_arr, outputs_arr, internals_arr);
-            return;
-        }
-
        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
--- a/modules/dnn/src/layers/concat_layer.cpp
+++ b/modules/dnn/src/layers/concat_layer.cpp
@ -237,16 +237,9 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

-        if (inputs_arr.depth() == CV_16S)
-        {
-            forward_fallback(inputs_arr, outputs_arr, internals_arr);
-            return;
-        }
-
        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@ -1529,8 +1529,7 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr));

        if (inputs_arr.depth() == CV_16S)
--- a/modules/dnn/src/layers/crop_layer.cpp
+++ b/modules/dnn/src/layers/crop_layer.cpp
@ -137,12 +137,6 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        if (inputs_arr.depth() == CV_16S)
-        {
-            forward_fallback(inputs_arr, outputs_arr, internals_arr);
-            return;
-        }
-
        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
--- a/modules/dnn/src/layers/detection_output_layer.cpp
+++ b/modules/dnn/src/layers/detection_output_layer.cpp
@ -415,8 +415,7 @@ public:

        if (_bboxesNormalized)
        {
-            CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                       OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+            CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                       forward_ocl(inputs_arr, outputs_arr, internals_arr))
        }
        if (inputs_arr.depth() == CV_16S)
--- a/modules/dnn/src/layers/eltwise_layer.cpp
+++ b/modules/dnn/src/layers/eltwise_layer.cpp
@ -354,8 +354,7 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
--- a/modules/dnn/src/layers/flatten_layer.cpp
+++ b/modules/dnn/src/layers/flatten_layer.cpp
@ -135,16 +135,9 @@ public:
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   outputs_arr.isUMatVector() &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+                   outputs_arr.isUMatVector(),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

-        if (inputs_arr.depth() == CV_16S)
-        {
-            forward_fallback(inputs_arr, outputs_arr, internals_arr);
-            return;
-        }
-
        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
--- a/modules/dnn/src/layers/fully_connected_layer.cpp
+++ b/modules/dnn/src/layers/fully_connected_layer.cpp
@ -389,8 +389,7 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
--- a/modules/dnn/src/layers/lrn_layer.cpp
+++ b/modules/dnn/src/layers/lrn_layer.cpp
@ -148,8 +148,7 @@ public:

        CV_Assert(inputs_arr.total() == outputs_arr.total());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
--- a/modules/dnn/src/layers/normalize_bbox_layer.cpp
+++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp
@ -184,8 +184,7 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
--- a/modules/dnn/src/layers/padding_layer.cpp
+++ b/modules/dnn/src/layers/padding_layer.cpp
@ -99,19 +99,21 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        if (inputs_arr.depth() == CV_16S)
-        {
-            forward_fallback(inputs_arr, outputs_arr, internals_arr);
-            return;
-        }
-
        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        if (paddingType == "constant")
        {
-            outputs[0].setTo(paddingValue);
+            if (inputs_arr.depth() == CV_16S)
+            {
+                std::vector<float> paddingValue_fp32(1, paddingValue);
+                std::vector<int16_t> paddingValue_fp16(1);
+                convertFp16(paddingValue_fp32, paddingValue_fp16);
+                outputs[0].setTo(paddingValue_fp16[0]);
+            }
+            else
+                outputs[0].setTo(paddingValue);
            inputs[0].copyTo(outputs[0](dstRanges));
        }
        else if (paddingType == "reflect")
--- a/modules/dnn/src/layers/permute_layer.cpp
+++ b/modules/dnn/src/layers/permute_layer.cpp
@ -304,8 +304,7 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
--- a/modules/dnn/src/layers/prior_box_layer.cpp
+++ b/modules/dnn/src/layers/prior_box_layer.cpp
@ -402,8 +402,7 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
--- a/modules/dnn/src/layers/region_layer.cpp
+++ b/modules/dnn/src/layers/region_layer.cpp
@ -196,8 +196,7 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
--- a/modules/dnn/src/layers/reorg_layer.cpp
+++ b/modules/dnn/src/layers/reorg_layer.cpp
@ -160,8 +160,7 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
--- a/modules/dnn/src/layers/reshape_layer.cpp
+++ b/modules/dnn/src/layers/reshape_layer.cpp
@ -233,16 +233,9 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

-        if (inputs_arr.depth() == CV_16S)
-        {
-            forward_fallback(inputs_arr, outputs_arr, internals_arr);
-            return;
-        }
-
        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
--- a/modules/dnn/src/layers/shuffle_channel_layer.cpp
+++ b/modules/dnn/src/layers/shuffle_channel_layer.cpp
@ -92,8 +92,7 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
--- a/modules/dnn/src/layers/slice_layer.cpp
+++ b/modules/dnn/src/layers/slice_layer.cpp
@ -239,16 +239,9 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

-        if (inputs_arr.depth() == CV_16S)
-        {
-            forward_fallback(inputs_arr, outputs_arr, internals_arr);
-            return;
-        }
-
        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
--- a/modules/dnn/src/layers/softmax_layer.cpp
+++ b/modules/dnn/src/layers/softmax_layer.cpp
@ -187,8 +187,7 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
--- a/modules/dnn/src/layers/split_layer.cpp
+++ b/modules/dnn/src/layers/split_layer.cpp
@ -83,12 +83,6 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        if (inputs_arr.depth() == CV_16S)
-        {
-            forward_fallback(inputs_arr, outputs_arr, internals_arr);
-            return;
-        }
-
        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
--- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
+++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
@ -60,6 +60,8 @@
 #if defined WIN32 || defined _WIN32
 #include <windows.h>
 #include <direct.h>
+#undef min
+#undef max
 #endif

 namespace cv { namespace dnn { namespace ocl4dnn {
@ -68,6 +70,30 @@ typedef std::map<std::string, std::string> kernel_hash_t;
 static kernel_hash_t kernelConfigMap;
 static bool defaultConfigLoaded = false;

+static bool enableWorkaroundIDLF()  // OPENCV_OCL4DNN_WORKAROUND_IDLF switch (default: true), read on first call and then cached
+{
+    static bool param = utils::getConfigurationParameterBool("OPENCV_OCL4DNN_WORKAROUND_IDLF", true);  // bool-typed flag, so use the bool reader (as raiseOnCheckError() does) rather than the size_t one
+    return param;
+}
+
+static bool dumpFailedResult()  // OPENCV_OCL4DNN_DUMP_FAILED_RESULT switch (default: false), read on first call and then cached
+{
+    static bool param = utils::getConfigurationParameterBool("OPENCV_OCL4DNN_DUMP_FAILED_RESULT", false);  // bool-typed flag, so use the bool reader (as raiseOnCheckError() does) rather than the size_t one
+    return param;
+}
+
+static size_t testAllKernels()  // numeric OPENCV_OCL4DNN_TEST_ALL_KERNELS setting (default: 0), read on first call and then cached
+{
+    static size_t cachedSetting = utils::getConfigurationParameterSizeT("OPENCV_OCL4DNN_TEST_ALL_KERNELS", 0);
+    return cachedSetting;
+}
+
+static bool raiseOnCheckError()  // OPENCV_OCL4DNN_TUNING_RAISE_CHECK_ERROR switch (default: false), read on first call and then cached
+{
+    static bool cachedFlag = utils::getConfigurationParameterBool("OPENCV_OCL4DNN_TUNING_RAISE_CHECK_ERROR", false);
+    return cachedFlag;
+}
+
 static std::string sanitize(const std::string& s)
 {
    std::string s_ = s;
@ -1221,9 +1247,6 @@ bool OCL4DNNConvSpatial<float>::verifyResult(const UMat &bottom,
                                             kernelConfig* config,
                                             UMat &verifyTop)
 {
-
-    uint32_t verificationFail = 0;
-
    if (config->verified)
        return true;
    else if (config->tested)
@ -1236,6 +1259,8 @@ bool OCL4DNNConvSpatial<float>::verifyResult(const UMat &bottom,
    convolve(bottom, top, weight, bias, numImages, config);
    tuned_ = saved_tuned;

+    config->tested = true;
+
    UMat new_top, new_verify_top;
    Mat mat_top, mat_verify_top;
    if (use_half_)
@ -1254,41 +1279,88 @@ bool OCL4DNNConvSpatial<float>::verifyResult(const UMat &bottom,
    const float* data = mat_top.ptr<float>();
    const float* verify_data = mat_verify_top.ptr<float>();

-    for (int32_t n = 0; n < num_; ++n) {
-        for (int32_t g = 0; g < group_; ++g) {
-            int32_t output_image_offset = n * top_dim_ + output_w_ * output_h_ * M_ * g;
-            for (int out_ch = 0; out_ch < M_ && !verificationFail; out_ch++)
-                for (int h = 0; h < output_h_ && !verificationFail; h++)
-                    for (int w = 0; w < output_w_; w++) {
-                        size_t offset = output_image_offset + out_ch * output_w_ * output_h_ + h * output_w_ + w;
-
-                        float error_factor = fabs(data[offset] - verify_data[offset]);
-                        if (use_half_ && error_factor > 0.1 * fabs(verify_data[offset]) &&
-                            error_factor > 0.04 && !(fabs(verify_data[offset]) < 1.e-3 && error_factor < 1.e-4))
-                        {
-                            CV_LOG_ERROR(NULL, "test verification failed @ image " << n << " group " << g
-                                         << " out_ch " << out_ch << " h " << h << " w " << w
-                                         << " got " << data[offset] << " expected " << verify_data[offset]);
-                            verificationFail = 1;
-                            goto out;
-                        }
-                        else if (!use_half_ && error_factor > 0.1 * fabs(verify_data[offset]) &&
-                                 !(fabs(verify_data[offset]) < 1.e-3 && error_factor < 1.e-4))
-                        {
-                            CV_LOG_ERROR(NULL, "test verification failed @ image " << n << " group " << g
-                                         << " out_ch " << out_ch << " h " << h << " w " << w
-                                         << " got " << data[offset] << " expected " << verify_data[offset]);
-                            verificationFail = 1;
-                            goto out;
+    int error_slice_offset = 0;
+    int error_slice = 0;
+    float relative_eps = use_half_ ? 0.1f : 0.01f;
+
+    size_t errors = 0;
+
+    double rel_err = norm(mat_top.reshape(1, 1), mat_verify_top.reshape(1, 1), NORM_L1 | NORM_RELATIVE);
+    if (!(rel_err < relative_eps))  // negated '<': a NaN norm (NaN in the output) must also reach the per-element check
+    {
+        for (int32_t n = 0; n < num_; ++n) {
+            for (int32_t g = 0; g < group_; ++g) {
+                int32_t output_image_offset = n * top_dim_ + output_w_ * output_h_ * M_ * g;
+                for (int out_ch = 0; out_ch < M_; out_ch++)
+                    for (int h = 0; h < output_h_; h++)
+                        for (int w = 0; w < output_w_; w++) {
+                            size_t offset = output_image_offset + out_ch * output_w_ * output_h_ + h * output_w_ + w;
+
+                            bool has_error = !(data[offset] == data[offset]);  // is NaN
+                            if (!has_error)
+                            {
+                                float error_factor = std::fabs(data[offset] - verify_data[offset]);
+                                float base_value_abs = std::max(1e-3f, std::fabs(verify_data[offset]));
+                                has_error = error_factor > relative_eps * base_value_abs;
+                            }
+                            if (has_error)
+                            {
+                                if (errors == 0)
+                                {
+                                    error_slice = (int)(offset / (output_w_ * output_h_));
+                                    error_slice_offset = (int)(offset % (output_w_ * output_h_));
+                                    CV_LOG_ERROR(NULL, "Kernel: " << config->kernelName);
+                                }
+                                if (errors < 10)
+                                    CV_LOG_ERROR(NULL, "test verification failed @ image " << n << " group " << g
+                                            << " out_ch " << out_ch << " h " << h << " w " << w
+                                            << " (offset: " << offset << ")"
+                                            << " got " << data[offset] << " expected " << verify_data[offset]);
+                                errors++;
+                            }
                        }
-                    }
+            }
        }
    }
-out:
-    if (verificationFail == 1)
+
+    if (errors)
+    {
+        if (dumpFailedResult())
+        {
+            try
+            {
+                int n_outputs = (int)(mat_top.size[0]*mat_top.size[1]);
+                int slice_size = (int)(mat_top.total() / n_outputs);
+                Rect roi(0, 0, slice_size, n_outputs);
+                roi.width = std::min(roi.width, 32);
+                roi.height = std::min(roi.height, 16);
+                roi.x = std::max(0, std::min(slice_size - roi.width, error_slice_offset - roi.width/2));
+                roi.y = std::max(0, std::min(n_outputs - roi.height, error_slice - roi.height/2));
+                std::cout << "roi = " << roi << " errors=" << errors << std::endl;
+                std::cout << "mat_top = " << shape(mat_top) << std::endl
+                          << mat_top.reshape(1, 1).reshape(1, n_outputs)(roi) << std::endl;
+                std::cout << "verify_top = " << shape(mat_verify_top) << std::endl
+                          << mat_verify_top.reshape(1, 1).reshape(1, n_outputs)(roi) << std::endl;
+            }
+            catch (const std::exception& e)
+            {
+                CV_LOG_ERROR(NULL, "Results dump failed: " << e.what());
+            }
+            catch (...)
+            {
+                CV_LOG_ERROR(NULL, "Results dump failed")
+            }
+        }
+
+        if (raiseOnCheckError())
+            CV_Error_(Error::StsError, ("ocl4dnn tuning verification failed: %s (errors %lld)", config->kernelName.c_str(), (long long int)errors));
        return false;
+    }
    else
+    {
+        config->verified = true;
        return true;
+    }
 }

 template<typename Dtype>
@ -1408,6 +1480,17 @@ bool OCL4DNNConvSpatial<float>::createIDLFKernel(int32_t blockWidth,

    setupKernel();

+    if (enableWorkaroundIDLF() && ocl::Device::getDefault().intelSubgroupsSupport())
+    {
+        // Issues are observed with these kernels: 3x1 (covered by tests), 2x1, 4x1, 5x1, 3x2
+        // kernels 1x3, 3x3, 2x3 are good
+        if (pad_h_ != 0 && kernel_w_ <= simd_size && kernel_h_ <= 2)
+        {
+            CV_LOG_INFO(NULL, "DNN(workaround): skip IDLF kernel: " << kernel_name_);
+            return false;
+        }
+    }
+
    ocl::Program program = compileKernel();
    if (program.ptr())
    {
@ -1623,13 +1706,38 @@ void OCL4DNNConvSpatial<float>::useFirstAvailable(const UMat &bottom,
    generateTunerItems(tunerItems);
    tunerItems.push_back(makePtr<tunerParam>(KERNEL_TYPE_BASIC, 1, 1, 1));

-    for (int i = 0; i < tunerItems.size(); i++) {
+    for (int i = 0; i < tunerItems.size(); i++)
+    {
        if (createConvolutionKernel(tunerItems[i]->kernelType,
                                    tunerItems[i]->blockWidth,
                                    tunerItems[i]->blockHeight,
-                                    tunerItems[i]->blockDepth)) {
+                                    tunerItems[i]->blockDepth))
+        {
            int kernelIdx = kernelQueue.size() - 1;
-            if (verifyResult(bottom, top, weight, bias, numImages, kernelQueue[kernelIdx], verifyTop)) {
+            kernelConfig* config = kernelQueue[kernelIdx].get();
+            bool failed = false;
+            const size_t testCount = testAllKernels();
+            for(int t = 0; t < testCount; t++)
+            {
+                try
+                {
+                    config->tested = false;
+                    config->verified = false;
+                    if (!verifyResult(bottom, top, weight, bias, numImages, config, verifyTop))
+                    {
+                        CV_LOG_ERROR(NULL, "Failed on test iteration: " << t);
+                        failed = true;
+                        break;
+                    }
+                }
+                catch (...)
+                {
+                    CV_LOG_ERROR(NULL, "Failed on test iteration: " << t);
+                    throw;
+                }
+            }
+            if (!failed && verifyResult(bottom, top, weight, bias, numImages, config, verifyTop))
+            {
                bestKernelConfig = kernelQueue[kernelIdx];
                if (bestKernelConfig->kernelType != KERNEL_TYPE_INTEL_IDLF &&
                    bestKernelConfig->kernelType != KERNEL_TYPE_GEMM_LIKE)
@ -1685,42 +1793,50 @@ void OCL4DNNConvSpatial<float>::setupConvolution(const UMat &bottom,
                                tunerItems[i]->blockHeight,
                                tunerItems[i]->blockDepth);

-    for (int32_t x = 0; x < kernelQueue.size(); x++) {
-        kernelQueue[x]->executionTime = timedConvolve(bottom, top, weight, bias, numImages,
-                                                      kernelQueue[x]);
-        #ifdef TEST_ALL_KERNELS
-        if (kernelQueue[x]->tested == false) {
-            bool verified = verifyResult(bottom, top, weight, bias, numImages, kernelQueue[x], verifyTop);
-            if (verified == false) {
-                CV_LOG_ERROR(NULL, "Kernel " << kernelQueue[x]->kernelName << " failed verification");
-                CV_LOG_ERROR(NULL, "kernelQueue[x]->workItem_output[0]: "
-                             << kernelQueue[x]->workItem_output[0] << " "
-                             << "kernelQueue[x]->workItem_output[1]: "
-                             << kernelQueue[x]->workItem_output[1] << " "
-                             << "kernelQueue[x]->workItem_output[2]: "
-                             << kernelQueue[x]->workItem_output[2] << " "
-                             << "kernelQueue[x]->kernelType: "
-                             << kernelQueue[x]->kernelType << " "
-                             << "kernelQueue[x]->global_work_size[0]: "
-                             << kernelQueue[x]->global_work_size[0] << " "
-                             << "kernelQueue[x]->global_work_size[1]: "
-                             << kernelQueue[x]->global_work_size[1] << " "
-                             << "kernelQueue[x]->global_work_size[2]: "
-                             << kernelQueue[x]->global_work_size[2] << " "
-                             << "kernelQueue[x]->local_work_size[0]: "
-                             << kernelQueue[x]->local_work_size[0] << " "
-                             << "kernelQueue[x]->local_work_size[1]: "
-                             << kernelQueue[x]->local_work_size[1] << " "
-                             << "kernelQueue[x]->local_work_size[2]: "
-                             << kernelQueue[x]->local_work_size[2] << " "
-                             << kernelQueue[x]->swizzle_weights << " "
-                             << kernelQueue[x]->use_null_local);
-            } else {
-                CV_LOG_INFO(NULL, "Kernel " << kernelQueue[x]->kernelName << " pass verification");
+    const size_t testCount = testAllKernels();
+    for (int32_t x = 0; x < kernelQueue.size(); x++)
+    {
+        kernelConfig* config = kernelQueue[x];
+        config->executionTime = timedConvolve(bottom, top, weight, bias, numImages, config);
+        for(int t = 0; t < testCount; t++)
+        {
+            try
+            {
+                config->tested = false;
+                config->verified = false;
+                bool verified = verifyResult(bottom, top, weight, bias, numImages, config, verifyTop);
+                if (verified == false)
+                {
+                    CV_LOG_ERROR(NULL, "Kernel " << config->kernelName << " failed verification");
+                    CV_LOG_ERROR(NULL, "workItem="
+                         << config->workItem_output[0] << ","
+                         << config->workItem_output[1] << ","
+                         << config->workItem_output[2] << " "
+                         << "kernelType: " << config->kernelType << " "
+                         << "global_work_size="
+                         << config->global_work_size[0] << ","
+                         << config->global_work_size[1] << ","
+                         << config->global_work_size[2] << " "
+                         << "local_work_size="
+                         << config->local_work_size[0] << ","
+                         << config->local_work_size[1] << ","
+                         << config->local_work_size[2] << " "
+                         << config->swizzle_weights << " "
+                         << config->use_null_local);
+                }
+                else
+                {
+                    CV_LOG_VERBOSE(NULL, "Kernel " << config->kernelName << " pass verification");
+                }
+            }
+            catch (...)
+            {
+                CV_LOG_ERROR(NULL, "Failed on test iteration: " << t);
+                throw;
            }
        }
-        #endif
    }
+
    int32_t failures = 0;
    bool verification = false;
    if (kernelQueue.size()) {
@ -1739,12 +1855,10 @@ void OCL4DNNConvSpatial<float>::setupConvolution(const UMat &bottom,
            // Test fastest kernel
            bool verified = verifyResult(bottom, top, weight, bias, numImages, kernelQueue[fastestKernel], verifyTop);
            if (verified == true) {
-                kernelQueue[fastestKernel]->verified = true;
                kernel_index_ = fastestKernel;
                verification = true;
                break;
            } else {
-                kernelQueue[fastestKernel]->tested = true;
                CV_LOG_ERROR(NULL, "Kernel " << kernelQueue[fastestKernel]->kernelName <<
                             " failed verification");
                failures++;
--- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_lrn.cpp
+++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_lrn.cpp
@ -69,9 +69,6 @@ bool OCL4DNNLRN<Dtype>::Forward(const UMat& bottom, UMat& top)
 {
    bool ret = true;

-    if (!ocl::Device::getDefault().intelSubgroupsSupport())
-        return false;
-
    switch (lrn_type_)
    {
    case LRNParameter_NormRegion_ACROSS_CHANNELS:
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@ -213,7 +213,7 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot
        else if (attribute_proto.floats_size() > 0)
        {
            lp.set(attribute_name, DictValue::arrayReal(
-                (float*)attribute_proto.mutable_floats(), attribute_proto.floats_size()));
+                attribute_proto.floats().data(), attribute_proto.floats_size()));
        }
        else if (attribute_proto.ints_size() > 0)
        {
--- a/modules/dnn/src/opencl/prior_box.cl
+++ b/modules/dnn/src/opencl/prior_box.cl
@ -114,6 +114,6 @@ __kernel void clip(const int nthreads,
    for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
    {
        Dtype4 vec = vload4(index, dst);
-        vstore4(clamp(vec, 0, 1), index, dst);
+        vstore4(clamp(vec, 0.0f, 1.0f), index, dst);
    }
 }
--- a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp
+++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp
@ -20,6 +20,8 @@ using ::google::protobuf::MapPair;
 class Subgraph  // Interface to match and replace TensorFlow subgraphs.
 {
 public:
+    virtual ~Subgraph() {}
+
    // Add a node to be matched in the origin graph. Specify ids of nodes that
    // are expected to be inputs. Returns id of a newly added node.
    // TODO: Replace inputs to std::vector<int> in C++11
--- a/modules/dnn/test/test_common.hpp
+++ b/modules/dnn/test/test_common.hpp
@ -276,6 +276,8 @@ static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAnd
        targets.push_back(make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16));
    }
 #endif
+    if (targets.empty())  // validate at least CPU mode
+        targets.push_back(make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU));
    return testing::ValuesIn(targets);
 }

--- a/modules/dnn/test/test_halide_layers.cpp
+++ b/modules/dnn/test/test_halide_layers.cpp
@ -99,14 +99,6 @@ TEST_P(Convolution, Accuracy)
 #endif

    bool skipCheck = false;
-    if (cvtest::skipUnstableTests && backendId == DNN_BACKEND_OPENCV &&
-        (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) &&
-        (
-            (kernel == Size(3, 1) && stride == Size(1, 1) && pad == Size(0, 1)) ||
-            (stride.area() > 1 && !(pad.width == 0 && pad.height == 0))
-        )
-    )
-        skipCheck = true;

    int sz[] = {outChannels, inChannels / group, kernel.height, kernel.width};
    Mat weights(4, &sz[0], CV_32F);
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@ -295,7 +295,7 @@ TEST_P(Test_ONNX_nets, TinyYolov2)
 TEST_P(Test_ONNX_nets, CNN_MNIST)
 {
    // output range: [-1952; 6574]
-    const double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 3.82 : 4.3e-4;
+    const double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 3.82 : 4.4e-4;
    const double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 13.5 : 2e-3;

    testONNXModels("cnn_mnist", pb, l1, lInf);
@ -341,7 +341,7 @@ TEST_P(Test_ONNX_nets, Inception_v2)
 TEST_P(Test_ONNX_nets, DenseNet121)
 {
    // output range: [-87; 138]
-    const double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.12 : 1.88e-5;
+    const double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.12 : 2.2e-5;
    const double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.74 : 1.23e-4;
    testONNXModels("densenet121", pb, l1, lInf);
 }
--- a/modules/flann/include/opencv2/flann/kmeans_index.h
+++ b/modules/flann/include/opencv2/flann/kmeans_index.h
@ -276,7 +276,7 @@ public:
    public:
        KMeansDistanceComputer(Distance _distance, const Matrix<ElementType>& _dataset,
            const int _branching, const int* _indices, const Matrix<double>& _dcenters, const size_t _veclen,
-            int* _count, int* _belongs_to, std::vector<DistanceType>& _radiuses, bool& _converged, cv::Mutex& _mtx)
+            int* _count, int* _belongs_to, std::vector<DistanceType>& _radiuses, bool& _converged)
            : distance(_distance)
            , dataset(_dataset)
            , branching(_branching)
@ -287,7 +287,6 @@ public:
            , belongs_to(_belongs_to)
            , radiuses(_radiuses)
            , converged(_converged)
-            , mtx(_mtx)
        {
        }

@ -311,12 +310,10 @@ public:
                    radiuses[new_centroid] = sq_dist;
                }
                if (new_centroid != belongs_to[i]) {
-                    count[belongs_to[i]]--;
-                    count[new_centroid]++;
+                    CV_XADD(&count[belongs_to[i]], -1);
+                    CV_XADD(&count[new_centroid], 1);
                    belongs_to[i] = new_centroid;
-                    mtx.lock();
                    converged = false;
-                    mtx.unlock();
                }
            }
        }
@ -332,7 +329,6 @@ public:
        int* belongs_to;
        std::vector<DistanceType>& radiuses;
        bool& converged;
-        cv::Mutex& mtx;
        KMeansDistanceComputer& operator=( const KMeansDistanceComputer & ) { return *this; }
    };

@ -801,8 +797,7 @@ private:
            }

            // reassign points to clusters
-            cv::Mutex mtx;
-            KMeansDistanceComputer invoker(distance_, dataset_, branching, indices, dcenters, veclen_, count, belongs_to, radiuses, converged, mtx);
+            KMeansDistanceComputer invoker(distance_, dataset_, branching, indices, dcenters, veclen_, count, belongs_to, radiuses, converged);
            parallel_for_(cv::Range(0, (int)indices_length), invoker);

            for (int i=0; i<branching; ++i) {
--- a/modules/js/CMakeLists.txt
+++ b/modules/js/CMakeLists.txt
@ -1,24 +1,42 @@
 # ----------------------------------------------------------------------------
 #  CMake file for js support
 # ----------------------------------------------------------------------------
-
-# message(STATUS "---------------- Start of JavaScript module ----------------------")
-
 set(the_description "The js bindings")
-set(MODULE_NAME js)
+
+if(NOT BUILD_opencv_js)  # should be enabled explicitly (by build_js.py script)
+  ocv_module_disable(js)
+endif()

 set(OPENCV_JS "opencv.js")

-ocv_add_module(${MODULE_NAME} BINDINGS)
+find_path(EMSCRIPTEN_INCLUDE_DIR
+          emscripten/bind.h
+          PATHS
+            ENV EMSCRIPTEN_ROOT
+          PATH_SUFFIXES system/include include
+          DOC "Location of Emscripten SDK")
+
+if(NOT EMSCRIPTEN_INCLUDE_DIR OR NOT PYTHON_DEFAULT_AVAILABLE)  # both Emscripten headers and Python are needed
+  set(DISABLE_MSG "Module 'js' disabled because the following dependencies are not found:")
+  if(NOT EMSCRIPTEN_INCLUDE_DIR)
+    set(DISABLE_MSG "${DISABLE_MSG} Emscripten")
+  endif()
+  if(NOT PYTHON_DEFAULT_AVAILABLE)
+    set(DISABLE_MSG "${DISABLE_MSG} Python")
+  endif()
+  message(STATUS "${DISABLE_MSG}")  # quoted so the text is always passed as a single argument
+  ocv_module_disable(js)
+endif()
+
+ocv_add_module(js BINDINGS)

-# TODO: add emscripten path
-ocv_module_include_directories()
+ocv_module_include_directories(${EMSCRIPTEN_INCLUDE_DIR})

 # get list of modules to wrap
-# message(STATUS "Wrapped in ${MODULE_NAME}:")
+# message(STATUS "Wrapped in js:")
 set(OPENCV_JS_MODULES)
 foreach(m ${OPENCV_MODULES_BUILD})
-  if (";${OPENCV_MODULE_${m}_WRAPPERS};" MATCHES ";${MODULE_NAME};" AND HAVE_${m})
+  if(";${OPENCV_MODULE_${m}_WRAPPERS};" MATCHES ";js;" AND HAVE_${m})
    list(APPEND OPENCV_JS_MODULES ${m})
    # message(STATUS "\t${m}")
  endif()
@ -125,7 +143,3 @@ list(APPEND opencv_test_js_file_deps "${test_data_path}" "${opencv_test_js_bin_d

 add_custom_target(${PROJECT_NAME}_test ALL
                  DEPENDS ${OCV_JS_PATH} ${opencv_test_js_file_deps})
-
-unset(MODULE_NAME)
-
-# message(STATUS "---------------- End of JavaScript module ----------------------")
--- a/modules/js/src/make_umd.py
+++ b/modules/js/src/make_umd.py
@ -103,4 +103,7 @@ if __name__ == "__main__":
    if len(sys.argv) > 2:
        opencvjs = sys.argv[1]
        cvjs = sys.argv[2]
+        if not os.path.isfile(opencvjs):  # nothing to wrap if the input was never built
+            print('opencv.js file not found! Have you compiled the opencv_js module?')
+            sys.exit(1)  # report failure to the caller; the bare exit() helper returned status 0
        make_umd(opencvjs, cvjs);
--- a/modules/objdetect/src/hog.cpp
+++ b/modules/objdetect/src/hog.cpp
@ -216,7 +216,7 @@ void HOGDescriptor::copyTo(HOGDescriptor& c) const
    c.histogramNormType = histogramNormType;
    c.L2HysThreshold = L2HysThreshold;
    c.gammaCorrection = gammaCorrection;
-    c.svmDetector = svmDetector;
+    c.setSVMDetector(svmDetector);
    c.nlevels = nlevels;
    c.signedGradient = signedGradient;
 }
--- a/modules/photo/src/seamless_cloning.cpp
+++ b/modules/photo/src/seamless_cloning.cpp
@ -47,64 +47,45 @@
 using namespace std;
 using namespace cv;

-void cv::seamlessClone(InputArray _src, InputArray _dst, InputArray _mask, Point p, OutputArray _blend, int flags)
+static Mat checkMask(InputArray _mask, Size size)
 {
-    CV_INSTRUMENT_REGION();
-
-    const Mat src  = _src.getMat();
-    const Mat dest = _dst.getMat();
-    const Mat mask = _mask.getMat();
-    dest.copyTo(_blend);
-    Mat blend = _blend.getMat();
-
+    Mat mask = _mask.getMat();
    Mat gray;
-
-    if(mask.channels() == 3)
-        cvtColor(mask, gray, COLOR_BGR2GRAY );
+    if (mask.channels() == 3)
+        cvtColor(mask, gray, COLOR_BGR2GRAY);
    else
    {
        if (mask.empty())
-            gray = Mat(src.rows, src.cols, CV_8UC1, Scalar(255));
+            gray = Mat(size.height, size.width, CV_8UC1, Scalar(255));
        else
            mask.copyTo(gray);
    }

-    Mat gray_inner = gray(Rect(1, 1, gray.cols - 2, gray.rows - 2));
-    copyMakeBorder(gray_inner, gray, 1, 1, 1, 1, BORDER_ISOLATED | BORDER_CONSTANT, Scalar(0));
-
-    int minx = INT_MAX, miny = INT_MAX, maxx = INT_MIN, maxy = INT_MIN;
-    int h = gray.size().height;
-    int w = gray.size().width;
+    return gray;
+}

-    for(int i=0;i<h;i++)
-    {
-        for(int j=0;j<w;j++)
-        {
-            if(gray.at<uchar>(i,j) == 255)
-            {
-                miny = std::min(miny,i);
-                maxy = std::max(maxy,i);
-                minx = std::min(minx,j);
-                maxx = std::max(maxx,j);
-            }
-        }
-    }
+void cv::seamlessClone(InputArray _src, InputArray _dst, InputArray _mask, Point p, OutputArray _blend, int flags)
+{
+    CV_INSTRUMENT_REGION();

-    int lenx = maxx - minx + 1;
-    int leny = maxy - miny + 1;
+    const Mat src  = _src.getMat();
+    const Mat dest = _dst.getMat();
+    Mat mask = checkMask(_mask, src.size());
+    dest.copyTo(_blend);
+    Mat blend = _blend.getMat();

-    int minxd = p.x - lenx/2;
-    int minyd = p.y - leny/2;
+    Mat mask_inner = mask(Rect(1, 1, mask.cols - 2, mask.rows - 2));
+    copyMakeBorder(mask_inner, mask, 1, 1, 1, 1, BORDER_ISOLATED | BORDER_CONSTANT, Scalar(0));

-    Rect roi_d(minxd,minyd,lenx,leny);
-    Rect roi_s(minx,miny,lenx,leny);
+    Rect roi_s = boundingRect(mask);
+    Rect roi_d(p.x - roi_s.width / 2, p.y - roi_s.height / 2, roi_s.width, roi_s.height);

    Mat destinationROI = dest(roi_d).clone();

-    Mat sourceROI = Mat::zeros(leny, lenx, src.type());
-    src(roi_s).copyTo(sourceROI,gray(roi_s));
+    Mat sourceROI = Mat::zeros(roi_s.height, roi_s.width, src.type());
+    src(roi_s).copyTo(sourceROI,mask(roi_s));

-    Mat maskROI = gray(roi_s);
+    Mat maskROI = mask(roi_s);
    Mat recoveredROI = blend(roi_d);

    Cloning obj;
@ -116,21 +97,15 @@ void cv::colorChange(InputArray _src, InputArray _mask, OutputArray _dst, float
    CV_INSTRUMENT_REGION();

    Mat src  = _src.getMat();
-    Mat mask  = _mask.getMat();
+    Mat mask = checkMask(_mask, src.size());
    _dst.create(src.size(), src.type());
    Mat blend = _dst.getMat();

-    Mat gray, cs_mask;
-
-    if(mask.channels() == 3)
-        cvtColor(mask, gray, COLOR_BGR2GRAY );
-    else
-        mask.copyTo(gray);
-
-    src.copyTo(cs_mask,gray);
+    Mat cs_mask = Mat::zeros(src.size(), src.type());
+    src.copyTo(cs_mask, mask);

    Cloning obj;
-    obj.localColorChange(src,cs_mask,gray,blend,red,green,blue);
+    obj.localColorChange(src, cs_mask, mask, blend, red, green, blue);
 }

 void cv::illuminationChange(InputArray _src, InputArray _mask, OutputArray _dst, float alpha, float beta)
@ -138,21 +113,15 @@ void cv::illuminationChange(InputArray _src, InputArray _mask, OutputArray _dst,
    CV_INSTRUMENT_REGION();

    Mat src  = _src.getMat();
-    Mat mask  = _mask.getMat();
+    Mat mask = checkMask(_mask, src.size());
    _dst.create(src.size(), src.type());
    Mat blend = _dst.getMat();

-    Mat gray, cs_mask;
-
-    if(mask.channels() == 3)
-        cvtColor(mask, gray, COLOR_BGR2GRAY );
-    else
-        mask.copyTo(gray);
-
-    src.copyTo(cs_mask,gray);
+    Mat cs_mask = Mat::zeros(src.size(), src.type());
+    src.copyTo(cs_mask, mask);

    Cloning obj;
-    obj.illuminationChange(src,cs_mask,gray,blend,alpha,beta);
+    obj.illuminationChange(src, cs_mask, mask, blend, alpha, beta);

 }

@ -162,18 +131,13 @@ void cv::textureFlattening(InputArray _src, InputArray _mask, OutputArray _dst,
    CV_INSTRUMENT_REGION();

    Mat src  = _src.getMat();
-    Mat mask  = _mask.getMat();
+    Mat mask = checkMask(_mask, src.size());
    _dst.create(src.size(), src.type());
    Mat blend = _dst.getMat();
-    Mat gray, cs_mask;
-
-    if(mask.channels() == 3)
-        cvtColor(mask, gray, COLOR_BGR2GRAY );
-    else
-        mask.copyTo(gray);

-    src.copyTo(cs_mask,gray);
+    Mat cs_mask = Mat::zeros(src.size(), src.type());
+    src.copyTo(cs_mask, mask);

    Cloning obj;
-    obj.textureFlatten(src,cs_mask,gray,low_threshold,high_threshold,kernel_size,blend);
+    obj.textureFlatten(src, cs_mask, mask, low_threshold, high_threshold, kernel_size, blend);
 }
--- a/modules/python/test/test_dnn.py
+++ b/modules/python/test/test_dnn.py
@ -95,7 +95,7 @@ if haveInfEngine:
 if cv.ocl.haveOpenCL() and cv.ocl.useOpenCL():
    dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL])
    dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL_FP16])
-    if haveInfEngine:  # FIXIT Check Intel iGPU only
+    if haveInfEngine and cv.ocl_Device.getDefault().isIntel():
        dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL])
        dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL_FP16])

--- a/modules/videoio/include/opencv2/videoio.hpp
+++ b/modules/videoio/include/opencv2/videoio.hpp
@ -169,6 +169,7 @@ enum VideoCaptureProperties {
       CAP_PROP_AUTOFOCUS     =39,
       CAP_PROP_SAR_NUM       =40, //!< Sample aspect ratio: num/den (num)
       CAP_PROP_SAR_DEN       =41, //!< Sample aspect ratio: num/den (den)
+       CAP_PROP_BACKEND       =42, //!< current backend (enum VideoCaptureAPIs). Read-only property
 #ifndef CV_DOXYGEN
       CV__CAP_PROP_LATEST
 #endif
@ -808,6 +809,12 @@ public:
    */
    CV_WRAP virtual bool open(const String& filename, int apiPreference);

+    /** @brief Returns used backend API name
+
+     @note Stream should be opened.
+     */
+    CV_WRAP String getBackendName() const;
+
 protected:
    Ptr<CvCapture> cap;
    Ptr<IVideoCapture> icap;
@ -946,6 +953,12 @@ public:
     */
    CV_WRAP static int fourcc(char c1, char c2, char c3, char c4);

+    /** @brief Returns used backend API name
+
+     @note Stream should be opened.
+     */
+    CV_WRAP String getBackendName() const;
+
 protected:
    Ptr<CvVideoWriter> writer;
    Ptr<IVideoWriter> iwriter;
--- a/modules/videoio/src/cap.cpp
+++ b/modules/videoio/src/cap.cpp
@ -41,6 +41,7 @@

 #include "precomp.hpp"

+#include "opencv2/videoio/registry.hpp"
 #include "videoio_registry.hpp"

 namespace cv {
@ -168,6 +169,17 @@ bool VideoCapture::isOpened() const
    return !cap.empty();  // legacy interface doesn't support closed files
 }

+String VideoCapture::getBackendName() const  // registry name of the backend API behind this capture
+{
+    int api = 0;  // 0 means no backend could be identified
+    if (icap)
+        api = icap->isOpened() ? icap->getCaptureDomain() : 0;  // a closed icap is treated as "no backend"
+    else if (cap)
+        api = cap->getCaptureDomain();  // legacy interface
+    CV_Assert(api != 0);  // raises when neither interface yielded a backend id
+    return cv::videoio_registry::getBackendName((VideoCaptureAPIs)api);
+}
+
 void VideoCapture::release()
 {
    CV_TRACE_FUNCTION();
@ -256,6 +268,8 @@ VideoCapture& VideoCapture::operator >> (UMat& image)

 bool VideoCapture::set(int propId, double value)
 {
+    CV_CheckNE(propId, (int)CAP_PROP_BACKEND, "Can't set read-only property");  // CAP_PROP_BACKEND cannot be written
+
    if (!icap.empty())
        return icap->setProperty(propId, value);
    return cvSetCaptureProperty(cap, propId, value) != 0;
@ -263,6 +277,17 @@ bool VideoCapture::set(int propId, double value)

 double VideoCapture::get(int propId) const
 {
+    // CAP_PROP_BACKEND is answered here instead of being forwarded to the backend.
+    if (propId == CAP_PROP_BACKEND)
+    {
+        int api = 0;
+        // icap is consulted first and reports a domain only while it is opened;
+        // the legacy cap handle is used when icap is not set.
+        if (icap)
+            api = icap->isOpened() ? icap->getCaptureDomain() : 0;
+        else if (cap)
+            api = cap->getCaptureDomain();
+        // A non-positive id means no backend could be identified: report -1.
+        if (api <= 0)
+            return -1.0;
+        return (double)api;
+    }
    if (!icap.empty())
        return icap->getProperty(propId);
    return cap ? cap->getProperty(propId) : 0;
@ -342,6 +367,8 @@ bool VideoWriter::isOpened() const

 bool VideoWriter::set(int propId, double value)
 {
+    // CAP_PROP_BACKEND only reports the active backend, so writing it is rejected.
+    CV_CheckNE(propId, (int)CAP_PROP_BACKEND, "Can't set read-only property");
+
    if (!iwriter.empty())
        return iwriter->setProperty(propId, value);
    return false;
@ -349,11 +376,33 @@ bool VideoWriter::set(int propId, double value)

 double VideoWriter::get(int propId) const
 {
+    // CAP_PROP_BACKEND is answered here instead of being forwarded to the backend.
+    if (propId == CAP_PROP_BACKEND)
+    {
+        int backendId = 0;
+        if (iwriter)
+            backendId = iwriter->getCaptureDomain();
+        else if (writer)
+            backendId = writer->getCaptureDomain();
+        // A non-positive id means no backend could be identified: report -1.
+        return (backendId > 0) ? (double)backendId : -1.0;
+    }
    if (!iwriter.empty())
        return iwriter->getProperty(propId);
    return 0.;
 }

+// Looks up the registry name of the backend behind this writer.
+// iwriter is asked first, then the legacy writer handle. Asserts when the
+// reported backend id is 0.
+String VideoWriter::getBackendName() const
+{
+    int api = 0;
+    if (iwriter)
+    {
+        api = iwriter->getCaptureDomain();
+    }
+    else if (writer)
+    {
+        api = writer->getCaptureDomain();
+    }
+    CV_Assert(api != 0);
+    const VideoCaptureAPIs backend = (VideoCaptureAPIs)api;
+    return cv::videoio_registry::getBackendName(backend);
+}
+
 void VideoWriter::write(const Mat& image)
 {
    CV_INSTRUMENT_REGION();
--- a/modules/videoio/src/cap_avfoundation.mm
+++ b/modules/videoio/src/cap_avfoundation.mm
@ -173,7 +173,8 @@ class CvVideoWriter_AVFoundation : public CvVideoWriter{
                double fps, CvSize frame_size,
                int is_color=1);
        ~CvVideoWriter_AVFoundation();
-        bool writeFrame(const IplImage* image);
+        bool writeFrame(const IplImage* image) CV_OVERRIDE;
+        int getCaptureDomain() const CV_OVERRIDE { return cv::CAP_AVFOUNDATION; }
    private:
        IplImage* argbimage;

--- a/modules/videoio/src/cap_avfoundation_mac.mm
+++ b/modules/videoio/src/cap_avfoundation_mac.mm
@ -181,7 +181,8 @@ class CvVideoWriter_AVFoundation : public CvVideoWriter {
                double fps, CvSize frame_size,
                int is_color=1);
        ~CvVideoWriter_AVFoundation();
-        bool writeFrame(const IplImage* image);
+        bool writeFrame(const IplImage* image) CV_OVERRIDE;
+        int getCaptureDomain() const CV_OVERRIDE { return cv::CAP_AVFOUNDATION; }
    private:
        IplImage* argbimage;

--- a/modules/videoio/src/cap_dc1394.cpp
+++ b/modules/videoio/src/cap_dc1394.cpp
@ -1053,7 +1053,7 @@ public:
    virtual bool setProperty(int, double) CV_OVERRIDE;
    virtual bool grabFrame() CV_OVERRIDE;
    virtual IplImage* retrieveFrame(int) CV_OVERRIDE;
-    virtual int getCaptureDomain() CV_OVERRIDE { return CV_CAP_DC1394; } // Return the type of the capture object: CV_CAP_VFW, etc...
+    virtual int getCaptureDomain() CV_OVERRIDE { return CV_CAP_DC1394; }
 protected:

    CvCaptureCAM_DC1394* captureDC1394;
--- a/modules/videoio/src/cap_dc1394_v2.cpp
+++ b/modules/videoio/src/cap_dc1394_v2.cpp
@ -211,7 +211,7 @@ public:
    virtual bool setProperty(int, double) CV_OVERRIDE;
    virtual bool grabFrame() CV_OVERRIDE;
    virtual IplImage* retrieveFrame(int) CV_OVERRIDE;
-    virtual int getCaptureDomain() CV_OVERRIDE { return CV_CAP_DC1394; } // Return the type of the capture object: CV_CAP_VFW, etc...
+    virtual int getCaptureDomain() CV_OVERRIDE { return CV_CAP_DC1394; }


 protected:
--- a/modules/videoio/src/cap_ffmpeg.cpp
+++ b/modules/videoio/src/cap_ffmpeg.cpp
@ -289,6 +289,8 @@ public:
    CvVideoWriter_FFMPEG_proxy(const cv::String& filename, int fourcc, double fps, cv::Size frameSize, bool isColor) { ffmpegWriter = 0; open(filename, fourcc, fps, frameSize, isColor); }
    virtual ~CvVideoWriter_FFMPEG_proxy() { close(); }

+    int getCaptureDomain() const CV_OVERRIDE { return cv::CAP_FFMPEG; }
+
    virtual void write(cv::InputArray image ) CV_OVERRIDE
    {
        if(!ffmpegWriter)
--- a/modules/videoio/src/cap_gphoto2.cpp
+++ b/modules/videoio/src/cap_gphoto2.cpp
@ -144,10 +144,7 @@ public:
    virtual bool setProperty(int, double) CV_OVERRIDE;
    virtual bool grabFrame() CV_OVERRIDE;
    virtual bool retrieveFrame(int, OutputArray) CV_OVERRIDE;
-    virtual int getCaptureDomain() CV_OVERRIDE
-    {
-        return CV_CAP_GPHOTO2;
-    } // Return the type of the capture object: CV_CAP_VFW, etc...
+    virtual int getCaptureDomain() CV_OVERRIDE { return CV_CAP_GPHOTO2; }

    bool open(int index);
    void close();
--- a/modules/videoio/src/cap_gstreamer.cpp
+++ b/modules/videoio/src/cap_gstreamer.cpp
@ -189,7 +189,7 @@ public:
    virtual double getProperty(int propId) const CV_OVERRIDE;
    virtual bool setProperty(int propId, double value) CV_OVERRIDE;
    virtual bool isOpened() const CV_OVERRIDE;
-    virtual int getCaptureDomain() CV_OVERRIDE; // Return the type of the capture object: CAP_VFW, etc...
+    virtual int getCaptureDomain() CV_OVERRIDE { return cv::CAP_GSTREAMER; }
    bool open(int id);
    bool open(const String &filename_);
    static void newPad(GstElement * /*elem*/, GstPad     *pad, gpointer    data);
@ -578,8 +578,6 @@ bool GStreamerCapture::isOpened() const
    return pipeline != NULL;
 }

-int GStreamerCapture::getCaptureDomain() { return CAP_GSTREAMER; }
-
 /*!
 * \brief CvCapture_GStreamer::open Open the given file with gstreamer
 * \param type CvCapture type. One of CV_CAP_GSTREAMER_*
@ -1233,6 +1231,8 @@ public:
    }
    virtual ~CvVideoWriter_GStreamer() CV_OVERRIDE { close(); }

+    int getCaptureDomain() const CV_OVERRIDE { return cv::CAP_GSTREAMER; }
+
    virtual bool open( const char* filename, int fourcc,
                       double fps, CvSize frameSize, bool isColor );
    virtual void close();
--- a/modules/videoio/src/cap_images.cpp
+++ b/modules/videoio/src/cap_images.cpp
@ -86,6 +86,7 @@ public:
    virtual bool grabFrame() CV_OVERRIDE;
    virtual IplImage* retrieveFrame(int) CV_OVERRIDE;

+    int getCaptureDomain() /*const*/ CV_OVERRIDE { return cv::CAP_IMAGES; }
 protected:
    char*  filename; // actually a printf-pattern
    unsigned currentframe;
@ -336,6 +337,7 @@ public:
    virtual bool setProperty( int, double ); // FIXIT doesn't work: IVideoWriter interface only!
    virtual bool writeFrame( const IplImage* ) CV_OVERRIDE;

+    int getCaptureDomain() const CV_OVERRIDE { return cv::CAP_IMAGES; }
 protected:
    char* filename;
    unsigned currentframe;
--- a/modules/videoio/src/cap_libv4l.cpp
+++ b/modules/videoio/src/cap_libv4l.cpp
@ -1928,6 +1928,8 @@ public:
    virtual bool setProperty(int, double) CV_OVERRIDE;
    virtual bool grabFrame() CV_OVERRIDE;
    virtual IplImage* retrieveFrame(int) CV_OVERRIDE;
+
+    int getCaptureDomain() /*const*/ CV_OVERRIDE { return cv::CAP_V4L; }
 protected:

    CvCaptureCAM_V4L* captureV4L;
--- a/modules/videoio/src/cap_mfx_writer.hpp
+++ b/modules/videoio/src/cap_mfx_writer.hpp
@ -26,6 +26,7 @@ public:
    virtual void write(cv::InputArray input);
    static cv::Ptr<VideoWriter_IntelMFX> create(const cv::String& filename, int _fourcc, double fps, cv::Size frameSize, bool isColor);

+    virtual int getCaptureDomain() const { return cv::CAP_INTEL_MFX; }
 protected:
    bool write_one(cv::InputArray bgr);

--- a/modules/videoio/src/cap_mjpeg_decoder.cpp
+++ b/modules/videoio/src/cap_mjpeg_decoder.cpp
@ -54,7 +54,7 @@ public:
    virtual bool grabFrame() CV_OVERRIDE;
    virtual bool retrieveFrame(int, OutputArray) CV_OVERRIDE;
    virtual bool isOpened() const CV_OVERRIDE;
-    virtual int getCaptureDomain() CV_OVERRIDE { return CAP_ANY; } // Return the type of the capture object: CAP_VFW, etc...
+    virtual int getCaptureDomain() CV_OVERRIDE { return CAP_OPENCV_MJPEG; }
    MotionJpegCapture(const String&);

    bool open(const String&);
--- a/modules/videoio/src/cap_mjpeg_encoder.cpp
+++ b/modules/videoio/src/cap_mjpeg_encoder.cpp
@ -403,6 +403,8 @@ public:
    }
    ~MotionJpegWriter() { close(); }

+    virtual int getCaptureDomain() const CV_OVERRIDE { return cv::CAP_OPENCV_MJPEG; }
+
    void close()
    {
        if( !container.isOpenedStream() )
--- a/modules/videoio/src/cap_msmf.cpp
+++ b/modules/videoio/src/cap_msmf.cpp
@ -701,7 +701,7 @@ public:
    virtual bool grabFrame() CV_OVERRIDE;
    virtual bool retrieveFrame(int, cv::OutputArray) CV_OVERRIDE;
    virtual bool isOpened() const CV_OVERRIDE { return isOpen; }
-    virtual int getCaptureDomain() CV_OVERRIDE { return CV_CAP_MSMF; } // Return the type of the capture object: CV_CAP_VFW, etc...
+    virtual int getCaptureDomain() CV_OVERRIDE { return CV_CAP_MSMF; }
 protected:
    double getFramerate(MediaType MT) const;
    bool configureOutput(UINT32 width, UINT32 height, double prefFramerate, UINT32 aspectRatioN, UINT32 aspectRatioD, int outFormat, bool convertToFormat);
@ -1955,6 +1955,7 @@ public:
    virtual bool setProperty(int, double) { return false; }
    virtual bool isOpened() const { return initiated; }

+    int getCaptureDomain() const CV_OVERRIDE { return cv::CAP_MSMF; }
 private:
    Media_Foundation& MF;
    UINT32 videoWidth;
--- a/modules/videoio/src/cap_qt.cpp
+++ b/modules/videoio/src/cap_qt.cpp
@ -1445,7 +1445,7 @@ public:
    virtual bool setProperty(int, double) CV_OVERRIDE;
    virtual bool grabFrame() CV_OVERRIDE;
    virtual IplImage* retrieveFrame(int) CV_OVERRIDE;
-    virtual int getCaptureDomain() CV_OVERRIDE { return CV_CAP_QT; } // Return the type of the capture object: CV_CAP_VFW, etc...
+    virtual int getCaptureDomain() CV_OVERRIDE { return CV_CAP_QT; }
 protected:

    CvCapture_QT_Movie* captureQT;
@ -1580,6 +1580,7 @@ public:
    virtual void close();
    virtual bool writeFrame( const IplImage* );

+    int getCaptureDomain() const CV_OVERRIDE { return cv::CAP_QT; }
 protected:
    CvVideoWriter_QT* writerQT;
 };
--- a/modules/videoio/src/cap_qtkit.mm
+++ b/modules/videoio/src/cap_qtkit.mm
@ -198,6 +198,8 @@ public:
                   int is_color=1);
    ~CvVideoWriter_QT();
    bool writeFrame(const IplImage* image);
+
+    int getCaptureDomain() const CV_OVERRIDE { return cv::CAP_QT; }
 private:
    IplImage* argbimage;
    QTMovie* mMovie;
--- a/modules/videoio/src/cap_unicap.cpp
+++ b/modules/videoio/src/cap_unicap.cpp
@ -66,7 +66,7 @@ struct CvCapture_Unicap : public CvCapture
  virtual bool setProperty(int, double) CV_OVERRIDE;
  virtual bool grabFrame() CV_OVERRIDE;
  virtual IplImage* retrieveFrame(int) CV_OVERRIDE;
-  virtual int getCaptureDomain() CV_OVERRIDE { return CV_CAP_UNICAP; } // Return the type of the capture object: CV_CAP_VFW, etc...
+  virtual int getCaptureDomain() CV_OVERRIDE { return CV_CAP_UNICAP; }

  bool shutdownDevice();
  bool initDevice();
--- a/modules/videoio/src/cap_v4l.cpp
+++ b/modules/videoio/src/cap_v4l.cpp
@ -267,6 +267,8 @@ struct buffer

 struct CvCaptureCAM_V4L CV_FINAL : public CvCapture
 {
+    int getCaptureDomain() /*const*/ CV_OVERRIDE { return cv::CAP_V4L; }
+
    int deviceHandle;
    int bufferIndex;
    int FirstCapture;
--- a/modules/videoio/src/cap_vfw.cpp
+++ b/modules/videoio/src/cap_vfw.cpp
@ -103,7 +103,7 @@ public:
    virtual bool setProperty(int, double) CV_OVERRIDE;
    virtual bool grabFrame() CV_OVERRIDE;
    virtual IplImage* retrieveFrame(int) CV_OVERRIDE;
-    virtual int getCaptureDomain() CV_OVERRIDE { return CV_CAP_VFW; } // Return the type of the capture object: CV_CAP_VFW, etc...
+    virtual int getCaptureDomain() CV_OVERRIDE { return CV_CAP_VFW; }

 protected:
    void init();
@ -697,6 +697,7 @@ public:
    virtual void close();
    virtual bool writeFrame( const IplImage* );

+    int getCaptureDomain() const CV_OVERRIDE { return cv::CAP_VFW; }
 protected:
    void init();
    bool createStreams( CvSize frameSize, bool isColor );
--- a/modules/videoio/src/cap_winrt_capture.hpp
+++ b/modules/videoio/src/cap_winrt_capture.hpp
@ -55,8 +55,7 @@ namespace cv {
        virtual bool grabFrame();
        virtual bool retrieveFrame(int channel, cv::OutputArray outArray);

-        // Return the type of the capture object
-        virtual int getCaptureDomain() { return CAP_WINRT; }
+        virtual int getCaptureDomain() CV_OVERRIDE { return CAP_WINRT; }

        virtual bool isOpened() const;

--- a/modules/videoio/src/cap_ximea.cpp
+++ b/modules/videoio/src/cap_ximea.cpp
@ -24,7 +24,7 @@ public:
    virtual bool setProperty(int, double) CV_OVERRIDE;
    virtual bool grabFrame() CV_OVERRIDE;
    virtual IplImage* retrieveFrame(int) CV_OVERRIDE;
-    virtual int getCaptureDomain() CV_OVERRIDE { return CV_CAP_XIAPI; } // Return the type of the capture object: CV_CAP_VFW, etc...
+    virtual int getCaptureDomain() CV_OVERRIDE { return CV_CAP_XIAPI; }

 private:
    bool _open();
--- a/modules/videoio/src/precomp.hpp
+++ b/modules/videoio/src/precomp.hpp
@ -102,6 +102,7 @@ struct CvVideoWriter
 {
    virtual ~CvVideoWriter() {}
    virtual bool writeFrame(const IplImage*) { return false; }
+    virtual int getCaptureDomain() const { return cv::CAP_ANY; } // Return the type of the capture object: CAP_FFMPEG, etc...
 };

 CvCapture * cvCreateCameraCapture_V4L( int index );
@ -178,6 +179,8 @@ namespace cv

        virtual bool isOpened() const = 0;
        virtual void write(InputArray) = 0;
+
+        virtual int getCaptureDomain() const { return cv::CAP_ANY; } // Return the type of the capture object: CAP_FFMPEG, etc...
    };

    Ptr<IVideoCapture> createMotionJpegCapture(const String& filename);
--- a/modules/videoio/test/test_camera.cpp
+++ b/modules/videoio/test/test_camera.cpp
@ -0,0 +1,41 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Note: all tests here are DISABLED by default due to specific requirements.
+// Don't use #if 0 - these tests should be tested for compilation at least.
+//
+// Usage: opencv_test_videoio --gtest_also_run_disabled_tests --gtest_filter=*VideoIO_Camera*<tested case>*
+
+#include "test_precomp.hpp"
+
+namespace opencv_test { namespace {
+
+TEST(DISABLED_VideoIO_Camera, basic)
+{
+    // Opens capture index 0, prints its backend and basic properties,
+    // then reads N frames and reports the achieved frame rate.
+    VideoCapture capture(0);
+    ASSERT_TRUE(capture.isOpened());
+    std::cout << "Camera 0 via " << capture.getBackendName() << " backend" << std::endl;
+    std::cout << "Frame width: " << capture.get(CAP_PROP_FRAME_WIDTH) << std::endl;
+    std::cout << "     height: " << capture.get(CAP_PROP_FRAME_HEIGHT) << std::endl;
+    std::cout << "Capturing FPS: " << capture.get(CAP_PROP_FPS) << std::endl;
+
+    const int N = 100;
+    Mat frame;
+    const int64 tickStart = cv::getTickCount();
+    int frameIdx = 0;
+    while (frameIdx < N)
+    {
+        SCOPED_TRACE(cv::format("frame=%d", frameIdx));
+
+        capture >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        EXPECT_GT(cvtest::norm(frame, NORM_INF), 0) << "Complete black image has been received";
+        ++frameIdx;
+    }
+    const int64 tickEnd = cv::getTickCount();
+    // The extra tick keeps the divisor non-zero.
+    const int64 elapsedTicks = tickEnd - tickStart + 1;
+    printf("Processed %d frames on %.2f FPS\n", N, (N * cv::getTickFrequency()) / elapsedTicks);
+
+    capture.release();
+}
+
+}} // namespace
--- a/platforms/js/build_js.py
+++ b/platforms/js/build_js.py
@ -3,6 +3,8 @@
 import os, sys, subprocess, argparse, shutil, glob, re, multiprocessing
 import logging as log

+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+
 class Fail(Exception):
    def __init__(self, text=None):
        self.t = text
@ -58,30 +60,12 @@ def find_file(name, path):
        if name in files:
            return os.path.join(root, name)

-def determine_emcc_version(emscripten_dir):
-    ret = subprocess.check_output([os.path.join(emscripten_dir, "emcc"), "--version"])
-    m = re.match(r'^emcc.*(\d+\.\d+\.\d+)', ret, flags=re.IGNORECASE)
-    return m.group(1)
-
-def determine_opencv_version(version_hpp_path):
-    # version in 2.4 - CV_VERSION_EPOCH.CV_VERSION_MAJOR.CV_VERSION_MINOR.CV_VERSION_REVISION
-    # version in master - CV_VERSION_MAJOR.CV_VERSION_MINOR.CV_VERSION_REVISION-CV_VERSION_STATUS
-    with open(version_hpp_path, "rt") as f:
-        data = f.read()
-        major = re.search(r'^#define\W+CV_VERSION_MAJOR\W+(\d+)$', data, re.MULTILINE).group(1)
-        minor = re.search(r'^#define\W+CV_VERSION_MINOR\W+(\d+)$', data, re.MULTILINE).group(1)
-        revision = re.search(r'^#define\W+CV_VERSION_REVISION\W+(\d+)$', data, re.MULTILINE).group(1)
-        version_status = re.search(r'^#define\W+CV_VERSION_STATUS\W+"([^"]*)"$', data, re.MULTILINE).group(1)
-        return "%(major)s.%(minor)s.%(revision)s%(version_status)s" % locals()
-
 class Builder:
    def __init__(self, options):
        self.options = options
        self.build_dir = check_dir(options.build_dir, create=True)
        self.opencv_dir = check_dir(options.opencv_dir)
        self.emscripten_dir = check_dir(options.emscripten_dir)
-        self.opencv_version = determine_opencv_version(os.path.join(self.opencv_dir, "modules", "core", "include", "opencv2", "core", "version.hpp"))
-        self.emcc_version = determine_emcc_version(self.emscripten_dir)

    def get_toolchain_file(self):
        return os.path.join(self.emscripten_dir, "cmake", "Modules", "Platform", "Emscripten.cmake")
@ -123,7 +107,6 @@ class Builder:
               "-DWITH_OPENCL_SVM=OFF",
               "-DWITH_OPENCLAMDFFT=OFF",
               "-DWITH_OPENCLAMDBLAS=OFF",
-               "-DWITH_MATLAB=OFF",
               "-DWITH_GPHOTO2=OFF",
               "-DWITH_LAPACK=OFF",
               "-DWITH_ITT=OFF",
@ -187,7 +170,7 @@ class Builder:
 #===================================================================================================

 if __name__ == "__main__":
-    opencv_dir = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), "../.."))
+    opencv_dir = os.path.abspath(os.path.join(SCRIPT_DIR, '../..'))
    emscripten_dir = None
    if "EMSCRIPTEN" in os.environ:
        emscripten_dir = os.environ["EMSCRIPTEN"]
@ -214,9 +197,6 @@ if __name__ == "__main__":

    builder = Builder(args)

-    log.info("Detected OpenCV version: %s", builder.opencv_version)
-    log.info("Detected emcc version: %s", builder.emcc_version)
-
    os.chdir(builder.build_dir)

    if args.clean_build_dir:
--- a/samples/cpp/CMakeLists.txt
+++ b/samples/cpp/CMakeLists.txt
@ -27,9 +27,6 @@ endif()
 project(cpp_samples)
 ocv_include_modules_recurse(${OPENCV_CPP_SAMPLES_REQUIRED_DEPS})
 file(GLOB_RECURSE cpp_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp)
-if(NOT HAVE_OPENGL)
-  ocv_list_filterout(cpp_samples Qt_sample)
-endif()
 if(NOT HAVE_opencv_cudaarithm OR NOT HAVE_opencv_cudafilters)
  ocv_list_filterout(cpp_samples "/gpu/")
 endif()
--- a/samples/dnn/object_detection.cpp
+++ b/samples/dnn/object_detection.cpp
@ -86,6 +86,7 @@ int main(int argc, char** argv)
    Net net = readNet(parser.get<String>("model"), parser.get<String>("config"), parser.get<String>("framework"));
    net.setPreferableBackend(parser.get<int>("backend"));
    net.setPreferableTarget(parser.get<int>("target"));
+    std::vector<String> outNames = net.getUnconnectedOutLayersNames();

    // Create a window
    static const std::string kWinName = "Deep learning object detection in OpenCV";
@ -125,7 +126,7 @@ int main(int argc, char** argv)
            net.setInput(imInfo, "im_info");
        }
        std::vector<Mat> outs;
-        net.forward(outs, getOutputsNames(net));
+        net.forward(outs, outNames);

        postprocess(frame, outs, net);

@ -265,17 +266,3 @@ void callback(int pos, void*)
 {
    confThreshold = pos * 0.01f;
 }
-
-std::vector<String> getOutputsNames(const Net& net)
-{
-    static std::vector<String> names;
-    if (names.empty())
-    {
-        std::vector<int> outLayers = net.getUnconnectedOutLayers();
-        std::vector<String> layersNames = net.getLayerNames();
-        names.resize(outLayers.size());
-        for (size_t i = 0; i < outLayers.size(); ++i)
-            names[i] = layersNames[outLayers[i] - 1];
-    }
-    return names;
-}
--- a/samples/dnn/object_detection.py
+++ b/samples/dnn/object_detection.py
@ -78,14 +78,11 @@ if args.classes:
 net = cv.dnn.readNet(args.model, args.config, args.framework)
 net.setPreferableBackend(args.backend)
 net.setPreferableTarget(args.target)
+outNames = net.getUnconnectedOutLayersNames()

 confThreshold = args.thr
 nmsThreshold = args.nms

-def getOutputsNames(net):
-    layersNames = net.getLayerNames()
-    return [layersNames[i[0] - 1] for i in net.getUnconnectedOutLayers()]
-
 def postprocess(frame, outs):
    frameHeight = frame.shape[0]
    frameWidth = frame.shape[1]
@ -213,7 +210,7 @@ while cv.waitKey(1) < 0:
    if net.getLayer(0).outputNameToIndex('im_info') != -1:  # Faster-RCNN or R-FCN
        frame = cv.resize(frame, (inpWidth, inpHeight))
        net.setInput(np.array([[inpHeight, inpWidth, 1.6]], dtype=np.float32), 'im_info')
-    outs = net.forward(getOutputsNames(net))
+    outs = net.forward(outNames)

    postprocess(frame, outs)

--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@ -50,9 +50,6 @@ if((CV_GCC OR CV_CLANG) AND NOT ENABLE_NOISY_WARNINGS)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function")
 endif()
 file(GLOB all_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp)
-if(NOT HAVE_OPENGL)
-  ocv_list_filterout(all_samples "opengl")
-endif()
 foreach(sample_filename ${all_samples})
  ocv_define_sample(tgt ${sample_filename} gpu)
  ocv_target_link_libraries(${tgt} ${OPENCV_LINKER_LIBS} ${OPENCV_CUDA_SAMPLES_REQUIRED_DEPS})
--- a/samples/opengl/CMakeLists.txt
+++ b/samples/opengl/CMakeLists.txt
@ -1,15 +1,9 @@
 if(APPLE)
-    return()
+  return()
 endif()

 if(UNIX)
  find_package(X11 QUIET)
-  if(NOT X11_FOUND)
-    message(STATUS "OpenGL samples require development files for libX11")
-    return()
-  endif()
-  include_directories(${X11_INCLUDE_DIR})
-  set(SAMPLE_LINKER_DEPS "${X11_LIBRARIES}")
 endif()

 SET(OPENCV_OPENGL_SAMPLES_REQUIRED_DEPS
@ -24,10 +18,16 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
  project(opengl_samples)
  ocv_include_modules_recurse(${OPENCV_OPENGL_SAMPLES_REQUIRED_DEPS})
  file(GLOB all_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp)
+  # opengl_interop is the only sample linked against X11 below, so it is dropped
+  # from the list when X11 was not found.
+  # NOTE(review): find_package(X11) is only invoked under UNIX in this file, so on
+  # other platforms X11_FOUND is not set here and opengl_interop is filtered out as
+  # well - confirm this is intended.
+  if(NOT X11_FOUND)
+    ocv_list_filterout(all_samples "opengl_interop")
+  endif()
  foreach(sample_filename ${all_samples})
    ocv_define_sample(tgt ${sample_filename} opengl)
-    ocv_target_link_libraries(${tgt}
-      ${OPENCV_LINKER_LIBS} ${OPENCV_OPENGL_SAMPLES_REQUIRED_DEPS} ${SAMPLE_LINKER_DEPS})
+    ocv_target_link_libraries(${tgt} ${OPENCV_LINKER_LIBS} ${OPENCV_OPENGL_SAMPLES_REQUIRED_DEPS})
+    # X11 include paths and libraries are attached only to the sample that uses them.
+    if(sample_filename STREQUAL "opengl_interop.cpp")
+      ocv_target_link_libraries(${tgt} ${X11_LIBRARIES})
+      ocv_target_include_directories(${tgt} ${X11_INCLUDE_DIR})
+    endif()
  endforeach()
 endif()

--- a/samples/opengl/opengl.cpp
+++ b/samples/opengl/opengl.cpp