Merge remote-tracking branch 'upstream/3.4' into merge-3.4

4 years ago · cb51a155b2
parent 9c16408e91 d3be58b6d7
commit cb51a155b2
10 changed files with 201 additions and 18 deletions
--- a/doc/py_tutorials/py_feature2d/py_features_harris/py_features_harris.markdown
+++ b/doc/py_tutorials/py_feature2d/py_features_harris/py_features_harris.markdown
@ -40,12 +40,12 @@ using **cv.Sobel()**).
 Then comes the main part. After this, they created a score, basically an equation, which
 determines if a window can contain a corner or not.

-\f[R = det(M) - k(trace(M))^2\f]
+\f[R = \det(M) - k(\operatorname{trace}(M))^2\f]

 where
-    -   \f$det(M) = \lambda_1 \lambda_2\f$
-    -   \f$trace(M) = \lambda_1 + \lambda_2\f$
-    -   \f$\lambda_1\f$ and \f$\lambda_2\f$ are the eigenvalues of M
+    -   \f$\det(M) = \lambda_1 \lambda_2\f$
+    -   \f$\operatorname{trace}(M) = \lambda_1 + \lambda_2\f$
+    -   \f$\lambda_1\f$ and \f$\lambda_2\f$ are the eigenvalues of \f$M\f$

 So the magnitudes of these eigenvalues decide whether a region is a corner, an edge, or flat.

--- a/doc/py_tutorials/py_feature2d/py_shi_tomasi/py_shi_tomasi.markdown
+++ b/doc/py_tutorials/py_feature2d/py_shi_tomasi/py_shi_tomasi.markdown
@ -20,7 +20,7 @@ Harris Corner Detector. The scoring function in Harris Corner Detector was given

 Instead of this, Shi-Tomasi proposed:

-\f[R = min(\lambda_1, \lambda_2)\f]
+\f[R = \min(\lambda_1, \lambda_2)\f]

 If it is a greater than a threshold value, it is considered as a corner. If we plot it in
 \f$\lambda_1 - \lambda_2\f$ space as we did in Harris Corner Detector, we get an image as below:
@ -28,7 +28,7 @@ If it is a greater than a threshold value, it is considered as a corner. If we p
 ![image](images/shitomasi_space.png)

 From the figure, you can see that only when \f$\lambda_1\f$ and \f$\lambda_2\f$ are above a minimum value,
-\f$\lambda_{min}\f$, it is considered as a corner(green region).
+\f$\lambda_{\min}\f$, it is considered as a corner(green region).

 Code
 ----
--- a/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown
+++ b/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown
@ -156,7 +156,7 @@ sift = cv.SIFT_create()
 kp, des = sift.detectAndCompute(gray,None)
@endcode
 Here kp will be a list of keypoints and des is a numpy array of shape
-\f$Number\_of\_Keypoints \times 128\f$.
+\f$\text{(Number of Keypoints)} \times 128\f$.

 So we got keypoints, descriptors etc. Now we want to see how to match keypoints in different images.
 That we will learn in coming chapters.
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@ -3482,7 +3482,6 @@ struct Kernel::Impl
    void registerImageArgument(int arg, const Image2D& image)
    {
        CV_CheckGE(arg, 0, "");
-        CV_CheckLT(arg, (int)MAX_ARRS, "");
        if (arg < (int)shadow_images.size() && shadow_images[arg].ptr() != image.ptr())  // TODO future: replace ptr => impl (more strong check)
        {
            CV_Check(arg, !isInProgress, "ocl::Kernel: clearing of pending Image2D arguments is not allowed");
--- a/modules/core/test/ocl/test_opencl.cpp
+++ b/modules/core/test/ocl/test_opencl.cpp
@ -132,6 +132,73 @@ TEST(OpenCL, support_SPIR_programs)
    testOpenCLKernel(k);
 }

+
+TEST(OpenCL, image2Dcount_regression_19334)
+{
+    cv::ocl::Context ctx = cv::ocl::Context::getDefault();
+    if (!ctx.ptr())
+    {
+        throw cvtest::SkipTestException("OpenCL is not available");
+    }
+    cv::ocl::Device device = cv::ocl::Device::getDefault();
+    if (!device.compilerAvailable())
+    {
+        throw cvtest::SkipTestException("OpenCL compiler is not available");
+    }
+
+    std::string module_name; // empty to disable OpenCL cache
+
+    static const char* opencl_kernel_src =
+"__kernel void test_kernel(int a,\n"
+"                          __global const uchar* src0, int src0_step, int src0_offset, int src0_rows, int src0_cols,\n"
+"                          __global const uchar* src1, int src1_step, int src1_offset, int src1_rows, int src1_cols,\n"
+"                          __global const uchar* src2, int src2_step, int src2_offset, int src2_rows, int src2_cols,\n"
+"                          __read_only image2d_t image)\n"
+"{\n"
+"}";
+    cv::ocl::ProgramSource src(module_name, "test_opencl_image_arg", opencl_kernel_src, "");
+    cv::String errmsg;
+    cv::ocl::Program program(src, "", errmsg);
+    ASSERT_TRUE(program.ptr() != NULL);
+    cv::ocl::Kernel k("test_kernel", program);
+    ASSERT_FALSE(k.empty());
+
+    std::vector<UMat> images(4);
+    for (size_t i = 0; i < images.size(); ++i)
+        images[i] = UMat(10, 10, CV_8UC1);
+    cv::ocl::Image2D image;
+    try
+    {
+        cv::ocl::Image2D image_(images.back());
+        image = image_;
+    }
+    catch (const cv::Exception&)
+    {
+        throw cvtest::SkipTestException("OpenCL images are not supported");
+    }
+
+    int nargs = 0;
+    int a = 0;
+    nargs = k.set(nargs, a);
+    ASSERT_EQ(1, nargs);
+    nargs = k.set(nargs, images[0]);
+    ASSERT_EQ(6, nargs);
+    nargs = k.set(nargs, images[1]);
+    ASSERT_EQ(11, nargs);
+    nargs = k.set(nargs, images[2]);
+    ASSERT_EQ(16, nargs);
+
+    // do not throw (issue of #19334)
+    ASSERT_NO_THROW(nargs = k.set(nargs, image));
+    ASSERT_EQ(17, nargs);
+
+    // allow to replace image argument if kernel is not running
+    UMat image2(10, 10, CV_8UC1);
+    ASSERT_NO_THROW(nargs = k.set(16, cv::ocl::Image2D(image2)));
+    ASSERT_EQ(17, nargs);
+}
+
+
 TEST(OpenCL, move_construct_assign)
 {
    cv::ocl::Context ctx1 = cv::ocl::Context::getDefault();
--- a/modules/highgui/src/window_winrt_bridge.cpp
+++ b/modules/highgui/src/window_winrt_bridge.cpp
@ -271,7 +271,7 @@ void CvWindow::createSlider(cv::String name, int* val, int count, CvTrackbarCall
        // Image control is loaded. See callback implementation in CvWindow ctor.
        slider->Width = sliderDefaultWidth;
    }
-    slider->Value = *val;
+    slider->Value = val ? *val : 0;
    slider->Maximum = count;
    slider->Visibility = Windows::UI::Xaml::Visibility::Visible;
    slider->Margin = Windows::UI::Xaml::ThicknessHelper::FromLengths(10, 10, 10, 0);
--- a/modules/imgproc/src/color_lab.cpp
+++ b/modules/imgproc/src/color_lab.cpp
@ -1536,6 +1536,8 @@ static inline void trilinearPackedInterpolate(const v_uint16& inX, const v_uint1
 #endif // CV_SIMD


+
+
 struct RGB2Lab_b
 {
    typedef uchar channel_type;
@ -1571,6 +1573,69 @@ struct RGB2Lab_b
        }
    }

+#if CV_NEON
+    template <int n>
+    inline void rgb2lab_batch(const ushort* tab,
+                              const v_uint8 vRi, const v_uint8 vGi, const v_uint8 vBi,
+                              v_int32& vL, v_int32& va, v_int32& vb) const
+    {
+        // Define some scalar constants which we will make use of later
+        const int Lscale = (116*255+50)/100;
+        const int Lshift = -((16*255*(1 << lab_shift2) + 50)/100);
+        const int xyzDescaleShift = (1 << (lab_shift - 1));
+        const int labDescaleShift = (1 << (lab_shift2 - 1));
+        const int abShift = 128*(1 << lab_shift2);
+
+        const int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
+                  C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
+                  C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
+
+        // int R = tab[src[0]], G = tab[src[1]], B = tab[src[2]];
+        v_int32 vR(tab[v_extract_n<4*n+0>(vRi)], tab[v_extract_n<4*n+1>(vRi)],
+                   tab[v_extract_n<4*n+2>(vRi)], tab[v_extract_n<4*n+3>(vRi)]);
+        v_int32 vG(tab[v_extract_n<4*n+0>(vGi)], tab[v_extract_n<4*n+1>(vGi)],
+                   tab[v_extract_n<4*n+2>(vGi)], tab[v_extract_n<4*n+3>(vGi)]);
+        v_int32 vB(tab[v_extract_n<4*n+0>(vBi)], tab[v_extract_n<4*n+1>(vBi)],
+                   tab[v_extract_n<4*n+2>(vBi)], tab[v_extract_n<4*n+3>(vBi)]);
+
+        /* int fX = LabCbrtTab_b[CV_DESCALE(R*C0 + G*C1 + B*C2, lab_shift)];*/
+        v_int32 vfX = v_fma(vR, v_setall_s32(C0), v_setall_s32(xyzDescaleShift));
+        vfX = v_fma(vG, v_setall_s32(C1), vfX);
+        vfX = v_fma(vB, v_setall_s32(C2), vfX);
+        vfX = v_shr<lab_shift>(vfX);
+        vfX = v_int32(LabCbrtTab_b[v_extract_n<0>(vfX)], LabCbrtTab_b[v_extract_n<1>(vfX)],
+                      LabCbrtTab_b[v_extract_n<2>(vfX)], LabCbrtTab_b[v_extract_n<3>(vfX)]);
+
+        /* int fY = LabCbrtTab_b[CV_DESCALE(R*C3 + G*C4 + B*C5, lab_shift)]; */
+        v_int32 vfY = v_fma(vR, v_setall_s32(C3), v_setall_s32(xyzDescaleShift));
+        vfY = v_fma(vG, v_setall_s32(C4), vfY);
+        vfY = v_fma(vB, v_setall_s32(C5), vfY);
+        vfY = v_shr<lab_shift>(vfY);
+        vfY = v_int32(LabCbrtTab_b[v_extract_n<0>(vfY)], LabCbrtTab_b[v_extract_n<1>(vfY)],
+                      LabCbrtTab_b[v_extract_n<2>(vfY)], LabCbrtTab_b[v_extract_n<3>(vfY)]);
+
+        /* int fZ = LabCbrtTab_b[CV_DESCALE(R*C6 + G*C7 + B*C8, lab_shift)];*/
+        v_int32 vfZ = v_fma(vR, v_setall_s32(C6), v_setall_s32(xyzDescaleShift));
+        vfZ = v_fma(vG, v_setall_s32(C7), vfZ);
+        vfZ = v_fma(vB, v_setall_s32(C8), vfZ);
+        vfZ = v_shr<lab_shift>(vfZ);
+        vfZ = v_int32(LabCbrtTab_b[v_extract_n<0>(vfZ)], LabCbrtTab_b[v_extract_n<1>(vfZ)],
+                      LabCbrtTab_b[v_extract_n<2>(vfZ)], LabCbrtTab_b[v_extract_n<3>(vfZ)]);
+
+        /* int L = CV_DESCALE( Lscale*fY + Lshift, lab_shift2 );*/
+        vL = v_fma(vfY, v_setall_s32(Lscale), v_setall_s32(Lshift+labDescaleShift));
+        vL = v_shr<lab_shift2>(vL);
+
+        /* int a = CV_DESCALE( 500*(fX - fY) + 128*(1 << lab_shift2), lab_shift2 );*/
+        va = v_fma(vfX - vfY, v_setall_s32(500), v_setall_s32(abShift+labDescaleShift));
+        va = v_shr<lab_shift2>(va);
+
+        /* int b = CV_DESCALE( 200*(fY - fZ) + 128*(1 << lab_shift2), lab_shift2 );*/
+        vb = v_fma(vfY - vfZ, v_setall_s32(200), v_setall_s32(abShift+labDescaleShift));
+        vb = v_shr<lab_shift2>(vb);
+    }
+#endif // CV_NEON
+
    void operator()(const uchar* src, uchar* dst, int n) const
    {
        CV_INSTRUMENT_REGION();
@ -1585,6 +1650,45 @@ struct RGB2Lab_b

        i = 0;

+#if CV_NEON
+        // On each loop, we load nlanes of RGB/A v_uint8s and store nlanes of
+        // Lab v_uint8s
+        for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes,
+                src += scn*v_uint8::nlanes, dst += 3*v_uint8::nlanes )
+        {
+            // Load 4 batches of 4 src
+            v_uint8 vRi, vGi, vBi;
+            if(scn == 4)
+            {
+                v_uint8 vAi;
+                v_load_deinterleave(src, vRi, vGi, vBi, vAi);
+            }
+            else // scn == 3
+            {
+                v_load_deinterleave(src, vRi, vGi, vBi);
+            }
+
+            // Do 4 batches of 4 RGB2Labs
+            v_int32 vL0, va0, vb0;
+            rgb2lab_batch<0>(tab, vRi, vGi, vBi, vL0, va0, vb0);
+            v_int32 vL1, va1, vb1;
+            rgb2lab_batch<1>(tab, vRi, vGi, vBi, vL1, va1, vb1);
+            v_int32 vL2, va2, vb2;
+            rgb2lab_batch<2>(tab, vRi, vGi, vBi, vL2, va2, vb2);
+            v_int32 vL3, va3, vb3;
+            rgb2lab_batch<3>(tab, vRi, vGi, vBi, vL3, va3, vb3);
+
+            // Saturate, combine and store all batches
+            // dst[0] = saturate_cast<uchar>(L);
+            // dst[1] = saturate_cast<uchar>(a);
+            // dst[2] = saturate_cast<uchar>(b);
+            v_store_interleave(dst,
+                v_pack(v_pack_u(vL0, vL1), v_pack_u(vL2, vL3)),
+                v_pack(v_pack_u(va0, va1), v_pack_u(va2, va3)),
+                v_pack(v_pack_u(vb0, vb1), v_pack_u(vb2, vb3)));
+        }
+#endif // CV_NEON
+
 #if CV_SIMD
        const int vsize = v_uint8::nlanes;
        const int xyzDescaleShift = 1 << (lab_shift - 1);
--- a/modules/imgproc/src/phasecorr.cpp
+++ b/modules/imgproc/src/phasecorr.cpp
@ -82,9 +82,9 @@ static void magSpectrums( InputArray _src, OutputArray _dst)
            {
                if( k == 1 )
                    dataSrc += cols - 1, dataDst += cols - 1;
-                dataDst[0] = dataSrc[0]*dataSrc[0];
+                dataDst[0] = (float)std::abs(dataSrc[0]);
                if( rows % 2 == 0 )
-                    dataDst[(rows-1)*stepDst] = dataSrc[(rows-1)*stepSrc]*dataSrc[(rows-1)*stepSrc];
+                    dataDst[(rows-1)*stepDst] = (float)std::abs(dataSrc[(rows-1)*stepSrc]);

                for( j = 1; j <= rows - 2; j += 2 )
                {
@ -101,9 +101,9 @@ static void magSpectrums( InputArray _src, OutputArray _dst)
        {
            if( is_1d && cn == 1 )
            {
-                dataDst[0] = dataSrc[0]*dataSrc[0];
+                dataDst[0] = (float)std::abs(dataSrc[0]);
                if( cols % 2 == 0 )
-                    dataDst[j1] = dataSrc[j1]*dataSrc[j1];
+                    dataDst[j1] = (float)std::abs(dataSrc[j1]);
            }

            for( j = j0; j < j1; j += 2 )
@ -126,9 +126,9 @@ static void magSpectrums( InputArray _src, OutputArray _dst)
            {
                if( k == 1 )
                    dataSrc += cols - 1, dataDst += cols - 1;
-                dataDst[0] = dataSrc[0]*dataSrc[0];
+                dataDst[0] = std::abs(dataSrc[0]);
                if( rows % 2 == 0 )
-                    dataDst[(rows-1)*stepDst] = dataSrc[(rows-1)*stepSrc]*dataSrc[(rows-1)*stepSrc];
+                    dataDst[(rows-1)*stepDst] = std::abs(dataSrc[(rows-1)*stepSrc]);

                for( j = 1; j <= rows - 2; j += 2 )
                {
@ -145,9 +145,9 @@ static void magSpectrums( InputArray _src, OutputArray _dst)
        {
            if( is_1d && cn == 1 )
            {
-                dataDst[0] = dataSrc[0]*dataSrc[0];
+                dataDst[0] = std::abs(dataSrc[0]);
                if( cols % 2 == 0 )
-                    dataDst[j1] = dataSrc[j1]*dataSrc[j1];
+                    dataDst[j1] = std::abs(dataSrc[j1]);
            }

            for( j = j0; j < j1; j += 2 )
--- a/modules/imgproc/src/smooth.simd.hpp
+++ b/modules/imgproc/src/smooth.simd.hpp
@ -1236,8 +1236,12 @@ void hlineSmoothONa_yzy_a<uint16_t, ufixedpoint32>(const uint16_t* src, int cn,
        v_mul_expand(vx_load(src + pre_shift * cn), vx_setall_u16((uint16_t) *((uint32_t*)(m + pre_shift))), v_res0, v_res1);
        for (int j = 0; j < pre_shift; j ++)
        {
+            v_uint16 v_weight = vx_setall_u16((uint16_t) *((uint32_t*)(m + j)));
            v_uint32 v_add0, v_add1;
-            v_mul_expand(vx_load(src + j * cn) + vx_load(src + (n - 1 - j)*cn), vx_setall_u16((uint16_t) *((uint32_t*)(m + j))), v_add0, v_add1);
+            v_mul_expand(vx_load(src + j * cn), v_weight, v_add0, v_add1);
+            v_res0 += v_add0;
+            v_res1 += v_add1;
+            v_mul_expand(vx_load(src + (n - 1 - j)*cn), v_weight, v_add0, v_add1);
            v_res0 += v_add0;
            v_res1 += v_add1;
        }
--- a/modules/imgproc/test/test_smooth_bitexact.cpp
+++ b/modules/imgproc/test/test_smooth_bitexact.cpp
@ -220,6 +220,15 @@ TEST(GaussianBlur_Bitexact, regression_15015)
    ASSERT_EQ(0.0, cvtest::norm(dst, src, NORM_INF));
 }

+TEST(GaussianBlur_Bitexact, overflow_20121)
+{
+    Mat src(100, 100, CV_16UC1, Scalar(65535));
+    Mat dst;
+    GaussianBlur(src, dst, cv::Size(9, 9), 0.0);
+    double min_val;
+    minMaxLoc(dst, &min_val);
+    ASSERT_EQ(cvRound(min_val), 65535);
+}

 static void checkGaussianBlur_8Uvs32F(const Mat& src8u, const Mat& src32f, int N, double sigma)
 {