Merge remote-tracking branch 'upstream/master'

11 years ago · 41b8ab086b
parent 46ba9d30b9 e34a7caabc
commit 41b8ab086b
11 changed files with 274 additions and 36 deletions
--- a/modules/calib3d/src/opencl/stereobm.cl
+++ b/modules/calib3d/src/opencl/stereobm.cl
@ -147,6 +147,8 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri
    __local int best_disp[2];
    __local int best_cost[2];
    best_cost[nthread] = MAX_VAL;
+    best_disp[nthread] = MAX_VAL;
+    barrier(CLK_LOCAL_MEM_FENCE);

    short costbuf[wsz];
    int head = 0;
@ -159,7 +161,7 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri
    int costIdx = calcLocalIdx(lx, ly, d, sizeY);
    cost = costFunc + costIdx;

-    short tempcost = 0;
+    int tempcost = 0;
    if(x < cols-wsz2-mindisp && y < rows-wsz2)
    {
        int shift = 1*nthread + cols*(1-nthread);
@ -191,7 +193,7 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri
    barrier(CLK_LOCAL_MEM_FENCE);

    if(best_cost[1] == tempcost)
-        best_disp[1] = ndisp - d - 1;
+        atomic_min(best_disp + 1, ndisp - d - 1);
    barrier(CLK_LOCAL_MEM_FENCE);

    int dispIdx = mad24(gy, disp_step, disp_offset + gx*(int)sizeof(short));
@ -209,6 +211,7 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri
        y = (ly < sizeY) ? gy + shiftY + ly : rows;

        best_cost[nthread] = MAX_VAL;
+        best_disp[nthread] = MAX_VAL;
        barrier(CLK_LOCAL_MEM_FENCE);

        costIdx = calcLocalIdx(lx, ly, d, sizeY);
@ -227,12 +230,11 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri
        barrier(CLK_LOCAL_MEM_FENCE);

        if(best_cost[nthread] == tempcost)
-            best_disp[nthread] = ndisp - d - 1;
+            atomic_min(best_disp + nthread, ndisp - d - 1);
        barrier(CLK_LOCAL_MEM_FENCE);

        int dispIdx = mad24(gy+ly, disp_step, disp_offset + (gx+lx)*(int)sizeof(short));
        disp = (__global short *)(dispptr + dispIdx);
-
        calcDisp(cost, disp, uniquenessRatio, mindisp, ndisp, 2*sizeY,
            best_disp + nthread, best_cost + nthread, d, x, y, cols, rows, wsz2);
        barrier(CLK_LOCAL_MEM_FENCE);
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@ -414,24 +414,23 @@ const String& getBuildInformation()

 String format( const char* fmt, ... )
 {
-    char buf[1024];
+    AutoBuffer<char, 1024> buf;

-    va_list va;
-    va_start(va, fmt);
-    int len = vsnprintf(buf, sizeof(buf), fmt, va);
-    va_end(va);
-
-    if (len >= (int)sizeof(buf))
+    for ( ; ; )
    {
-        String s(len, '\0');
+        va_list va;
        va_start(va, fmt);
-        len = vsnprintf((char*)s.c_str(), len + 1, fmt, va);
-        (void)len;
+        int bsize = static_cast<int>(buf.size()),
+                len = vsnprintf((char *)buf, bsize, fmt, va);
        va_end(va);
-        return s;
-    }

-    return String(buf, len);
+        if (len < 0 || len >= bsize)
+        {
+            buf.resize(std::max(bsize << 1, len + 1));
+            continue;
+        }
+        return String((char *)buf, len);
+    }
 }

 String tempfile( const char* suffix )
--- a/modules/core/test/test_umat.cpp
+++ b/modules/core/test/test_umat.cpp
@ -795,4 +795,176 @@ TEST(UMat, ReadBufferRect)
    EXPECT_MAT_NEAR(t, t2, 0);
 }

+// Use iGPU or OPENCV_OPENCL_DEVICE=:CPU: to catch problem
+// Two parallel ranges (cv::Range(0, 2)) each map the same UMat with
+// getMat(ACCESS_READ) and release the mapping again, ten times in a row.
+// Any exception raised along the way fails the test.
+TEST(UMat, DISABLED_synchronization_map_unmap)
+{
+    class TestParallelLoopBody : public cv::ParallelLoopBody
+    {
+        UMat u_;
+    public:
+        TestParallelLoopBody(const UMat& u) : u_(u) { }
+        void operator() (const cv::Range& range) const
+        {
+            printf("range: %d, %d -- begin\n", range.start, range.end);
+            for (int i = 0; i < 10; i++)
+            {
+                printf("%d: %d map...\n", range.start, i);
+                Mat m = u_.getMat(cv::ACCESS_READ);
+
+                printf("%d: %d unmap...\n", range.start, i);
+                m.release();
+            }
+            printf("range: %d, %d -- end\n", range.start, range.end);
+        }
+    };
+    try
+    {
+        UMat u(1000, 1000, CV_32FC1);
+        parallel_for_(cv::Range(0, 2), TestParallelLoopBody(u));
+    }
+    catch (const cv::Exception& e)
+    {
+        // FAIL() records a fatal failure and returns from the test body,
+        // so nothing placed after it in this handler would ever run.
+        FAIL() << "Exception: " << e.what();
+    }
+    catch (...)
+    {
+        FAIL() << "Exception!";
+    }
+}
+
 } } // namespace cvtest::ocl
+
+// Runs 20 iterations of: wrap a freshly created Mat as a UMat (getUMat),
+// start an add() that reads from it, then release the UMat and the Mat
+// immediately. An exception in any iteration is printed and recorded as a
+// non-fatal failure, so the remaining iterations still run.
+TEST(UMat, DISABLED_bug_with_unmap)
+{
+    for (int i = 0; i < 20; i++)
+    {
+        try
+        {
+            Mat m = Mat(1000, 1000, CV_8UC1);
+            UMat u = m.getUMat(ACCESS_READ);
+            UMat dst;
+            add(u, Scalar::all(0), dst); // start async operation
+            u.release();
+            m.release();
+        }
+        catch (const cv::Exception& e)
+        {
+            printf("i = %d... %s\n", i, e.what());
+            ADD_FAILURE();
+        }
+        catch (...)
+        {
+            printf("i = %d...\n", i);
+            ADD_FAILURE();
+        }
+    }
+}
+
+// Logic::processData() converts its input into a Mat that is local to an
+// inner scope, stores the UMat obtained from that Mat (dst.getUMat) in a
+// member, and then lets the Mat go out of scope. The test calls it once and
+// reads the member back; any exception is printed and recorded as a failure.
+TEST(UMat, DISABLED_bug_with_unmap_in_class)
+{
+    class Logic
+    {
+    public:
+        Logic() {}
+        void processData(InputArray input)
+        {
+            Mat m = input.getMat();
+            {
+                Mat dst;
+                m.convertTo(dst, CV_32FC1);
+                // some additional CPU-based per-pixel processing into dst
+                intermediateResult = dst.getUMat(ACCESS_READ);
+                std::cout << "data processed..." << std::endl;
+            } // problem is here: dst::~Mat()
+            std::cout << "leave ProcessData()" << std::endl;
+        }
+        UMat getResult() const { return intermediateResult; }
+    protected:
+        UMat intermediateResult;
+    };
+    try
+    {
+        Mat m = Mat(1000, 1000, CV_8UC1);
+        Logic l;
+        l.processData(m);
+        UMat result = l.getResult();
+    }
+    catch (const cv::Exception& e)
+    {
+        printf("exception... %s\n", e.what());
+        ADD_FAILURE();
+    }
+    catch (...)
+    {
+        printf("exception... \n");
+        ADD_FAILURE();
+    }
+}
+
+// Keeps a read mapping of a UMat alive (getMat(ACCESS_READ)) while the same
+// UMat is used as the input of add() into a separate destination.
+// The test asserts that no exception is thrown.
+TEST(UMat, Test_same_behaviour_read_and_read)
+{
+    bool exceptionDetected = false;
+    try
+    {
+        UMat u(Size(10, 10), CV_8UC1);
+        Mat m = u.getMat(ACCESS_READ);
+        UMat dst;
+        add(u, Scalar::all(1), dst);
+    }
+    catch (...)
+    {
+        exceptionDetected = true;
+    }
+    ASSERT_FALSE(exceptionDetected); // no data race, 2+ reads are valid
+}
+
+// VP: this test (and probably others from same_behaviour series) is not valid in my opinion.
+// Keeps a read mapping of a UMat alive (getMat(ACCESS_READ)) while add()
+// writes its result into that same UMat. The test asserts that an exception
+// is thrown.
+TEST(UMat, DISABLED_Test_same_behaviour_read_and_write)
+{
+    bool exceptionDetected = false;
+    try
+    {
+        UMat u(Size(10, 10), CV_8UC1);
+        Mat m = u.getMat(ACCESS_READ);
+        add(u, Scalar::all(1), u);
+    }
+    catch (...)
+    {
+        exceptionDetected = true;
+    }
+    ASSERT_TRUE(exceptionDetected); // data race
+}
+
+// Keeps a write mapping of a UMat alive (getMat(ACCESS_WRITE)) while the same
+// UMat is used as the input of add() into a separate destination.
+// The test asserts that an exception is thrown.
+TEST(UMat, DISABLED_Test_same_behaviour_write_and_read)
+{
+    bool exceptionDetected = false;
+    try
+    {
+        UMat u(Size(10, 10), CV_8UC1);
+        Mat m = u.getMat(ACCESS_WRITE);
+        UMat dst;
+        add(u, Scalar::all(1), dst);
+    }
+    catch (...)
+    {
+        exceptionDetected = true;
+    }
+    ASSERT_TRUE(exceptionDetected); // data race
+}
+
+// Keeps a write mapping of a UMat alive (getMat(ACCESS_WRITE)) while add()
+// writes its result into that same UMat. The test asserts that an exception
+// is thrown.
+TEST(UMat, DISABLED_Test_same_behaviour_write_and_write)
+{
+    bool exceptionDetected = false;
+    try
+    {
+        UMat u(Size(10, 10), CV_8UC1);
+        Mat m = u.getMat(ACCESS_WRITE);
+        add(u, Scalar::all(1), u);
+    }
+    catch (...)
+    {
+        exceptionDetected = true;
+    }
+    ASSERT_TRUE(exceptionDetected); // data race
+}
--- a/modules/features2d/include/opencv2/features2d.hpp
+++ b/modules/features2d/include/opencv2/features2d.hpp
@ -616,14 +616,14 @@ protected:
 };


-class CV_EXPORTS DenseFeatureDetector : public FeatureDetector
+class CV_EXPORTS_W DenseFeatureDetector : public FeatureDetector
 {
 public:
-    explicit DenseFeatureDetector( float initFeatureScale=1.f, int featureScaleLevels=1,
-                                   float featureScaleMul=0.1f,
-                                   int initXyStep=6, int initImgBound=0,
-                                   bool varyXyStepWithScale=true,
-                                   bool varyImgBoundWithScale=false );
+    CV_WRAP explicit DenseFeatureDetector( float initFeatureScale=1.f, int featureScaleLevels=1,
+                                           float featureScaleMul=0.1f,
+                                           int initXyStep=6, int initImgBound=0,
+                                           bool varyXyStepWithScale=true,
+                                           bool varyImgBoundWithScale=false );
    AlgorithmInfo* info() const;

 protected:
--- a/modules/highgui/include/opencv2/highgui.hpp
+++ b/modules/highgui/include/opencv2/highgui.hpp
@ -215,12 +215,13 @@ enum { IMREAD_UNCHANGED  = -1, // 8bit, color or not
       IMREAD_ANYCOLOR   = 4   // ?, any color
     };

-enum { IMWRITE_JPEG_QUALITY    = 1,
-       IMWRITE_PNG_COMPRESSION = 16,
-       IMWRITE_PNG_STRATEGY    = 17,
-       IMWRITE_PNG_BILEVEL     = 18,
-       IMWRITE_PXM_BINARY      = 32,
-       IMWRITE_WEBP_QUALITY    = 64
+enum { IMWRITE_JPEG_QUALITY     = 1,
+       IMWRITE_JPEG_PROGRESSIVE = 2,
+       IMWRITE_PNG_COMPRESSION  = 16,
+       IMWRITE_PNG_STRATEGY     = 17,
+       IMWRITE_PNG_BILEVEL      = 18,
+       IMWRITE_PXM_BINARY       = 32,
+       IMWRITE_WEBP_QUALITY     = 64
     };

 enum { IMWRITE_PNG_STRATEGY_DEFAULT      = 0,
--- a/modules/highgui/include/opencv2/highgui/highgui_c.h
+++ b/modules/highgui/include/opencv2/highgui/highgui_c.h
@ -220,6 +220,7 @@ CVAPI(CvMat*) cvLoadImageM( const char* filename, int iscolor CV_DEFAULT(CV_LOAD
 enum
 {
    CV_IMWRITE_JPEG_QUALITY =1,
+    CV_IMWRITE_JPEG_PROGRESSIVE =2,
    CV_IMWRITE_PNG_COMPRESSION =16,
    CV_IMWRITE_PNG_STRATEGY =17,
    CV_IMWRITE_PNG_BILEVEL =18,
--- a/modules/highgui/src/grfmt_jpeg.cpp
+++ b/modules/highgui/src/grfmt_jpeg.cpp
@ -598,6 +598,7 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
        cinfo.in_color_space = channels > 1 ? JCS_RGB : JCS_GRAYSCALE;

        int quality = 95;
+        int progressive = 0;

        for( size_t i = 0; i < params.size(); i += 2 )
        {
@ -606,11 +607,18 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
                quality = params[i+1];
                quality = MIN(MAX(quality, 0), 100);
            }
+
+            if( params[i] == CV_IMWRITE_JPEG_PROGRESSIVE )
+            {
+                progressive = params[i+1];
+            }
        }

        jpeg_set_defaults( &cinfo );
        jpeg_set_quality( &cinfo, quality,
                          TRUE /* limit to baseline-JPEG values */ );
+        if( progressive )
+            jpeg_simple_progression( &cinfo );
        jpeg_start_compress( &cinfo, TRUE );

        if( channels > 1 )
--- a/modules/highgui/test/test_grfmt.cpp
+++ b/modules/highgui/test/test_grfmt.cpp
@ -386,6 +386,30 @@ TEST(Highgui_Jpeg, encode_empty)

    ASSERT_THROW(cv::imencode(".jpg", img, jpegImg), cv::Exception);
 }
+
+// Writes the same image twice, once with IMWRITE_JPEG_PROGRESSIVE set to 1
+// and once with default parameters, reads both files back and expects the
+// decoded images to be identical (NORM_INF distance of 0).
+TEST(Highgui_Jpeg, encode_decode_progressive_jpeg)
+{
+    cvtest::TS& ts = *cvtest::TS::ptr();
+    string input = string(ts.get_data_path()) + "../cv/shared/lena.png";
+    cv::Mat img = cv::imread(input);
+    ASSERT_FALSE(img.empty());
+
+    std::vector<int> params;
+    params.push_back(IMWRITE_JPEG_PROGRESSIVE);
+    params.push_back(1);
+
+    string output_progressive = cv::tempfile(".jpg");
+    EXPECT_NO_THROW(cv::imwrite(output_progressive, img, params));
+    cv::Mat img_jpg_progressive = cv::imread(output_progressive);
+
+    string output_normal = cv::tempfile(".jpg");
+    EXPECT_NO_THROW(cv::imwrite(output_normal, img));
+    cv::Mat img_jpg_normal = cv::imread(output_normal);
+
+    EXPECT_EQ(0, cv::norm(img_jpg_progressive, img_jpg_normal, NORM_INF));
+
+    // Both temporary files are created by this test, so both are cleaned up.
+    remove(output_progressive.c_str());
+    remove(output_normal.c_str());
+}
 #endif


--- a/modules/imgproc/perf/opencl/perf_imgproc.cpp
+++ b/modules/imgproc/perf/opencl/perf_imgproc.cpp
@ -95,6 +95,34 @@ OCL_PERF_TEST_P(CalcHistFixture, CalcHist, OCL_TEST_SIZES)
    SANITY_CHECK(hist);
 }

+///////////// calcBackProject ////////////////////////
+
+// Perf test for cv::calcBackProject on a single-channel CV_8UC1 UMat,
+// parameterized by image size.
+typedef TestBaseWithParam<Size> CalcBackProjFixture;
+
+OCL_PERF_TEST_P(CalcBackProjFixture, CalcBackProj, OCL_TEST_SIZES)
+{
+    const Size srcSize = GetParam();
+
+    // One channel (index 0), 256 bins over the range [0, 256).
+    const std::vector<int> channels(1, 0);
+    std::vector<float> ranges(2);
+    std::vector<int> histSize(1, 256);
+    ranges[0] = 0;
+    ranges[1] = 256;
+
+    checkDeviceMaxMemoryAllocSize(srcSize, CV_8UC1);
+
+    UMat src(srcSize, CV_8UC1), hist(256, 1, CV_32FC1), dst(srcSize, CV_8UC1);
+    declare.in(src, WARMUP_RNG).out(hist);
+
+    // The histogram is computed once up front; only calcBackProject sits
+    // inside OCL_TEST_CYCLE() below.
+    cv::calcHist(std::vector<UMat>(1, src), channels, noArray(), hist, histSize, ranges, false);
+
+    declare.in(src, WARMUP_RNG).out(dst);
+    OCL_TEST_CYCLE() cv::calcBackProject(std::vector<UMat>(1,src), channels, hist, dst, ranges, 1);
+
+    SANITY_CHECK_NOTHING();
+}
+
+
 /////////// CopyMakeBorder //////////////////////

 CV_ENUM(Border, BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_WRAP, BORDER_REFLECT_101)
--- a/modules/imgproc/src/filter.cpp
+++ b/modules/imgproc/src/filter.cpp
@ -42,7 +42,6 @@

 #include "precomp.hpp"
 #include "opencl_kernels.hpp"
-#include <sstream>

 /****************************************************************************************\
                                    Base Image Filter
@ -3197,6 +3196,8 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
    size_t tryWorkItems = maxWorkItemSizes[0];
    char cvt[2][40];

+    String kerStr = ocl::kernelToStr(kernelMatDataFloat, CV_32F);
+
    for ( ; ; )
    {
        size_t BLOCK_SIZE = tryWorkItems;
@ -3226,14 +3227,14 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,

        String opts = format("-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D cn=%d "
                             "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d "
-                             "-D KERNEL_SIZE_Y2_ALIGNED=%d -D %s -D %s -D %s%s "
+                             "-D KERNEL_SIZE_Y2_ALIGNED=%d -D %s -D %s -D %s%s%s "
                             "-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s "
                             "-D convertToWT=%s -D convertToDstT=%s",
                             (int)BLOCK_SIZE, (int)BLOCK_SIZE_Y, cn, anchor.x, anchor.y,
                             ksize.width, ksize.height, kernel_size_y2_aligned, borderMap[borderType],
                             extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
                             isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED",
-                             doubleSupport ? " -D DOUBLE_SUPPORT" : "",
+                             doubleSupport ? " -D DOUBLE_SUPPORT" : "", kerStr.c_str(),
                             ocl::typeToStr(type), ocl::typeToStr(sdepth), ocl::typeToStr(dtype),
                             ocl::typeToStr(ddepth), ocl::typeToStr(wtype), ocl::typeToStr(wdepth),
                             ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]),
@ -3255,7 +3256,7 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
    }

    _dst.create(sz, dtype);
-    UMat dst = _dst.getUMat(), kernalDataUMat(kernelMatDataFloat, true);
+    UMat dst = _dst.getUMat();

    int srcOffsetX = (int)((src.offset % src.step) / src.elemSize());
    int srcOffsetY = (int)(src.offset / src.step);
@ -3263,8 +3264,7 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
    int srcEndY = (isolated ? (srcOffsetY + sz.height) : wholeSize.height);

    k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, srcOffsetX, srcOffsetY,
-           srcEndX, srcEndY, ocl::KernelArg::WriteOnly(dst),
-           ocl::KernelArg::PtrReadOnly(kernalDataUMat), (float)delta);
+           srcEndX, srcEndY, ocl::KernelArg::WriteOnly(dst), (float)delta);

    return k.run(2, globalsize, localsize, false);
 }
--- a/modules/imgproc/src/opencl/filter2D.cl
+++ b/modules/imgproc/src/opencl/filter2D.cl
@ -200,8 +200,11 @@ inline WT readSrcPixel(int2 pos, __global const uchar * srcptr, int src_step, co
    }
 }

+#define DIG(a) a,
+__constant WT1 kernelData[] = { COEFF };
+
 __kernel void filter2D(__global const uchar * srcptr, int src_step, int srcOffsetX, int srcOffsetY, int srcEndX, int srcEndY,
-                       __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols, __constant WT1 * kernelData, float delta)
+                       __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols, float delta)
 {
    const struct RectCoords srcCoords = { srcOffsetX, srcOffsetY, srcEndX, srcEndY }; // for non-isolated border: offsetX, offsetY, wholeX, wholeY