diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp index a6859d09d4..5eb32b46c9 100644 --- a/modules/ocl/perf/perf_imgproc.cpp +++ b/modules/ocl/perf/perf_imgproc.cpp @@ -198,7 +198,7 @@ PERF_TEST_P(cornerHarrisFixture, cornerHarris, typedef TestBaseWithParam integralFixture; -PERF_TEST_P(integralFixture, DISABLED_integral, OCL_TYPICAL_MAT_SIZES) // TODO does not work properly +PERF_TEST_P(integralFixture, integral, OCL_TYPICAL_MAT_SIZES) { const Size srcSize = GetParam(); diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp index aac3785e79..05a76aa84d 100644 --- a/modules/ocl/src/haar.cpp +++ b/modules/ocl/src/haar.cpp @@ -1141,7 +1141,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std CvSize sz; cv::Rect roi, roi2; - cv::Mat imgroi, imgroisq; cv::ocl::oclMat resizeroi, gimgroi, gimgroisq; for( int i = 0; i < m_loopcount; i++ ) diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index 5e0f54fab5..ff3d95fdae 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -975,10 +975,12 @@ namespace cv void integral(const oclMat &src, oclMat &sum, oclMat &sqsum) { CV_Assert(src.type() == CV_8UC1); - if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) + if(!src.clCxt->supportsFeature(ocl::FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); + return; } + int vlen = 4; int offset = src.offset / vlen; int pre_invalid = src.offset % vlen; @@ -986,50 +988,45 @@ namespace cv oclMat t_sum , t_sqsum; int w = src.cols + 1, h = src.rows + 1; - int depth; - if( src.cols * src.rows <= 2901 * 2901 ) //2901 is the maximum size for int when all values are 255 - { - t_sum.create(src.cols, src.rows, CV_32SC1); - sum.create(h, w, CV_32SC1); - } - else - { - //Use float to prevent overflow - t_sum.create(src.cols, src.rows, CV_32FC1); - sum.create(h, w, CV_32FC1); - } - t_sqsum.create(src.cols, src.rows, CV_32FC1); - sqsum.create(h, w, CV_32FC1); - depth = sum.depth(); - int sum_offset = sum.offset / vlen; - int sqsum_offset = sqsum.offset / vlen; - - vector > args; - args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&offset )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step)); - size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1}; - openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, depth); - args.clear(); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&sqsum.data )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum.step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum_offset)); - size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1}; - openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, depth); + int depth = src.depth() == CV_8U ? CV_32S : CV_64F; + int type = CV_MAKE_TYPE(depth, 1); + + t_sum.create(src.cols, src.rows, type); + sum.create(h, w, type); + + t_sqsum.create(src.cols, src.rows, CV_32FC1); + sqsum.create(h, w, CV_32FC1); + + int sum_offset = sum.offset / vlen; + int sqsum_offset = sqsum.offset / vlen; + + vector > args; + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&offset )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step)); + size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1}; + openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, depth); + + args.clear(); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&sqsum.data )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum_offset)); + size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1}; + openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, depth); } void integral(const oclMat &src, oclMat &sum) @@ -1042,39 +1039,35 @@ namespace cv oclMat t_sum; int w = src.cols + 1, h = src.rows + 1; - int depth; - if(src.cols * src.rows <= 2901 * 2901) - { - t_sum.create(src.cols, src.rows, CV_32SC1); - sum.create(h, w, CV_32SC1); - }else - { - t_sum.create(src.cols, src.rows, CV_32FC1); - sum.create(h, w, CV_32FC1); - } - depth = sum.depth(); - int sum_offset = sum.offset / vlen; - vector > args; - args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&offset )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step)); - size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1}; - openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, depth); - args.clear(); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset)); - size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1}; - openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, depth); + int depth = src.depth() == CV_8U ? CV_32S : CV_32F; + int type = CV_MAKE_TYPE(depth, 1); + + t_sum.create(src.cols, src.rows, type); + sum.create(h, w, type); + + int sum_offset = sum.offset / vlen; + vector > args; + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&offset )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step)); + size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1}; + openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, depth); + + args.clear(); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset)); + size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1}; + openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, depth); } /////////////////////// corner ////////////////////////////// diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp index 4d297a7a4f..86c750f371 100644 --- a/modules/ocl/test/test_imgproc.cpp +++ b/modules/ocl/test/test_imgproc.cpp @@ -579,7 +579,19 @@ TEST_P(cornerHarris, Mat) struct integral : ImgprocTestBase {}; -TEST_P(integral, Mat) +TEST_P(integral, Mat1) +{ + for(int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + cv::ocl::integral(clmat1_roi, cldst_roi); + cv::integral(mat1_roi, dst_roi); + Near(0); + } +} + +TEST_P(integral, Mat2) { for(int j = 0; j < LOOP_TIMES; j++) {