/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // @Authors // Niko Li, newlife20080214@gmail.com // Jia Haipeng, jiahaipeng95@gmail.com // Shengen Yan, yanshengen@gmail.com // Jiang Liyuan,jlyuan001.good@163.com // Rock Li, Rock.Li@amd.com // Zailong Wu, bullet@yeah.net // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. 
// In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #include "precomp.hpp" #include #ifdef HAVE_OPENCL using namespace cv; using namespace cv::ocl; using namespace cvtest; using namespace testing; using namespace std; PARAM_TEST_CASE(ArithmTestBase, MatType, bool) { int type; cv::Scalar val; //src mat cv::Mat mat1; cv::Mat mat2; cv::Mat mask; cv::Mat dst; cv::Mat dst1; //bak, for two outputs // set up roi int roicols; int roirows; int src1x; int src1y; int src2x; int src2y; int dstx; int dsty; int maskx; int masky; //src mat with roi cv::Mat mat1_roi; cv::Mat mat2_roi; cv::Mat mask_roi; cv::Mat dst_roi; cv::Mat dst1_roi; //bak //std::vector oclinfo; //ocl dst mat for testing cv::ocl::oclMat gdst_whole; cv::ocl::oclMat gdst1_whole; //bak //ocl mat with roi cv::ocl::oclMat gmat1; cv::ocl::oclMat gmat2; cv::ocl::oclMat gdst; cv::ocl::oclMat gdst1; //bak cv::ocl::oclMat gmask; virtual void SetUp() { type = GET_PARAM(0); cv::RNG& rng = TS::ptr()->get_rng(); cv::Size size(MWIDTH, MHEIGHT); mat1 = randomMat(rng, size, type, 5, 16, false); //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false); mat2 = randomMat(rng, size, type, 5, 16, false); dst = randomMat(rng, size, type, 5, 16, false); dst1 = randomMat(rng, size, type, 5, 16, false); mask = randomMat(rng, size, CV_8UC1, 0, 2, false); cv::threshold(mask, mask, 0.5, 255., CV_8UC1); val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); //int devnums = 
getDevice(oclinfo); //CV_Assert(devnums>0); ////if you want to use undefault device, set it here ////setDevice(oclinfo[0]); //setBinpath(CLBINPATH); } void Has_roi(int b) { //cv::RNG& rng = TS::ptr()->get_rng(); if(b) { //randomize ROI roicols = mat1.cols-1; roirows = mat1.rows-1; src1x = 1; src2x = 1; src1y = 1; src2y = 1; dstx = 1; dsty =1; maskx =1; masky =1; }else { roicols = mat1.cols; roirows = mat1.rows; src1x = 0; src2x = 0; src1y = 0; src2y = 0; dstx = 0; dsty = 0; maskx =0; masky =0; }; mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); //mat2_roi = mat2(Rect(src2x,src2y,256,1)); mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); mask_roi = mask(Rect(maskx,masky,roicols,roirows)); dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows)); //gdst_whole = dst; //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); //gdst1_whole = dst1; //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); //gmat1 = mat1_roi; //gmat2 = mat2_roi; //gmask = mask_roi; } }; ////////////////////////////////lut///////////////////////////////////////////////// struct Lut : ArithmTestBase {}; TEST_P(Lut, Mat) { cv::Mat mat2(3, 512, CV_8UC1); cv::RNG& rng = TS::ptr()->get_rng(); rng.fill(mat2, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(256)); #ifndef PRINT_KERNEL_RUN_TIME double totalcputick=0; double totalgputick=0; double totalgputick_kernel=0; double t0=0; double t1=0; double t2=0; for(int k=LOOPROISTART;kget_rng(); double s = rng.uniform(-10.0, 10.0); t0 = (double)cvGetTickCount();//cpu start cv::multiply(mat1_roi, mat2_roi, dst_roi, s); t0 = (double)cvGetTickCount() - t0;//cpu end t1 = (double)cvGetTickCount();//gpu start1 gdst_whole = dst; gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); gmat1 = mat1_roi; gmat2 = mat2_roi; t2=(double)cvGetTickCount();//kernel cv::ocl::multiply(gmat1, gmat2, gdst, s); t2 = (double)cvGetTickCount() - t2;//kernel cv::Mat cpu_dst; gdst_whole.download (cpu_dst);//download t1 = 
(double)cvGetTickCount() - t1;//gpu end1 if(j == 0) continue; totalgputick=t1+totalgputick; totalcputick=t0+totalcputick; totalgputick_kernel=t2+totalgputick_kernel; } if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; } #else for(int j = LOOPROISTART; j < LOOPROIEND; j ++) { Has_roi(j); cv::RNG& rng = TS::ptr()->get_rng(); double s = rng.uniform(-10.0, 10.0); gdst_whole = dst; gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); gmat1 = mat1_roi; gmat2 = mat2_roi; if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; cv::ocl::multiply(gmat1, gmat2, gdst, s); }; #endif } struct Div : ArithmTestBase {}; TEST_P(Div, Mat) { #ifndef PRINT_KERNEL_RUN_TIME double totalcputick=0; double totalgputick=0; double totalgputick_kernel=0; double t0=0; double t1=0; double t2=0; for(int k=LOOPROISTART;kget_rng(); double s = rng.uniform(-10.0, 10.0); t0 = (double)cvGetTickCount();//cpu start cv::divide(mat1_roi, mat2_roi, dst_roi, s); t0 = (double)cvGetTickCount() - t0;//cpu end t1 = (double)cvGetTickCount();//gpu start1 gdst_whole = dst; gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); gmat1 = mat1_roi; gmat2 = mat2_roi; t2=(double)cvGetTickCount();//kernel cv::ocl::divide(gmat1, gmat2, gdst, s); t2 = (double)cvGetTickCount() - t2;//kernel cv::Mat cpu_dst; gdst_whole.download (cpu_dst);//download t1 = (double)cvGetTickCount() - t1;//gpu end1 if(j == 0) continue; totalgputick=t1+totalgputick; totalcputick=t0+totalcputick; totalgputick_kernel=t2+totalgputick_kernel; } if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* 
LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; } #else for(int j = LOOPROISTART; j < LOOPROIEND; j ++) { Has_roi(j); cv::RNG& rng = TS::ptr()->get_rng(); double s = rng.uniform(-10.0, 10.0); gdst_whole = dst; gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); gmat1 = mat1_roi; gmat2 = mat2_roi; if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; cv::ocl::divide(gmat1, gmat2, gdst, s); }; #endif } struct Absdiff : ArithmTestBase {}; TEST_P(Absdiff, Mat) { #ifndef PRINT_KERNEL_RUN_TIME double totalcputick=0; double totalgputick=0; double totalgputick_kernel=0; double t0=0; double t1=0; double t2=0; for(int k=LOOPROISTART;k::max(); maxVal = -std::numeric_limits::max(); for (int i = 0; i < mat1_roi.rows; ++i) for (int j = 0; j < mat1_roi.cols; ++j) { signed char val = mat1_roi.at(i, j); if (val < minVal) minVal = val; if (val > maxVal) maxVal = val; } } t0 = (double)cvGetTickCount() - t0;//cpu end t1 = (double)cvGetTickCount();//gpu start1 gmat1 = mat1_roi; double minVal_, maxVal_; t2=(double)cvGetTickCount();//kernel cv::ocl::minMax(gmat1, &minVal_, &maxVal_); t2 = (double)cvGetTickCount() - t2;//kernel t1 = (double)cvGetTickCount() - t1;//gpu end1 if(j == 0) continue; totalgputick=t1+totalgputick; totalcputick=t0+totalcputick; totalgputick_kernel=t2+totalgputick_kernel; } if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; } #else for(int j = LOOPROISTART; j < LOOPROIEND; j ++) { Has_roi(j); gmat1 = mat1_roi; double minVal_, maxVal_; if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; cv::ocl::minMax(gmat1, &minVal_, &maxVal_); }; #endif } TEST_P(MinMax, MASK) { #ifndef PRINT_KERNEL_RUN_TIME double totalcputick=0; double totalgputick=0; double totalgputick_kernel=0; double t0=0; double t1=0; double t2=0; for(int k=LOOPROISTART;k::max(); maxVal = -std::numeric_limits::max(); for (int i = 0; i < mat1_roi.rows; ++i) for (int j = 0; j < mat1_roi.cols; ++j) { signed char val = mat1_roi.at(i, j); unsigned char m = mask_roi.at(i, j); if (val < minVal && m) minVal = val; if (val > maxVal && m) maxVal = val; } } t0 = (double)cvGetTickCount() - t0;//cpu end t1 = (double)cvGetTickCount();//gpu start1 gmat1 = mat1_roi; gmask = mask_roi; double minVal_, maxVal_; t2=(double)cvGetTickCount();//kernel cv::ocl::minMax(gmat1, &minVal_, &maxVal_,gmask); t2 = (double)cvGetTickCount() - t2;//kernel t1 = (double)cvGetTickCount() - t1;//gpu end1 if(j == 0) continue; totalgputick=t1+totalgputick; totalcputick=t0+totalcputick; totalgputick_kernel=t2+totalgputick_kernel; } if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; } #else for(int j = LOOPROISTART; j < LOOPROIEND; j ++) { Has_roi(j); gmat1 = mat1_roi; gmask = mask_roi; double minVal_, maxVal_; if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; cv::ocl::minMax(gmat1, &minVal_, &maxVal_,gmask); }; #endif } struct MinMaxLoc : ArithmTestBase {}; TEST_P(MinMaxLoc, MAT) { #ifndef PRINT_KERNEL_RUN_TIME double totalcputick=0; double totalgputick=0; double totalgputick_kernel=0; double t0=0; double t1=0; double t2=0; for(int k=LOOPROISTART;k::max(); maxVal = -std::numeric_limits::max(); for (int i = 0; i < mat1_roi.rows; ++i) for (int j = 0; j < mat1_roi.cols; ++j) { signed char val = mat1_roi.at(i, j); if (val < minVal) { minVal = val; minLoc.x = j; minLoc.y = i; } if (val > maxVal) { maxVal = val; maxLoc.x = j; maxLoc.y = i; } } } t0 = (double)cvGetTickCount() - t0;//cpu end t1 = (double)cvGetTickCount();//gpu start1 gmat1 = mat1_roi; double minVal_, maxVal_; cv::Point minLoc_, maxLoc_; t2=(double)cvGetTickCount();//kernel cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, cv::ocl::oclMat()); t2 = (double)cvGetTickCount() - t2;//kernel t1 = (double)cvGetTickCount() - t1;//gpu end1 if(j == 0) continue; totalgputick=t1+totalgputick; totalcputick=t0+totalcputick; totalgputick_kernel=t2+totalgputick_kernel; } if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; } #else for(int j = LOOPROISTART; j < LOOPROIEND; j ++) { Has_roi(j); gmat1 = mat1_roi; double minVal_, maxVal_; cv::Point minLoc_, maxLoc_; if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, cv::ocl::oclMat()); }; #endif } TEST_P(MinMaxLoc, MASK) { #ifndef PRINT_KERNEL_RUN_TIME double totalcputick=0; double totalgputick=0; double totalgputick_kernel=0; double t0=0; double t1=0; double t2=0; for(int k=LOOPROISTART;k::max(); maxVal = -std::numeric_limits::max(); for (int i = 0; i < mat1_roi.rows; ++i) for (int j = 0; j < mat1_roi.cols; ++j) { signed char val = mat1_roi.at(i, j); unsigned char m = mask_roi.at(i ,j); if (val < minVal && m) { minVal = val; minLoc.x = j; minLoc.y = i; } if (val > maxVal && m) { maxVal = val; maxLoc.x = j; maxLoc.y = i; } } } t0 = (double)cvGetTickCount() - t0;//cpu end t1 = (double)cvGetTickCount();//gpu start1 gmat1 = mat1_roi; gmask = mask_roi; double minVal_, maxVal_; cv::Point minLoc_, maxLoc_; t2=(double)cvGetTickCount();//kernel cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, gmask); t2 = (double)cvGetTickCount() - t2;//kernel t1 = (double)cvGetTickCount() - t1;//gpu end1 if(j == 0) continue; totalgputick=t1+totalgputick; totalcputick=t0+totalcputick; totalgputick_kernel=t2+totalgputick_kernel; } if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; } #else for(int j = LOOPROISTART; j < LOOPROIEND; j ++) { Has_roi(j); gmat1 = mat1_roi; gmask = mask_roi; double minVal_, maxVal_; cv::Point minLoc_, maxLoc_; if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, gmask); }; #endif } struct Sum : ArithmTestBase {}; TEST_P(Sum, MAT) { #ifndef PRINT_KERNEL_RUN_TIME double totalcputick=0; double totalgputick=0; double totalgputick_kernel=0; double t0=0; double t1=0; double t2=0; for(int k=LOOPROISTART;k oclinfo; //ocl dst mat for testing cv::ocl::oclMat gdst_whole; cv::ocl::oclMat gdst1_whole; //bak //ocl mat with roi cv::ocl::oclMat gmat1; cv::ocl::oclMat gmat2; cv::ocl::oclMat gdst; cv::ocl::oclMat gdst1; //bak cv::ocl::oclMat gmask; virtual void SetUp() { //type = GET_PARAM(0); type = CV_8UC1; cv::RNG& rng = TS::ptr()->get_rng(); cv::Size size(MWIDTH, MHEIGHT); mat1 = randomMat(rng, size, type, 5, 16, false); //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false); mat2 = randomMat(rng, size, type, 5, 16, false); dst = randomMat(rng, size, type, 5, 16, false); dst1 = randomMat(rng, size, type, 5, 16, false); mask = randomMat(rng, size, CV_8UC1, 0, 2, false); cv::threshold(mask, mask, 0.5, 255., CV_8UC1); val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); //int devnums = getDevice(oclinfo); //CV_Assert(devnums>0); ////if you want to use undefault device, set it here ////setDevice(oclinfo[0]); //setBinpath(CLBINPATH); } void Has_roi(int b) { //cv::RNG& rng = TS::ptr()->get_rng(); if(b) { //randomize ROI roicols = mat1.cols-1; roirows = mat1.rows-1; src1x = 1; src2x = 1; src1y = 1; src2y = 1; dstx = 1; dsty =1; maskx =1; masky =1; }else { roicols = mat1.cols; roirows = mat1.rows; src1x = 0; src2x = 0; src1y = 0; src2y = 0; dstx = 0; dsty = 0; maskx =0; masky =0; }; mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); //mat2_roi = mat2(Rect(src2x,src2y,256,1)); 
mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); mask_roi = mask(Rect(maskx,masky,roicols,roirows)); dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows)); //gdst_whole = dst; //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); //gdst1_whole = dst1; //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); //gmat1 = mat1_roi; //gmat2 = mat2_roi; //gmask = mask_roi; } }; struct Compare : CompareTestBase {}; TEST_P(Compare, Mat) { if(mat1.type()==CV_8SC1) { cout << "\tUnsupported type\t\n"; } int cmp_codes[] = {CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE}; const char* cmp_str[] = {"CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE"}; int cmp_num = sizeof(cmp_codes) / sizeof(int); for (int i = 0; i < cmp_num; ++i) { #ifndef PRINT_KERNEL_RUN_TIME double totalcputick=0; double totalgputick=0; double totalgputick_kernel=0; double t0=0; double t1=0; double t2=0; for(int k=LOOPROISTART;k(i,j); float val2 = mat2.at(i,j); ((float *)(dst.data))[i*dst.step/4 +j]= val1 * val1 +val2 * val2; } t0 = (double)cvGetTickCount() - t0;//cpu end t1 = (double)cvGetTickCount();//gpu start1 cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; t2=(double)cvGetTickCount();//kernel cv::ocl::magnitudeSqr(clmat1,clmat2, cldst); t2 = (double)cvGetTickCount() - t2;//kernel cv::Mat cpu_dst; cldst.download(cpu_dst);//download t1 = (double)cvGetTickCount() - t1;//gpu end1 if(j == 0) continue; totalgputick=t1+totalgputick; totalcputick=t0+totalcputick; totalgputick_kernel=t2+totalgputick_kernel; } if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; } #else for(int j = LOOPROISTART; j < LOOPROIEND; j ++) { Has_roi(j); cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; cv::ocl::magnitudeSqr(clmat1,clmat2, cldst); }; #endif } struct AddWeighted : ArithmTestBase {}; TEST_P(AddWeighted, Mat) { #ifndef PRINT_KERNEL_RUN_TIME double totalcputick=0; double totalgputick=0; double totalgputick_kernel=0; double t0=0; double t1=0; double t2=0; for(int k=LOOPROISTART;k