From 11b403900ea91b93384a9a216cdc13e7a8077dcb Mon Sep 17 00:00:00 2001 From: niko Date: Mon, 3 Sep 2012 18:07:31 +0800 Subject: [PATCH] add channel 3 for test/main.cpp add remap --- modules/ocl/src/filtering.cpp | 1 - modules/ocl/src/imgproc.cpp | 44 +- modules/ocl/src/initialization.cpp | 4 +- modules/ocl/src/kernels/imgproc_remap.cl | 528 +++++++++++++++++++++ modules/ocl/test/main.cpp | 1 + modules/ocl/test/test_arithm.cpp | 32 +- modules/ocl/test/test_blend.cpp | 2 +- modules/ocl/test/test_canny.cpp | 2 +- modules/ocl/test/test_filters.cpp | 14 +- modules/ocl/test/test_hog.cpp | 4 +- modules/ocl/test/test_imgproc.cpp | 200 +++++--- modules/ocl/test/test_match_template.cpp | 4 +- modules/ocl/test/test_matrix_operation.cpp | 8 +- modules/ocl/test/test_split_merge.cpp | 4 +- 14 files changed, 751 insertions(+), 97 deletions(-) diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp index 02c13702f5..bbce18103f 100644 --- a/modules/ocl/src/filtering.cpp +++ b/modules/ocl/src/filtering.cpp @@ -1445,7 +1445,6 @@ void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat Ptr cv::ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType ) { - CV_Assert(dstType == srcType); Mat kx, ky; getDerivKernels( kx, ky, dx, dy, ksize, false, CV_32F ); return createSeparableLinearFilter_GPU(srcType, dstType, diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index 5e6966b975..4052f23f96 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -256,8 +256,9 @@ namespace cv Context *clCxt = src.clCxt; CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST || interpolation == INTER_CUBIC || interpolation== INTER_LANCZOS4); - CV_Assert((map1.type() == CV_16SC2)&&(!map2.data) || (map1.type()== CV_32FC2)&&!map2.data);//more - CV_Assert((!map2.data || map2.size()== map1.size())); + CV_Assert((map1.type() == CV_16SC2 && !map2.data) || (map1.type()== CV_32FC2 && !map2.data) || (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)); + CV_Assert(!map2.data || map2.size()== map1.size()); + CV_Assert(dst.size() == map1.size()); dst.create(map1.size(), src.type()); @@ -279,6 +280,14 @@ namespace cv kernelName = "remapNNSConstant"; } + else if(map1.type() == CV_32FC1 && map2.type() == CV_32FC1) + { + if(interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT) + kernelName = "remapLNF1Constant"; + else if (interpolation == INTER_NEAREST && borderType == BORDER_CONSTANT) + kernelName = "remapNNF1Constant"; + } + int channels = dst.channels(); int depth = dst.depth(); int type = src.type(); @@ -402,7 +411,36 @@ namespace cv { args.push_back( make_pair(sizeof(cl_float4),(void*)&borderValue)); } - } + } + if(map1.channels() == 1) + { + args.push_back( make_pair(sizeof(cl_mem),(void*)&dst.data)); + args.push_back( make_pair(sizeof(cl_mem),(void*)&src.data)); + // args.push_back( make_pair(sizeof(cl_mem),(void*)&srcImage)); //imageBuffer + args.push_back( make_pair(sizeof(cl_mem),(void*)&map1.data)); + args.push_back( make_pair(sizeof(cl_mem),(void*)&map2.data)); + args.push_back( make_pair(sizeof(cl_int),(void*)&dst.offset)); + args.push_back( make_pair(sizeof(cl_int),(void*)&src.offset)); + args.push_back( make_pair(sizeof(cl_int),(void*)&map1.offset)); + args.push_back( make_pair(sizeof(cl_int),(void*)&dst.step)); + args.push_back( make_pair(sizeof(cl_int),(void*)&src.step)); + args.push_back( make_pair(sizeof(cl_int),(void*)&map1.step)); + args.push_back( make_pair(sizeof(cl_int),(void*)&src.cols)); + args.push_back( make_pair(sizeof(cl_int),(void*)&src.rows)); + args.push_back( make_pair(sizeof(cl_int),(void*)&dst.cols)); + args.push_back( make_pair(sizeof(cl_int),(void*)&dst.rows)); + args.push_back( make_pair(sizeof(cl_int),(void*)&map1.cols)); + args.push_back( make_pair(sizeof(cl_int),(void*)&map1.rows)); + args.push_back( make_pair(sizeof(cl_int), (void *)&cols)); + if(src.clCxt -> impl -> double_support != 0) + { + args.push_back( make_pair(sizeof(cl_double4),(void*)&borderValue)); + } + else + { + args.push_back( make_pair(sizeof(cl_float4),(void*)&borderValue)); + } + } openCLExecuteKernel(clCxt,&imgproc_remap,kernelName,globalThreads,localThreads,args,src.channels(),src.depth()); } diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index 5075d30938..42ad8bc120 100644 --- a/modules/ocl/src/initialization.cpp +++ b/modules/ocl/src/initialization.cpp @@ -391,7 +391,7 @@ namespace cv size_t region[3] = {width, height, 1}; if(kind == clMemcpyHostToDevice) { - if(dpitch == width || channels==3) + if(dpitch == width || channels==3 || height == 1) { openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE, 0, width*height, src, 0, NULL, NULL)); @@ -404,7 +404,7 @@ namespace cv } else if(kind == clMemcpyDeviceToHost) { - if(spitch == width || channels==3) + if(spitch == width || channels==3 || height == 1) { openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE, 0, width*height, dst, 0, NULL, NULL)); diff --git a/modules/ocl/src/kernels/imgproc_remap.cl b/modules/ocl/src/kernels/imgproc_remap.cl index c81188bf20..5408ea8e4f 100644 --- a/modules/ocl/src/kernels/imgproc_remap.cl +++ b/modules/ocl/src/kernels/imgproc_remap.cl @@ -144,7 +144,53 @@ __kernel void remapNNFConstant_C1_D0(__global unsigned char* dst, __global unsig *d = dst_data; } +} + +__kernel void remapNNF1Constant_C1_D0(__global unsigned char* dst, __global unsigned char const * restrict src, + __global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, + int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows, int threadCols, F4 nVal) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < threadCols && y < dst_rows) + { + x = x << 2; + int gx = x - (dst_offset&3); + int4 Gx = (int4)(gx, gx+1, gx+2, gx+3); + + uchar4 nval =convert_uchar4(nVal); + uchar val = nval.s0; + int dstStart = (y * dst_step + x + dst_offset) - (dst_offset&3); + + int map1Start = y * map1_step + (x << 2) + map1_offset - ((dst_offset & 3) << 2); + float4 map1_data; + float4 map2_data; + + map1_data = *((__global float4 *)((__global char*)map1 + map1Start)); + map2_data = *((__global float4 *)((__global char*)map2 + map1Start)); + float8 map_data = (float8)(map1_data.s0, map2_data.s0, map1_data.s1, map2_data.s1, map1_data.s2, map2_data.s2, map1_data.s3, map2_data.s3); + int8 map_dataZ = convert_int8_sat_rte(map_data); + int4 srcIdx = map_dataZ.odd * src_step + map_dataZ.even + src_offset; + + uchar4 src_data; + + src_data.s0 = *(src + srcIdx.s0); + src_data.s1 = *(src + srcIdx.s1); + src_data.s2 = *(src + srcIdx.s2); + src_data.s3 = *(src + srcIdx.s3); + uchar4 dst_data; + dst_data = convert_uchar4(map_dataZ.even >= (int4)(src_cols) || map_dataZ.odd >= (int4)(src_rows)) ? (uchar4)(val) : src_data; + __global uchar4* d = (__global uchar4 *)(dst + dstStart); + + uchar4 dVal = *d; + + int4 con = (Gx >= 0 && Gx < dst_cols && y >= 0 && y < dst_rows); + + dst_data = (convert_uchar4(con) != convert_uchar4((int4)(0))) ? dst_data : dVal; + *d = dst_data; + } } @@ -244,6 +290,60 @@ __kernel void remapNNFConstant_C4_D0(__global unsigned char* dst, __global unsig } +__kernel void remapNNF1Constant_C4_D0(__global unsigned char* dst, __global unsigned char const * restrict src, + __global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, + int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows, int threadCols, F4 nVal) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < threadCols && y < dst_rows) + { + x = x << 4; + int gx = x - (dst_offset&15); + int16 Gx = (int16)(gx, gx+1, gx+2, gx+3, gx+4, gx+5, gx+6, gx+7, gx+8, gx+9, gx+10, gx+11, gx+12, gx+13, gx+14, gx+15); + + uchar4 nval =convert_uchar4(nVal); + + int dstStart = (y * dst_step + x + dst_offset) - (dst_offset&15); + + int map1Start = y * map1_step + x + map1_offset - (dst_offset&15); + float4 map1_data; + float4 map2_data; + + map1_data = *((__global float4 *)((__global char*)map1 + map1Start)); + map2_data = *((__global float4 *)((__global char*)map2 + map1Start)); + float8 map_data = (float8)(map1_data.s0, map2_data.s0, map1_data.s1, map2_data.s1, map1_data.s2, map2_data.s2, map1_data.s3, map2_data.s3); + int8 map1_dataZ = convert_int8_sat_rte(map_data); + + int4 srcIdx = map1_dataZ.odd * src_step + (map1_dataZ.even <<((int4)(2))) + src_offset; + uchar4 src_a, src_b, src_c, src_d; + src_a = *((__global uchar4 *)((__global char*)src + srcIdx.s0)); + src_b = *((__global uchar4 *)((__global char*)src + srcIdx.s1)); + src_c = *((__global uchar4 *)((__global char*)src + srcIdx.s2)); + src_d = *((__global uchar4 *)((__global char*)src + srcIdx.s3)); + + uchar16 dst_data; + uchar4 dst_a, dst_b, dst_c, dst_d; + dst_a = (map1_dataZ.s0 >= src_cols || map1_dataZ.s1 >= src_rows)? nval : src_a; + dst_b = (map1_dataZ.s2 >= src_cols || map1_dataZ.s3 >= src_rows)? nval : src_b; + dst_c = (map1_dataZ.s4 >= src_cols || map1_dataZ.s5 >= src_rows)? nval : src_c; + dst_d = (map1_dataZ.s6 >= src_cols || map1_dataZ.s7 >= src_rows)? nval : src_d; + + dst_data = (uchar16)(dst_a, dst_b, dst_c, dst_d); + __global uchar16* d = (__global uchar16 *)(dst + dstStart); + + uchar16 dVal = *d; + + int16 con = (Gx >= 0 && Gx < (dst_cols<<2) && y >= 0 && y < dst_rows); + dst_data = (convert_uchar16(con) != ((uchar16)(0))) ? dst_data : dVal; + + *d = dst_data; + + } + +} + __kernel void remapNNSConstant_C1_D5(__global float* dst, __global float const * restrict src, __global short * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, @@ -349,6 +449,61 @@ __kernel void remapNNFConstant_C1_D5(__global float* dst, __global float const * } +__kernel void remapNNF1Constant_C1_D5(__global float* dst, __global float const * restrict src, + __global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, + int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows ,int threadCols, F4 nVal) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < threadCols && y < dst_rows) + { + x = x << 4; + + int gx = x - (dst_offset&15); + int4 Gx = (int4)(gx, gx+4, gx+8, gx+12); + + float4 nval =convert_float4(nVal); + float val = nval.s0; + + int dstStart = y * dst_step + x + dst_offset - (dst_offset&15); + + int map1Start = y * map1_step + x + map1_offset - (dst_offset&15); + float4 map1_data; + float4 map2_data; + + map1_data = *((__global float4 *)((__global char*)map1 + map1Start)); + map2_data = *((__global float4 *)((__global char*)map2 + map1Start)); + float8 map_data = (float8)(map1_data.s0, map2_data.s0, map1_data.s1, map2_data.s1, map1_data.s2, map2_data.s2, map1_data.s3, map2_data.s3); + int8 map1_dataZ = convert_int8_sat_rte(map_data); + + int4 srcIdx = convert_int4(map1_dataZ.odd) * src_step + convert_int4(map1_dataZ.even <<(int4)(2)) + src_offset; + + float4 src_data; + src_data.s0 = *((__global float *)((__global char*)src + srcIdx.s0)); + src_data.s1 = *((__global float *)((__global char*)src + srcIdx.s1)); + src_data.s2 = *((__global float *)((__global char*)src + srcIdx.s2)); + src_data.s3 = *((__global float *)((__global char*)src + srcIdx.s3)); + float4 dst_data; + + dst_data.s0 = (map1_dataZ.s0 >= src_cols || map1_dataZ.s1 >= src_rows)? val : src_data.s0; + dst_data.s1 = (map1_dataZ.s2 >= src_cols || map1_dataZ.s3 >= src_rows)? val : src_data.s1; + dst_data.s2 = (map1_dataZ.s4 >= src_cols || map1_dataZ.s5 >= src_rows)? val : src_data.s2; + dst_data.s3 = (map1_dataZ.s6 >= src_cols || map1_dataZ.s7 >= src_rows)? val : src_data.s3; + + + __global float4* d = (__global float4 *)((__global uchar*)dst + dstStart); + + float4 dVal = *d; + + int4 con = (Gx >= 0 && Gx < (dst_cols<<2) && y >= 0 && y < dst_rows); + dst_data = (convert_float4(con) != (float4)(0)) ? dst_data : dVal; + + *d = dst_data; + + } + +} __kernel void remapNNSConstant_C4_D5(__global float * dst, __global float const * restrict src, __global short * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, @@ -389,6 +544,29 @@ __kernel void remapNNFConstant_C4_D5(__global float * dst, __global float const } } +__kernel void remapNNF1Constant_C4_D5(__global float * dst, __global float const * restrict src, + __global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, + int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < threadCols && y < dst_rows) + { + int dstIdx = y * dst_step + (x << 4) + dst_offset ; + int mapIdx = y * map1_step + (x << 2) + map1_offset ; + float map1_data = *((__global float *)((__global char*)map1 + mapIdx)); + float map2_data = *((__global float *)((__global char*)map2 + mapIdx)); + float2 map_data = (float2)(map1_data, map2_data); + int2 map1_dataZ = convert_int2_sat_rte(map_data); + int srcIdx = map1_dataZ.y * src_step + (map1_dataZ.x << 4) + src_offset; + float4 nval = convert_float4(nVal); + float4 src_data = *((__global float4 *)((__global uchar *)src + srcIdx)); + *((__global float4 *)((__global uchar*)dst + dstIdx)) = (map1_dataZ.x >= src_cols || map1_dataZ.y >= src_rows) ? nval : src_data; + } +} + + __kernel void remapLNFConstant_C1_D0(__global unsigned char* dst, __global unsigned char const * restrict src, __global float * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, @@ -493,6 +671,113 @@ __kernel void remapLNFConstant_C1_D0(__global unsigned char* dst, __global unsig } } +__kernel void remapLNF1Constant_C1_D0(__global unsigned char* dst, __global unsigned char const * restrict src, + __global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, + int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) +{ + + int x = get_global_id(0); + int y = get_global_id(1); + if(x < threadCols && y < dst_rows) + { + x = x << 2; + int gx = x - (dst_offset&3); + int4 Gx = (int4)(gx, gx+1, gx+2, gx+3); + + uchar4 nval =convert_uchar4(nVal); + uchar val = nval.s0; + + + int dstStart = (y * dst_step + x + dst_offset) - (dst_offset&3); + + int map1Start = y * map1_step + (x << 2) + map1_offset - ((dst_offset & 3) << 2); + float4 map1_data; + float4 map2_data; + + map1_data = *((__global float4 *)((__global char*)map1 + map1Start)); + map2_data = *((__global float4 *)((__global char*)map2 + map1Start)); + float8 map_data = (float8)(map1_data.s0, map2_data.s0, map1_data.s1, map2_data.s1, map1_data.s2, map2_data.s2, map1_data.s3, map2_data.s3); + int8 map1_dataD = convert_int8(map_data); + float8 temp = map_data - convert_float8(map1_dataD); + + float4 u = temp.even; + float4 v = temp.odd; + float4 ud = (float4)(1.0) - u; + float4 vd = (float4)(1.0) - v; + //float8 map1_dataU = map1_dataD + 1; + + int4 map1_dataDx = map1_dataD.even; + int4 map1_dataDy = map1_dataD.odd; + int4 map1_dataDx1 = map1_dataDx + (int4)(1); + int4 map1_dataDy1 = map1_dataDy + (int4)(1); + + int4 src_StartU = map1_dataDy * src_step + map1_dataDx + src_offset; + int4 src_StartD = src_StartU + src_step; + /* + //not using the vload + int4 src_StartU1 = src_StartU + (int4)(1); + int4 src_StartD1 = src_StartD + (int4)(1); + + uchar4 a, b, c, d; + a.x = *(src_StartU.x + src); + a.y = *(src_StartU.y + src); + a.z = *(src_StartU.z + src); + a.w = *(src_StartU.w + src); + + b.x = *(src_StartU1.x + src); + b.y = *(src_StartU1.y + src); + b.z = *(src_StartU1.z + src); + b.w = *(src_StartU1.w + src); + + c.x = *(src_StartD.x + src); + c.y = *(src_StartD.y + src); + c.z = *(src_StartD.z + src); + c.w = *(src_StartD.w + src); + + d.x = *(src_StartD1.x + src); + d.y = *(src_StartD1.y + src); + d.z = *(src_StartD1.z + src); + d.w = *(src_StartD1.w + src); + */ + uchar2 aU, aD, bU, bD, cU, cD, dU, dD; + + aU = vload2(0, src + src_StartU.s0); + bU = vload2(0, src + src_StartU.s1); + cU = vload2(0, src + src_StartU.s2); + dU = vload2(0, src + src_StartU.s3); + aD = vload2(0, src + src_StartD.s0); + bD = vload2(0, src + src_StartD.s1); + cD = vload2(0, src + src_StartD.s2); + dD = vload2(0, src + src_StartD.s3); + + uchar4 a, b, c, d; + a = (uchar4)(aU.x, bU.x, cU.x, dU.x); + b = (uchar4)(aU.y, bU.y, cU.y, dU.y); + c = (uchar4)(aD.x, bD.x, cD.x, dD.x); + d = (uchar4)(aD.y, bD.y, cD.y, dD.y); + + int4 ac =(map1_dataDx >= src_cols || map1_dataDy >= src_rows || map1_dataDy< 0 || map1_dataDy < 0); + int4 bc =(map1_dataDx1 >= src_cols || map1_dataDy >= src_rows || map1_dataDx1 < 0 || map1_dataDy < 0); + int4 cc =(map1_dataDx >= src_cols || map1_dataDy1 >= src_rows || map1_dataDy1 < 0 || map1_dataDx < 0); + int4 dc =(map1_dataDx1 >= src_cols || map1_dataDy1 >= src_rows || map1_dataDy1 < 0 || map1_dataDy1 < 0); + a = (convert_uchar4(ac) == (uchar4)(0))? a : val; + b = (convert_uchar4(bc) == (uchar4)(0))? b : val; + c = (convert_uchar4(cc) == (uchar4)(0))? c : val; + d = (convert_uchar4(dc) == (uchar4)(0))? d : val; + + uchar4 dst_data = convert_uchar4_sat_rte((convert_float4(a))* ud * vd +(convert_float4(b))* u * vd + (convert_float4(c))* ud * v + (convert_float4(d)) * u * v ); + + __global uchar4* D = (__global uchar4 *)(dst + dstStart); + + uchar4 dVal = *D; + int4 con = (Gx >= 0 && Gx < dst_cols && y >= 0 && y < dst_rows); + dst_data = (convert_uchar4(con) != (uchar4)(0)) ? dst_data : dVal; + + *D = dst_data; + } +} + + __kernel void remapLNSConstant_C1_D0(__global unsigned char* dst, __global unsigned char const * restrict src, __global short * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) @@ -626,6 +911,97 @@ __kernel void remapLNFConstant_C4_D0(__global unsigned char* dst, __global unsig *D = dst_data; } } + + +__kernel void remapLNF1Constant_C4_D0(__global unsigned char* dst, __global unsigned char const * restrict src, + __global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, + int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) +{ + + int x = get_global_id(0); + int y = get_global_id(1); + if(x < threadCols && y < dst_rows) + { + x = x << 4; + int gx = x - (dst_offset&15); + int16 Gx = (int16)(gx, gx+1, gx+2, gx+3, gx+4, gx+5, gx+6, gx+7, gx+8, gx+9, gx+10, gx+11, gx+12, gx+13, gx+14, gx+15); + + uchar4 nval =convert_uchar4(nVal); + + int dstStart = (y * dst_step + x + dst_offset) - (dst_offset&15); + + int map1Start = y * map1_step + x + map1_offset - (dst_offset & 15); + float4 map1_data; + float4 map2_data; + + map1_data = *((__global float4 *)((__global char*)map1 + map1Start)); + map2_data = *((__global float4 *)((__global char*)map2 + map1Start)); + float8 map_data = (float8)(map1_data.s0, map2_data.s0, map1_data.s1, map2_data.s1, map1_data.s2, map2_data.s2, map1_data.s3, map2_data.s3); + int8 map1_dataD = convert_int8(map_data); + float8 temp = map_data - convert_float8(map1_dataD); + + float4 u = temp.even; + float4 v = temp.odd; + float4 ud = (float4)(1.0) - u; + float4 vd = (float4)(1.0) - v; + + //float8 map1_dataU = map1_dataD + 1; + + int4 map1_dataDx = map1_dataD.even; + int4 map1_dataDy = map1_dataD.odd; + int4 map1_dataDx1 = map1_dataDx + (int4)(1); + int4 map1_dataDy1 = map1_dataDy + (int4)(1); + + int4 src_StartU = map1_dataDy * src_step + (convert_int4(map1_dataDx) << (int4)(2)) + src_offset; + int4 src_StartD = src_StartU + src_step; + + uchar8 aU, bU, cU, dU, aD, bD, cD, dD; + aU = vload8(0, src + src_StartU.s0); + bU = vload8(0, src + src_StartU.s1); + cU = vload8(0, src + src_StartU.s2); + dU = vload8(0, src + src_StartU.s3); + aD = vload8(0, src + src_StartD.s0); + bD = vload8(0, src + src_StartD.s1); + cD = vload8(0, src + src_StartD.s2); + dD = vload8(0, src + src_StartD.s3); + uchar16 a, b, c, d; + a = (uchar16)(aU.s0123, bU.s0123, cU.s0123, dU.s0123); + b = (uchar16)(aU.s4567, bU.s4567, cU.s4567, dU.s4567); + c = (uchar16)(aD.s0123, bD.s0123, cD.s0123, dD.s0123); + d = (uchar16)(aD.s4567, bD.s4567, cD.s4567, dD.s4567); + int4 ac =(map1_dataDx >= src_cols || map1_dataDy >= src_rows || map1_dataDy< 0 || map1_dataDy < 0); + int4 bc =(map1_dataDx1 >= src_cols || map1_dataDy >= src_rows || map1_dataDx1 < 0 || map1_dataDy < 0); + int4 cc =(map1_dataDx >= src_cols || map1_dataDy1 >= src_rows || map1_dataDy1 < 0 || map1_dataDx < 0); + int4 dc =(map1_dataDx1 >= src_cols || map1_dataDy1 >= src_rows || map1_dataDy1 < 0 || map1_dataDy1 < 0); + + int16 acc = (int16)((int4)(ac.x), (int4)(ac.y), (int4)(ac.z), (int4)(ac.w)); + int16 bcc = (int16)((int4)(bc.x), (int4)(bc.y), (int4)(bc.z), (int4)(bc.w)); + int16 ccc = (int16)((int4)(cc.x), (int4)(cc.y), (int4)(cc.z), (int4)(cc.w)); + int16 dcc = (int16)((int4)(dc.x), (int4)(dc.y), (int4)(dc.z), (int4)(dc.w)); + + uchar16 val = (uchar16)(nval, nval, nval, nval); + a = (convert_uchar16(acc) == (uchar16)(0))? a : val; + b = (convert_uchar16(bcc) == (uchar16)(0))? b : val; + c = (convert_uchar16(ccc) == (uchar16)(0))? c : val; + d = (convert_uchar16(dcc) == (uchar16)(0))? d : val; + + float16 U = (float16)((float4)(u.x), (float4)(u.y), (float4)(u.z), (float4)(u.w)); + float16 V = (float16)((float4)(v.x), (float4)(v.y), (float4)(v.z), (float4)(v.w)); + float16 Ud = (float16)((float4)(ud.x), (float4)(ud.y), (float4)(ud.z), (float4)(ud.w)); + float16 Vd = (float16)((float4)(vd.x), (float4)(vd.y), (float4)(vd.z), (float4)(vd.w)); + + uchar16 dst_data = convert_uchar16_sat_rte((convert_float16(a))* Ud * Vd +(convert_float16(b))* U * Vd + (convert_float16(c))* Ud * V + (convert_float16(d)) * U * V ); + + __global uchar16* D = (__global uchar16 *)(dst + dstStart); + + uchar16 dVal = *D; + int16 con = (Gx >= 0 && Gx < (dst_cols<<2) && y >= 0 && y < dst_rows); + dst_data = (convert_uchar16(con) != (uchar16)(0)) ? dst_data : dVal; + + *D = dst_data; + } +} + __kernel void remapLNSConstant_C4_D0(__global unsigned char* dst, __global unsigned char const * restrict src, __global short * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows, int threadCols, F4 nVal) @@ -774,6 +1150,111 @@ __kernel void remapLNFConstant_C1_D5(__global float* dst, __global float const * *D = dst_data; } } + +__kernel void remapLNF1Constant_C1_D5(__global float* dst, __global float const * restrict src, + __global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, + int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) +{ + + int x = get_global_id(0); + int y = get_global_id(1); + if(x < threadCols && y < dst_rows) + { + x = x << 4; + int gx = x - (dst_offset&15); + int4 Gx = (int4)(gx, gx+4, gx+8, gx+12); + + float4 nval =convert_float4(nVal); + float4 val = (float4)(nval.s0); + + int dstStart = y * dst_step + x + dst_offset - (dst_offset & 15); + int map1Start = y * map1_step + x + map1_offset - (dst_offset & 15); + float4 map1_data; + float4 map2_data; + + map1_data = *((__global float4 *)((__global char*)map1 + map1Start)); + map2_data = *((__global float4 *)((__global char*)map2 + map1Start)); + float8 map_data = (float8)(map1_data.s0, map2_data.s0, map1_data.s1, map2_data.s1, map1_data.s2, map2_data.s2, map1_data.s3, map2_data.s3); + int8 map1_dataD = convert_int8(map_data); + float8 temp = map_data - convert_float8(map1_dataD); + + float4 u = temp.even; + float4 v = temp.odd; + float4 ud = (float4)(1.0) - u; + float4 vd = (float4)(1.0) - v; + //float8 map1_dataU = map1_dataD + 1; + + int4 map1_dataDx = map1_dataD.even; + int4 map1_dataDy = map1_dataD.odd; + int4 map1_dataDx1 = map1_dataDx + (int4)(1); + int4 map1_dataDy1 = map1_dataDy + (int4)(1); + + int4 src_StartU = map1_dataDy * src_step + (map1_dataDx << (int4)(2)) + src_offset; + int4 src_StartD = src_StartU + src_step; + /* + //not using the vload + int4 src_StartU1 = src_StartU + (int4)(1); + int4 src_StartD1 = src_StartD + (int4)(1); + + float4 a, b, c, d; + a.x = *(src_StartU.x + src); + a.y = *(src_StartU.y + src); + a.z = *(src_StartU.z + src); + a.w = *(src_StartU.w + src); + + b.x = *(src_StartU1.x + src); + b.y = *(src_StartU1.y + src); + b.z = *(src_StartU1.z + src); + b.w = *(src_StartU1.w + src); + + c.x = *(src_StartD.x + src); + c.y = *(src_StartD.y + src); + c.z = *(src_StartD.z + src); + c.w = *(src_StartD.w + src); + + d.x = *(src_StartD1.x + src); + d.y = *(src_StartD1.y + src); + d.z = *(src_StartD1.z + src); + d.w = *(src_StartD1.w + src); + */ + float2 aU, aD, bU, bD, cU, cD, dU, dD; + + aU = vload2(0, (__global float *)((__global char*)src + src_StartU.s0)); + bU = vload2(0, (__global float *)((__global char*)src + src_StartU.s1)); + cU = vload2(0, (__global float *)((__global char*)src + src_StartU.s2)); + dU = vload2(0, (__global float *)((__global char*)src + src_StartU.s3)); + aD = vload2(0, (__global float *)((__global char*)src + src_StartD.s0)); + bD = vload2(0, (__global float *)((__global char*)src + src_StartD.s1)); + cD = vload2(0, (__global float *)((__global char*)src + src_StartD.s2)); + dD = vload2(0, (__global float *)((__global char*)src + src_StartD.s3)); + + float4 a, b, c, d; + a = (float4)(aU.x, bU.x, cU.x, dU.x); + b = (float4)(aU.y, bU.y, cU.y, dU.y); + c = (float4)(aD.x, bD.x, cD.x, dD.x); + d = (float4)(aD.y, bD.y, cD.y, dD.y); + + int4 ac =(map1_dataDx >= (int4)(src_cols) || map1_dataDy >= (int4)(src_rows) || map1_dataDy < (int4)(0) || map1_dataDy < (int4)(0)); + int4 bc =(map1_dataDx1 >= (int4)(src_cols) || map1_dataDy >= (int4)(src_rows) || map1_dataDx1 < (int4)(0) || map1_dataDy < (int4)(0)); + int4 cc =(map1_dataDx >= (int4)(src_cols) || map1_dataDy1 >= (int4)(src_rows) || map1_dataDy1 < (int4)(0) || map1_dataDx < (int4)(0)); + int4 dc =(map1_dataDx1 >= (int4)(src_cols) || map1_dataDy1 >= (int4)(src_rows) || map1_dataDy1 < (int4)(0) || map1_dataDy1 < (int4)(0)); + a = (convert_float4(ac) == (float4)(0))? a : val; + b = (convert_float4(bc) == (float4)(0))? b : val; + c = (convert_float4(cc) == (float4)(0))? c : val; + d = (convert_float4(dc) == (float4)(0))? d : val; + + float4 dst_data = a * ud * vd + b * u * vd + c * ud * v + d * u * v ; + + __global float4* D = (__global float4 *)((__global char*)dst + dstStart); + + float4 dVal = *D; + int4 con = (Gx >= 0 && Gx < (dst_cols << 2) && y >= 0 && y < dst_rows); + dst_data = (convert_float4(con) != (float4)(0)) ? dst_data : dVal; + + *D = dst_data; + } +} + __kernel void remapLNSConstant_C1_D5(__global float* dst, __global float const * restrict src, __global short * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows ,int threadCols, F4 nVal) @@ -873,6 +1354,53 @@ __kernel void remapLNFConstant_C4_D5(__global float * dst, __global float const } } +__kernel void remapLNF1Constant_C4_D5(__global float * dst, __global float const * restrict src, + __global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, + int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < threadCols && y < dst_rows) + { + int dstIdx = y * dst_step + (x << 4) + dst_offset ; + int mapIdx = y * map1_step + (x << 2) + map1_offset ; + float map1_data = *((__global float *)((__global char*)map1 + mapIdx)); + float map2_data = *((__global float *)((__global char*)map2 + mapIdx)); + float2 map_data = (float2)(map1_data, map2_data); + int2 map1_dataZ = convert_int2(map_data); + + int mX = map1_dataZ.x; + int mY = map1_dataZ.y; + int mX1 = map1_dataZ.x + 1; + int mY1 = map1_dataZ.y + 1; + + float u = map1_data - convert_float(map1_dataZ.x); + float v = map2_data - convert_float(map1_dataZ.y); + float ud = 1.0 - u; + float vd = 1.0 - v; + + int srcIdx = map1_dataZ.y * src_step + (map1_dataZ.x << 4) + src_offset; + float8 src_dataU = vload8(0,(__global float *)((__global char*)src + srcIdx)); + float8 src_dataD = vload8(0,(__global float *)((__global char*)src + srcIdx + src_step)); + + float4 a = src_dataU.lo; + float4 b = src_dataU.hi; + float4 c = src_dataD.lo; + float4 d = src_dataD.hi; + + float4 nval = convert_float4(nVal); + a = (mX >= src_cols || mY >= src_rows ) ? nval : a; + b = (mX1 >= src_cols || mY >= src_rows ) ? nval : b; + c = (mX >= src_cols || mY1 >= src_rows ) ? nval : c; + d = (mX1 >= src_cols || mY1 >= src_rows ) ? nval : d; + + float4 dst_data = a * ud * vd + b * u * vd + c * ud * v + d * u * v; + *((__global float4 *)((__global uchar*)dst + dstIdx)) = a * ud * vd + b * u * vd + c * ud * v + d * u * v ; + + } +} + /* //////////////////////////////////////////////////////////////////////// diff --git a/modules/ocl/test/main.cpp b/modules/ocl/test/main.cpp index 641663bcac..2fa8d26538 100644 --- a/modules/ocl/test/main.cpp +++ b/modules/ocl/test/main.cpp @@ -88,6 +88,7 @@ int main(int argc, char **argv) std::cout << "no device found\n"; return -1; } + //setDevice(oclinfo[1]); return RUN_ALL_TESTS(); } diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp index 91c195756c..da18e24af5 100644 --- a/modules/ocl/test/test_arithm.cpp +++ b/modules/ocl/test/test_arithm.cpp @@ -1523,7 +1523,7 @@ TEST_P(AddWeighted, Mat) //********test**************** INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine( - Values(CV_8UC1, CV_8UC4), + Values(CV_8UC1, CV_8UC3, CV_8UC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine( @@ -1535,40 +1535,40 @@ INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine( Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine( - Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), Values(false))); INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine( - Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine( - Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine( - Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1,CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine( - Values(CV_32FC1, CV_32FC4), + Values(CV_32FC1, CV_32FC3,CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine( - Values(CV_32FC1, CV_32FC4), + Values(CV_32FC1, CV_32FC3,CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine( - Values(CV_32FC1, CV_32FC4), + Values(CV_32FC1, CV_32FC3,CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine( - Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1), + Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32FC1), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine( - Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine( @@ -1588,30 +1588,30 @@ INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine( Values(false))); -INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32FC1, CV_32FC4), Values(false))); +INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32FC1, CV_32FC3,CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine( - Values(CV_8UC1, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), Values(false))); + Values(CV_8UC1, CV_32SC1, CV_32SC4, CV_32FC1,CV_32FC3, CV_32FC4), Values(false))); //Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine( - Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4), Values(false))); + Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC3,CV_32FC4), Values(false))); //Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine( - Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4), Values(false))); + Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC3,CV_32FC4), Values(false))); //Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine( - Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4), Values(false))); + Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC3,CV_32FC4), Values(false))); //Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(Values(CV_8UC1, CV_32SC1, CV_32FC1), Values(false))); // Values(false) is the reserved parameter -INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32FC1, CV_32FC4), Values(false))); +INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32FC1, CV_32FC3, CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, MagnitudeSqr, Combine( diff --git a/modules/ocl/test/test_blend.cpp b/modules/ocl/test/test_blend.cpp index 579b75fdc8..7d76d418d1 100644 --- a/modules/ocl/test/test_blend.cpp +++ b/modules/ocl/test/test_blend.cpp @@ -79,5 +79,5 @@ TEST_P(Blend, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine( DIFFERENT_SIZES, - testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)) + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3),MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)) )); \ No newline at end of file diff --git a/modules/ocl/test/test_canny.cpp b/modules/ocl/test/test_canny.cpp index e728c99be5..e6fb885cf4 100644 --- a/modules/ocl/test/test_canny.cpp +++ b/modules/ocl/test/test_canny.cpp @@ -103,6 +103,6 @@ TEST_P(Canny, Accuracy) EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2); } -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine( +INSTANTIATE_TEST_CASE_P(ocl_ImgProc, Canny, testing::Combine( testing::Values(AppertureSize(3), AppertureSize(5)), testing::Values(L2gradient(false), L2gradient(true)))); diff --git a/modules/ocl/test/test_filters.cpp b/modules/ocl/test/test_filters.cpp index 3774b10fa2..cb629bfd5b 100644 --- a/modules/ocl/test/test_filters.cpp +++ b/modules/ocl/test/test_filters.cpp @@ -822,35 +822,35 @@ TEST_P(GaussianBlur, Mat) -INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), +INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)), Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101))); INSTANTIATE_TEST_CASE_P(Filters, Laplacian, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 3))); //INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3))); -INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false))); +INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), Values(false))); //INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3))); -INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false))); +INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), Values(false))); -INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), +INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2), Values(0, 1), Values(3, 5), Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE))); INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(0, 1), Values(0, 1), + Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), Values(0, 1), Values(0, 1), Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE))); INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size(3, 3), cv::Size(5, 5)), Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE))); diff --git a/modules/ocl/test/test_hog.cpp b/modules/ocl/test/test_hog.cpp index d4e95a47ae..ed759f03c2 100644 --- a/modules/ocl/test/test_hog.cpp +++ b/modules/ocl/test/test_hog.cpp @@ -184,9 +184,9 @@ TEST_P(HOG, Detect) } -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HOG, testing::Combine( +INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG, testing::Combine( testing::Values(cv::Size(64, 128), cv::Size(48, 96)), - testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)))); + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3),MatType(CV_8UC4)))); #endif //HAVE_OPENCL diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp index 775217b607..73ff7d18af 100644 --- a/modules/ocl/test/test_imgproc.cpp +++ b/modules/ocl/test/test_imgproc.cpp @@ -793,14 +793,12 @@ TEST_P(WarpPerspective, Mat) PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int) { int srcType; - // int dstType; int map1Type; int map2Type; cv::Scalar val; int interpolation; int bordertype; - //Scalar& borderValue; cv::Mat src; cv::Mat dst; @@ -833,9 +831,6 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int) //ocl mat for testing cv::ocl::oclMat gdst; - cv::ocl::oclMat gsrc; - cv::ocl::oclMat gmap1; - cv::ocl::oclMat gmap2; //ocl mat with roi cv::ocl::oclMat gsrc_roi; @@ -846,47 +841,57 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int) virtual void SetUp() { srcType = GET_PARAM(0); - // dstType = GET_PARAM(1); map1Type = GET_PARAM(1); map2Type = GET_PARAM(2); interpolation = GET_PARAM(3); bordertype = GET_PARAM(4); - // borderValue = GET_PARAM(6); //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE); //CV_Assert(devnums > 0); cv::RNG& rng = TS::ptr()->get_rng(); - //cv::Size size = cv::Size(20, 20); - cv::Size srcSize = cv::Size(100, 100); - cv::Size dstSize = cv::Size(100, 100); - cv::Size map1Size = cv::Size(100, 100); + cv::Size srcSize = cv::Size(MWIDTH, MHEIGHT); + cv::Size dstSize = cv::Size(MWIDTH, MHEIGHT); + cv::Size map1Size = cv::Size(MWIDTH, MHEIGHT); double min = 5, max = 16; if(srcType != nulltype) { src = randomMat(rng, srcSize, srcType, min, max, false); - gsrc = src; } - if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2&& map2Type == nulltype)) + if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2 && map2Type == nulltype)) { map1 = randomMat(rng, map1Size, map1Type, min, max, false); - gmap1 = map1; - } else if (map1Type == CV_32FC1 && map2Type == CV_32FC1) { map1 = randomMat(rng, map1Size, map1Type, min, max, false); map2 = randomMat(rng, map1Size, map1Type, min, max, false); - gmap1 = map1; - gmap2 = map2; } else + { cout<<"The wrong input type"<(); + for (int y = 0; y < src.rows; ++y) + { + const uchar* src_row = src.ptr(y); + + for (int x = 0; x < src.cols; ++x) + ++hist_row[src_row[x]]; + } +} + +PARAM_TEST_CASE(histTestBase, MatType, MatType) +{ + int type_src; + + //src mat + cv::Mat src; + cv::Mat dst_hist; + //set up roi + int roicols; + int roirows; + int srcx; + int srcy; + //src mat with roi + cv::Mat src_roi; + //ocl dst mat, dst_hist and gdst_hist don't have roi + cv::ocl::oclMat gdst_hist; + //ocl mat with roi + cv::ocl::oclMat gsrc_roi; +// std::vector oclinfo; + + virtual void SetUp() + { + type_src = GET_PARAM(0); + + cv::RNG &rng = TS::ptr()->get_rng(); + cv::Size size = cv::Size(MWIDTH, MHEIGHT); + + src = randomMat(rng, size, type_src, 0, 256, false); + +// int devnums = getDevice(oclinfo); +// CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + } + + void random_roi() + { +#ifdef RANDOMROI + cv::RNG &rng = TS::ptr()->get_rng(); + + //randomize ROI + roicols = rng.uniform(1, src.cols); + roirows = rng.uniform(1, src.rows); + srcx = rng.uniform(0, src.cols - roicols); + srcy = rng.uniform(0, src.rows - roirows); +#else + roicols = src.cols; + roirows = src.rows; + srcx = 0; + srcy = 0; +#endif + src_roi = src(Rect(srcx, srcy, roicols, roirows)); + + gsrc_roi = src_roi; + } +}; +///////////////////////////calcHist/////////////////////////////////////// +struct calcHist : histTestBase {}; + +TEST_P(calcHist, Mat) +{ + for(int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + cv::Mat cpu_hist; + + calcHistGold(src_roi, dst_hist); + cv::ocl::calcHist(gsrc_roi, gdst_hist); + + gdst_hist.download(cpu_hist); + + char sss[1024]; + sprintf(sss, "roicols=%d,roirows=%d,srcx=%d,srcy=%d\n", roicols, roirows, srcx, srcy); + EXPECT_MAT_NEAR(dst_hist, cpu_hist, 0.0, sss); + } +} + + INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine( ONE_TYPE(CV_8UC1), NULL_TYPE, @@ -1371,13 +1452,13 @@ INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine( // // //INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine( -// Values(CV_8UC1, CV_8UC4, CV_32SC1), +// Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1), // NULL_TYPE, -// Values(CV_8UC1,CV_8UC4,CV_32SC1), +// Values(CV_8UC1,CV_8UC3,CV_8UC4,CV_32SC1), // NULL_TYPE, // NULL_TYPE, // Values(false))); // Values(false) is the reserved parameter -// + INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerMinEigenVal, Combine( Values(CV_8UC1,CV_32FC1), NULL_TYPE, @@ -1404,21 +1485,21 @@ INSTANTIATE_TEST_CASE_P(ImgprocTestBase, integral, Combine( Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Imgproc, WarpAffine, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR, (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP)))); INSTANTIATE_TEST_CASE_P(Imgproc, WarpPerspective, Combine - (Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + (Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR, (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP)))); INSTANTIATE_TEST_CASE_P(Imgproc, Resize, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size()), + Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size()), Values(0.5, 1.5, 2), Values(0.5, 1.5, 2), Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR))); @@ -1446,8 +1527,15 @@ INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftProc, Combine( )); INSTANTIATE_TEST_CASE_P(Imgproc, Remap, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(CV_16SC2, CV_32FC2), NULL_TYPE, + Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), + Values(CV_32FC1, CV_16SC2, CV_32FC2),Values(-1,CV_32FC1), Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR), Values((int)cv::BORDER_CONSTANT))); + + +INSTANTIATE_TEST_CASE_P(histTestBase, calcHist, Combine( + ONE_TYPE(CV_8UC1), + ONE_TYPE(CV_32SC1) //no use +)); + #endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_match_template.cpp b/modules/ocl/test/test_match_template.cpp index 08ea02985f..8eabe38326 100644 --- a/modules/ocl/test/test_match_template.cpp +++ b/modules/ocl/test/test_match_template.cpp @@ -159,7 +159,7 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U, testing::Combine( DIFFERENT_SIZES, testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), - testing::Values(Channels(1), Channels(4)), + testing::Values(Channels(1), Channels(3),Channels(4)), ALL_TEMPLATE_METHODS ) ); @@ -167,6 +167,6 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U, INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine( DIFFERENT_SIZES, testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), - testing::Values(Channels(1), Channels(4)), + testing::Values(Channels(1), Channels(3),Channels(4)), testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR)))); diff --git a/modules/ocl/test/test_matrix_operation.cpp b/modules/ocl/test/test_matrix_operation.cpp index edf3e564bd..7d8a2fb542 100644 --- a/modules/ocl/test/test_matrix_operation.cpp +++ b/modules/ocl/test/test_matrix_operation.cpp @@ -493,15 +493,15 @@ TEST_P(convertC3C4, Accuracy) } INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine( - Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), - Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4))); + Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4))); INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine( - Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine( - Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(MatrixOperation, convertC3C4, Combine( diff --git a/modules/ocl/test/test_split_merge.cpp b/modules/ocl/test/test_split_merge.cpp index ee880c5281..91e65e41b0 100644 --- a/modules/ocl/test/test_split_merge.cpp +++ b/modules/ocl/test/test_split_merge.cpp @@ -364,11 +364,11 @@ TEST_P(Split, Accuracy) INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine( - Values(CV_8U, CV_32S, CV_32F), Values(1, 4))); + Values(CV_8U, CV_32S, CV_32F), Values(1, 3,4))); INSTANTIATE_TEST_CASE_P(SplitMerge, Split , Combine( - Values(CV_8U, CV_32S, CV_32F), Values(1, 4))); + Values(CV_8U, CV_32S, CV_32F), Values(1, 3,4))); #endif // HAVE_OPENCL