diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index 1effac2138..97da8c08d1 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -817,39 +817,44 @@ void cv::ocl::LUT(const oclMat &src, const oclMat &lut, oclMat &dst) ////////////////////////////////////////////////////////////////////////////// //////////////////////////////// exp log ///////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// + static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, const char **kernelString) { - dst.create(src.size(), src.type()); - CV_Assert(src.cols == dst.cols && - src.rows == dst.rows ); - - CV_Assert(src.type() == dst.type()); - CV_Assert( src.type() == CV_32F || src.type() == CV_64F); - Context *clCxt = src.clCxt; - if(!clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F) + if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { - CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); + CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); return; } - //int channels = dst.oclchannels(); - int depth = dst.depth(); + + CV_Assert( src.depth() == CV_32F || src.depth() == CV_64F); + dst.create(src.size(), src.type()); + + int ddepth = dst.depth(); + int cols1 = src.cols * src.oclchannels(); + int srcoffset1 = src.offset / src.elemSize1(), dstoffset1 = dst.offset / dst.elemSize1(); + int srcstep1 = src.step1(), dststep1 = dst.step1(); size_t localThreads[3] = { 64, 4, 1 }; size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; + std::string buildOptions = format("-D srcT=%s", + ddepth == CV_32F ? "float" : "double"); + vector > args; - args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset )); args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&srcoffset1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dstoffset1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&srcstep1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 )); - openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, + args, src.oclchannels(), -1, buildOptions.c_str()); } + void cv::ocl::exp(const oclMat &src, oclMat &dst) { arithmetic_exp_log_run(src, dst, "arithm_exp", &arithm_exp); diff --git a/modules/ocl/src/opencl/arithm_exp.cl b/modules/ocl/src/opencl/arithm_exp.cl index 6f537a2870..b2143ba142 100644 --- a/modules/ocl/src/opencl/arithm_exp.cl +++ b/modules/ocl/src/opencl/arithm_exp.cl @@ -42,52 +42,70 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ + #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif - ////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////EXP////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////// -__kernel void arithm_exp_D5(int rows, int cols, int srcStep, int dstStep, int srcOffset, int dstOffset, __global float *src, __global float *dst) +__kernel void arithm_exp_C1(__global srcT *src, __global srcT *dst, + int cols1, int rows, + int srcOffset1, int dstOffset1, + int srcStep1, int dstStep1) { - int x = get_global_id(0); int y = get_global_id(1); - if(x < cols && y < rows) + if(x < cols1 && y < rows) { - x = x << 2; - int srcIdx = mad24( y, srcStep, x + srcOffset); - int dstIdx = mad24( y, dstStep, x + dstOffset); + int srcIdx = mad24(y, srcStep1, x + srcOffset1); + int dstIdx = mad24(y, dstStep1, x + dstOffset1); - float src_data = *((__global float *)((__global char *)src + srcIdx)); - float dst_data = exp(src_data); + dst[dstIdx] = exp(src[srcIdx]); + } +} + +__kernel void arithm_exp_C2(__global srcT *src, __global srcT *dst, + int cols1, int rows, + int srcOffset1, int dstOffset1, + int srcStep1, int dstStep1) +{ + int x1 = get_global_id(0) << 1; + int y = get_global_id(1); - *((__global float *)((__global char *)dst + dstIdx)) = dst_data; + if(x1 < cols1 && y < rows) + { + int srcIdx = mad24(y, srcStep1, x1 + srcOffset1); + int dstIdx = mad24(y, dstStep1, x1 + dstOffset1); + dst[dstIdx] = exp(src[srcIdx]); + dst[dstIdx + 1] = x1 + 1 < cols1 ? exp(src[srcIdx + 1]) : dst[dstIdx + 1]; } } -#if defined (DOUBLE_SUPPORT) -__kernel void arithm_exp_D6(int rows, int cols, int srcStep, int dstStep, int srcOffset, int dstOffset, __global double *src, __global double *dst) +__kernel void arithm_exp_C4(__global srcT *src, __global srcT *dst, + int cols1, int rows, + int srcOffset1, int dstOffset1, + int srcStep1, int dstStep1) { - int x = get_global_id(0); - int y = get_global_id(1); - if(x < cols && y < rows ) - { - x = x << 3; - int srcIdx = mad24( y, srcStep, x + srcOffset); - int dstIdx = mad24( y, dstStep, x + dstOffset); + int x1 = get_global_id(0) << 2; + int y = get_global_id(1); - double src_data = *((__global double *)((__global char *)src + srcIdx)); - double dst_data = exp(src_data); + if(x1 < cols1 && y < rows) + { + int srcIdx = mad24(y, srcStep1, x1 + srcOffset1); + int dstIdx = mad24(y, dstStep1, x1 + dstOffset1); - *((__global double *)((__global char *)dst + dstIdx )) = dst_data; - // dst[dstIdx] = exp(src[srcIdx]); - } + dst[dstIdx] = exp(src[srcIdx]); + dst[dstIdx + 1] = x1 + 1 < cols1 ? exp(src[srcIdx + 1]) : dst[dstIdx + 1]; + dst[dstIdx + 2] = x1 + 2 < cols1 ? exp(src[srcIdx + 2]) : dst[dstIdx + 2]; + dst[dstIdx + 3] = x1 + 3 < cols1 ? exp(src[srcIdx + 3]) : dst[dstIdx + 3]; + } } - -#endif diff --git a/modules/ocl/src/opencl/arithm_log.cl b/modules/ocl/src/opencl/arithm_log.cl index ea19c9d902..ef8c4dd04e 100644 --- a/modules/ocl/src/opencl/arithm_log.cl +++ b/modules/ocl/src/opencl/arithm_log.cl @@ -1,4 +1,3 @@ - /*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. @@ -43,52 +42,66 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ + #if defined (DOUBLE_SUPPORT) #pragma OPENCL EXTENSION cl_khr_fp64:enable #endif -#define INF_FLOAT -88.029694 -#define INF_DOUBLE -709.0895657128241 - - ////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////LOG///////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////// -__kernel void arithm_log_D5(int rows, int cols, int srcStep, int dstStep, int srcOffset, int dstOffset, __global float *src, __global float *dst) +__kernel void arithm_log_C1(__global srcT *src, __global srcT *dst, + int cols1, int rows, + int srcOffset1, int dstOffset1, + int srcStep1, int dstStep1) { int x = get_global_id(0); int y = get_global_id(1); - if(x < cols && y < rows ) + if(x < cols1 && y < rows) { - x = x << 2; - int srcIdx = mad24( y, srcStep, x + srcOffset); - int dstIdx = mad24( y, dstStep, x + dstOffset); + int srcIdx = mad24(y, srcStep1, x + srcOffset1); + int dstIdx = mad24(y, dstStep1, x + dstOffset1); - float src_data = *((__global float *)((__global char *)src + srcIdx)); - float dst_data = (src_data == 0) ? INF_FLOAT : log(fabs(src_data)); - - *((__global float *)((__global char *)dst + dstIdx)) = dst_data; + dst[dstIdx] = log(src[srcIdx]); } } -#if defined (DOUBLE_SUPPORT) -__kernel void arithm_log_D6(int rows, int cols, int srcStep, int dstStep, int srcOffset, int dstOffset, __global double *src, __global double *dst) +__kernel void arithm_log_C2(__global srcT *src, __global srcT *dst, + int cols1, int rows, + int srcOffset1, int dstOffset1, + int srcStep1, int dstStep1) { - int x = get_global_id(0); + int x1 = get_global_id(0) << 1; int y = get_global_id(1); - if(x < cols && y < rows ) + if(x1 < cols1 && y < rows) { - x = x << 3; - int srcIdx = mad24( y, srcStep, x + srcOffset); - int dstIdx = mad24( y, dstStep, x + dstOffset); + int srcIdx = mad24(y, srcStep1, x1 + srcOffset1); + int dstIdx = mad24(y, dstStep1, x1 + dstOffset1); - double src_data = *((__global double *)((__global char *)src + srcIdx)); - double dst_data = (src_data == 0) ? INF_DOUBLE : log(fabs(src_data)); - *((__global double *)((__global char *)dst + dstIdx)) = dst_data; + dst[dstIdx] = log(src[srcIdx]); + dst[dstIdx + 1] = x1 + 1 < cols1 ? log(src[srcIdx + 1]) : dst[dstIdx + 1]; + } +} + +__kernel void arithm_log_C4(__global srcT *src, __global srcT *dst, + int cols1, int rows, + int srcOffset1, int dstOffset1, + int srcStep1, int dstStep1) +{ + int x1 = get_global_id(0) << 2; + int y = get_global_id(1); + if(x1 < cols1 && y < rows) + { + int srcIdx = mad24(y, srcStep1, x1 + srcOffset1); + int dstIdx = mad24(y, dstStep1, x1 + dstOffset1); + + dst[dstIdx] = log(src[srcIdx]); + dst[dstIdx + 1] = x1 + 1 < cols1 ? log(src[srcIdx + 1]) : dst[dstIdx + 1]; + dst[dstIdx + 2] = x1 + 2 < cols1 ? log(src[srcIdx + 2]) : dst[dstIdx + 2]; + dst[dstIdx + 3] = x1 + 3 < cols1 ? log(src[srcIdx + 3]) : dst[dstIdx + 3]; } } -#endif