// Review summary. Text ahead of the first "diff --git" header is not part of any hunk.
//
// modules/ocl/src/arithm.cpp -- four host-side kernel launchers are changed:
//
//  * arithmetic_magnitude_phase_run: the channels / vector_length / offset_cols / cols
//    locals are removed and the global size becomes { dst.cols, dst.rows, 1 }.
//    Every step and offset is divided by elemSize() before being pushed as a kernel
//    argument, and dst.cols replaces `cols` as the last argument. The element type is
//    handed to the kernel through the build option "-D T=<float|double><suffix>",
//    where the suffix is channelMap[dst.channels()]; the trailing `depth` argument of
//    openCLExecuteKernel is replaced by -1 plus the build-options string.
//
//  * arithmetic_cartToPolar_run and arithmetic_ptc_run: every step and offset is
//    divided by elemSize1() before being pushed. The `tmp` int argument derived from
//    angleInDegrees is removed; the unit is selected with the build option
//    "-D DEGREE" or "-D RADIAN" instead. `depth` is still passed to openCLExecuteKernel.
//
//  * arithmetic_pow_run: the first parameter is renamed src1 -> src. Steps and offsets
//    are divided by elemSize(), the global size becomes { dst.cols, dst.rows, 1 }, the
//    dst_step1 argument is removed, and the same "-D T=..." build option is used, here
//    indexed with dst.oclchannels(). `p` is still pushed as cl_float when the context
//    lacks FEATURE_CL_DOUBLE and as cl_double otherwise.
//
// NOTE(review): as rendered here, `size_t localThreads[3] = { 64, 4, 1 };` sits on a
//   '-' line in both arithmetic_magnitude_phase_run and arithmetic_pow_run, while the
//   new openCLExecuteKernel calls still pass `localThreads`. This may be an artifact
//   of the collapsed whitespace (a removed blank line followed by a context line) --
//   confirm against the original patch that the declaration is retained.
// NOTE(review): channelMap has five entries (indices 0..4). The magnitude launcher
//   indexes it with dst.channels() and the pow launcher with dst.oclchannels() --
//   confirm the difference is intended and that the index can never exceed 4.
// NOTE(review): the build option maps every depth other than CV_32F to "double" --
//   presumably callers restrict depth to CV_32F / CV_64F; verify at the call sites.
diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index f8a069082d..997b2010fb 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -867,30 +867,32 @@ void cv::ocl::log(const oclMat &src, oclMat &dst) static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName) { - int channels = dst.oclchannels(); int depth = dst.depth(); - size_t vector_length = 1; - int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1); - int cols = divUp(dst.cols * channels + offset_cols, vector_length); - size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { cols, dst.rows, 1 }; + size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; + + int src1_step = src1.step / src1.elemSize(), src1_offset = src1.offset / src1.elemSize(); + int src2_step = src2.step / src2.elemSize(), src2_offset = src2.offset / src2.elemSize(); + int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize(); vector > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1_offset )); args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2_offset )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step )); + 
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols )); - openCLExecuteKernel(src1.clCxt, &arithm_magnitude, kernelName, globalThreads, localThreads, args, -1, depth); + const char * const channelMap[] = { "", "", "2", "4", "4" }; + std::string buildOptions = format("-D T=%s%s", depth == CV_32F ? "float" : "double", channelMap[dst.channels()]); + + openCLExecuteKernel(src1.clCxt, &arithm_magnitude, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str()); } void cv::ocl::magnitude(const oclMat &src1, const oclMat &src2, oclMat &dst) @@ -964,25 +966,29 @@ static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, o size_t localThreads[3] = { 64, 4, 1 }; size_t globalThreads[3] = { cols, src1.rows, 1 }; - int tmp = angleInDegrees ? 1 : 0; + int src1_step = src1.step / src1.elemSize1(), src1_offset = src1.offset / src1.elemSize1(); + int src2_step = src2.step / src2.elemSize1(), src2_offset = src2.offset / src2.elemSize1(); + int dst_mag_step = dst_mag.step / dst_mag.elemSize1(), dst_mag_offset = dst_mag.offset / dst_mag.elemSize1(); + int dst_cart_step = dst_cart.step / dst_cart.elemSize1(), dst_cart_offset = dst_cart.offset / dst_cart.elemSize1(); + vector > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1_offset )); args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset )); + args.push_back( 
make_pair( sizeof(cl_int), (void *)&src2_step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2_offset )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst_mag.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_mag.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_mag.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst_mag_step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst_mag_offset )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst_cart.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_cart.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_cart.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst_cart_step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst_cart_offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&tmp )); - openCLExecuteKernel(src1.clCxt, &arithm_cartToPolar, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(src1.clCxt, &arithm_cartToPolar, kernelName, globalThreads, localThreads, args, + -1, depth, angleInDegrees ? 
"-D DEGREE" : "-D RADIAN"); } void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat &angle, bool angleInDegrees) @@ -1008,37 +1014,38 @@ void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees, string kernelName) { - int channels = src2.oclchannels(); - int depth = src2.depth(); - - int cols = src2.cols * channels; - int rows = src2.rows; + int channels = src2.oclchannels(), depth = src2.depth(); + int cols = src2.cols * channels, rows = src2.rows; size_t localThreads[3] = { 64, 4, 1 }; size_t globalThreads[3] = { cols, rows, 1 }; - int tmp = angleInDegrees ? 1 : 0; + int src1_step = src1.step / src1.elemSize1(), src1_offset = src1.offset / src1.elemSize1(); + int src2_step = src2.step / src2.elemSize1(), src2_offset = src2.offset / src2.elemSize1(); + int dst1_step = dst1.step / dst1.elemSize1(), dst1_offset = dst1.offset / dst1.elemSize1(); + int dst2_step = dst2.step / dst2.elemSize1(), dst2_offset = dst2.offset / dst2.elemSize1(); + vector > args; if (src1.data) { args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1_offset )); } args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2_offset )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst1.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst1.step )); - args.push_back( 
make_pair( sizeof(cl_int), (void *)&dst1.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst1_step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst1_offset )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst2.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst2.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst2.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst2_step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst2_offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&tmp )); - openCLExecuteKernel(src1.clCxt, &arithm_polarToCart, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(src1.clCxt, &arithm_polarToCart, kernelName, globalThreads, localThreads, + args, -1, depth, angleInDegrees ? "-D DEGREE" : "-D RADIAN"); } void cv::ocl::polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees) @@ -1623,38 +1630,37 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, /////////////////////////////////// Pow ////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source) +static void arithmetic_pow_run(const oclMat &src, double p, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source) { int channels = dst.oclchannels(); int depth = dst.depth(); - size_t vector_length = 1; - int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1); - int cols = divUp(dst.cols * channels + offset_cols, vector_length); - int rows = dst.rows; - size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { cols, rows, 1 }; + size_t 
globalThreads[3] = { dst.cols, dst.rows, 1 }; + + const char * const channelMap[] = { "", "", "2", "4", "4" }; + std::string buildOptions = format("-D T=%s%s", depth == CV_32F ? "float" : "double", channelMap[channels]); + + int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize(); + int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize(); - int dst_step1 = dst.cols * dst.elemSize(); vector > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src_step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols )); float pf = static_cast(p); - if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) args.push_back( make_pair( sizeof(cl_float), (void *)&pf )); else args.push_back( make_pair( sizeof(cl_double), (void *)&p )); - openCLExecuteKernel(src1.clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str()); } void cv::ocl::pow(const oclMat &x, double p, 
oclMat &y)
// Review notes for the OpenCL kernel hunks that follow. These lines sit between two
// file diffs, ahead of the next "diff --git" header, and belong to no hunk.
//
// What the hunks do:
//  * Every kernel now computes its index as mad24(y, step, x + offset) and indexes the
//    typed __global pointer directly, replacing the former (x << 2) / (x << 3) byte
//    offsets applied through __global char * casts. The step and offset arguments
//    are therefore expected in element units.
//  * The `angInDegree` kernel argument is removed from the cartToPolar and polarToCart
//    kernels; degree handling is selected at compile time with #ifdef DEGREE.
//  * arithm_magnitude_D5/_D6 are merged into one kernel `arithm_magnitude` written
//    against the macro type T, and sqrt(a*a + b*b) becomes hypot(a, b).
//  * arithm_pow_D5/_D6 are merged into one kernel `arithm_pow` written against T; the
//    unused dst_step1 argument is dropped.
//
// Fixes applied on top of the submitted patch (only '+' lines are edited in place, so
// the per-hunk line counts are unchanged):
//  * arithm_polarToCart_D6 declares src/dst1/dst2 as __global float * but loads and
//    stores doubles. The old code reached the data through __global char * byte
//    offsets and __global double * casts, so the declared pointer type did not matter;
//    with direct indexing it would read and write 4-byte floats. The load and both
//    stores now go through an explicit __global double * cast.
//  * arithm_polarToCart_mag_D6 and arithm_polarToCart_D6 had `alpha` changed from
//    double to float, truncating the angle in the double-precision path. `alpha` is
//    double again, as it was before the patch.
//
// NOTE(review): arithm_pow multiplies the scalar `F p` by log() of a T value. If T
//   can be a float vector type while F is double, OpenCL C's scalar-to-vector
//   conversion rules may reject the expression -- confirm which T/F combinations the
//   host can actually request.
// NOTE(review): the cartToPolar kernels declare `float x` / `double x` inside the `if`
//   block, shadowing the `int x` work-item index after the indices are computed --
//   pre-existing, legal, but easy to misread.
diff --git a/modules/ocl/src/opencl/arithm_cartToPolar.cl b/modules/ocl/src/opencl/arithm_cartToPolar.cl index f634f2d421..c65f899b79 100644 --- a/modules/ocl/src/opencl/arithm_cartToPolar.cl +++ b/modules/ocl/src/opencl/arithm_cartToPolar.cl @@ -58,21 +58,21 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr __global float *src2, int src2_step, int src2_offset, __global float *dst1, int dst1_step, int dst1_offset, // magnitude __global float *dst2, int dst2_step, int dst2_offset, // cartToPolar - int rows, int cols, int angInDegree) + int rows, int cols) { int x = get_global_id(0); int y = get_global_id(1); if (x < cols && y < rows) { - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset); + int src1_index = mad24(y, src1_step, x + src1_offset); + int src2_index = mad24(y, src2_step, x + src2_offset); - int dst1_index = mad24(y, dst1_step, (x << 2) + dst1_offset); - int dst2_index = mad24(y, dst2_step, (x << 2) + dst2_offset); + int dst1_index = mad24(y, dst1_step, x + dst1_offset); + int dst2_index = mad24(y, dst2_step, x + dst2_offset); - float x = *((__global float *)((__global char *)src1 + src1_index)); - float y = *((__global float *)((__global char *)src2 + src2_index)); + float x = src1[src1_index]; + float y = src2[src2_index]; float x2 = x * x; float y2 = y * y; @@ -86,10 +86,12 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr float cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + FLT_EPSILON) + tmp : tmp1 - x*y/(y2 + 0.28f*x2 + FLT_EPSILON); - cartToPolar = angInDegree == 0 ? 
cartToPolar : cartToPolar * (180/CV_PI); +#ifdef DEGREE + cartToPolar *= (180/CV_PI); +#endif - *((__global float *)((__global char *)dst1 + dst1_index)) = magnitude; - *((__global float *)((__global char *)dst2 + dst2_index)) = cartToPolar; + dst1[dst1_index] = magnitude; + dst2[dst2_index] = cartToPolar; } } @@ -99,21 +101,21 @@ __kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int s __global double *src2, int src2_step, int src2_offset, __global double *dst1, int dst1_step, int dst1_offset, __global double *dst2, int dst2_step, int dst2_offset, - int rows, int cols, int angInDegree) + int rows, int cols) { int x = get_global_id(0); int y = get_global_id(1); if (x < cols && y < rows) { - int src1_index = mad24(y, src1_step, (x << 3) + src1_offset); - int src2_index = mad24(y, src2_step, (x << 3) + src2_offset); + int src1_index = mad24(y, src1_step, x + src1_offset); + int src2_index = mad24(y, src2_step, x + src2_offset); - int dst1_index = mad24(y, dst1_step, (x << 3) + dst1_offset); - int dst2_index = mad24(y, dst2_step, (x << 3) + dst2_offset); + int dst1_index = mad24(y, dst1_step, x + dst1_offset); + int dst2_index = mad24(y, dst2_step, x + dst2_offset); - double x = *((__global double *)((__global char *)src1 + src1_index)); - double y = *((__global double *)((__global char *)src2 + src2_index)); + double x = src1[src1_index]; + double y = src2[src2_index]; double x2 = x * x; double y2 = y * y; @@ -127,10 +129,12 @@ __kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int s double cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + DBL_EPSILON) + tmp : tmp1 - x*y/(y2 + 0.28f*x2 + DBL_EPSILON); - cartToPolar = angInDegree == 0 ? 
cartToPolar : cartToPolar * (180/CV_PI); +#ifdef DEGREE + cartToPolar *= (180/CV_PI); +#endif - *((__global double *)((__global char *)dst1 + dst1_index)) = magnitude; - *((__global double *)((__global char *)dst2 + dst2_index)) = cartToPolar; + dst1[dst1_index] = magnitude; + dst2[dst2_index] = cartToPolar; } } diff --git a/modules/ocl/src/opencl/arithm_magnitude.cl b/modules/ocl/src/opencl/arithm_magnitude.cl index 6fd2ac3832..1053efd00b 100644 --- a/modules/ocl/src/opencl/arithm_magnitude.cl +++ b/modules/ocl/src/opencl/arithm_magnitude.cl @@ -51,50 +51,24 @@ #endif #endif -__kernel void arithm_magnitude_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global float *dst, int dst_step, int dst_offset, - int rows, int cols) +__kernel void arithm_magnitude(__global T *src1, int src1_step, int src1_offset, + __global T *src2, int src2_step, int src2_offset, + __global T *dst, int dst_step, int dst_offset, + int rows, int cols) { int x = get_global_id(0); int y = get_global_id(1); if (x < cols && y < rows) { - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset); - int dst_index = mad24(y, dst_step, (x << 2) + dst_offset); + int src1_index = mad24(y, src1_step, x + src1_offset); + int src2_index = mad24(y, src2_step, x + src2_offset); + int dst_index = mad24(y, dst_step, x + dst_offset); - float data1 = *((__global float *)((__global char *)src1 + src1_index)); - float data2 = *((__global float *)((__global char *)src2 + src2_index)); + T data1 = src1[src1_index]; + T data2 = src2[src2_index]; - float tmp = sqrt(data1 * data1 + data2 * data2); - - *((__global float *)((__global char *)dst + dst_index)) = tmp; - } -} - -#if defined (DOUBLE_SUPPORT) -__kernel void arithm_magnitude_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global double *dst, int 
dst_step, int dst_offset, - int rows, int cols) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - int src1_index = mad24(y, src1_step, (x << 3) + src1_offset); - int src2_index = mad24(y, src2_step, (x << 3) + src2_offset); - int dst_index = mad24(y, dst_step, (x << 3) + dst_offset); - - double data1 = *((__global double *)((__global char *)src1 + src1_index)); - double data2 = *((__global double *)((__global char *)src2 + src2_index)); - - double tmp = sqrt(data1 * data1 + data2 * data2); - - *((__global double *)((__global char *)dst + dst_index)) = tmp; + T tmp = hypot(data1, data2); + dst[dst_index] = tmp; } } -#endif diff --git a/modules/ocl/src/opencl/arithm_polarToCart.cl b/modules/ocl/src/opencl/arithm_polarToCart.cl index f3ec3117dc..024f1f0ee4 100644 --- a/modules/ocl/src/opencl/arithm_polarToCart.cl +++ b/modules/ocl/src/opencl/arithm_polarToCart.cl @@ -57,33 +57,38 @@ ///////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////polarToCart with magnitude////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////// + __kernel void arithm_polarToCart_mag_D5 (__global float *src1, int src1_step, int src1_offset,//magnitue __global float *src2, int src2_step, int src2_offset,//angle __global float *dst1, int dst1_step, int dst1_offset, __global float *dst2, int dst2_step, int dst2_offset, - int rows, int cols, int angInDegree) + int rows, int cols) { int x = get_global_id(0); int y = get_global_id(1); if (x < cols && y < rows) { - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset); + int src1_index = mad24(y, src1_step, x + src1_offset); + int src2_index = mad24(y, src2_step, x + src2_offset); - int dst1_index = mad24(y, dst1_step, (x << 2) + dst1_offset); - int dst2_index = mad24(y, 
dst2_step, (x << 2) + dst2_offset); + int dst1_index = mad24(y, dst1_step, x + dst1_offset); + int dst2_index = mad24(y, dst2_step, x + dst2_offset); - float x = *((__global float *)((__global char *)src1 + src1_index)); - float y = *((__global float *)((__global char *)src2 + src2_index)); + float x = src1[src1_index]; + float y = src2[src2_index]; +#ifdef DEGREE float ascale = CV_PI/180.0f; - float alpha = angInDegree == 1 ? y * ascale : y; + float alpha = y * ascale; +#else + float alpha = y; +#endif float a = cos(alpha) * x; float b = sin(alpha) * x; - *((__global float *)((__global char *)dst1 + dst1_index)) = a; - *((__global float *)((__global char *)dst2 + dst2_index)) = b; + dst1[dst1_index] = a; + dst2[dst2_index] = b; } } @@ -92,29 +97,33 @@ __kernel void arithm_polarToCart_mag_D6 (__global double *src1, int src1_step, i __global double *src2, int src2_step, int src2_offset,//angle __global double *dst1, int dst1_step, int dst1_offset, __global double *dst2, int dst2_step, int dst2_offset, - int rows, int cols, int angInDegree) + int rows, int cols) { int x = get_global_id(0); int y = get_global_id(1); if (x < cols && y < rows) { - int src1_index = mad24(y, src1_step, (x << 3) + src1_offset); - int src2_index = mad24(y, src2_step, (x << 3) + src2_offset); + int src1_index = mad24(y, src1_step, x + src1_offset); + int src2_index = mad24(y, src2_step, x + src2_offset); - int dst1_index = mad24(y, dst1_step, (x << 3) + dst1_offset); - int dst2_index = mad24(y, dst2_step, (x << 3) + dst2_offset); + int dst1_index = mad24(y, dst1_step, x + dst1_offset); + int dst2_index = mad24(y, dst2_step, x + dst2_offset); - double x = *((__global double *)((__global char *)src1 + src1_index)); - double y = *((__global double *)((__global char *)src2 + src2_index)); + double x = src1[src1_index]; + double y = src2[src2_index]; +#ifdef DEGREE float ascale = CV_PI/180.0; - double alpha = angInDegree == 1 ? 
y * ascale : y; + double alpha = y * ascale; +#else + double alpha = y; +#endif double a = cos(alpha) * x; double b = sin(alpha) * x; - *((__global double *)((__global char *)dst1 + dst1_index)) = a; - *((__global double *)((__global char *)dst2 + dst2_index)) = b; + dst1[dst1_index] = a; + dst2[dst2_index] = b; } } #endif @@ -122,30 +131,35 @@ __kernel void arithm_polarToCart_mag_D6 (__global double *src1, int src1_step, i ///////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////polarToCart without magnitude////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////// + __kernel void arithm_polarToCart_D5 (__global float *src, int src_step, int src_offset,//angle __global float *dst1, int dst1_step, int dst1_offset, __global float *dst2, int dst2_step, int dst2_offset, - int rows, int cols, int angInDegree) + int rows, int cols) { int x = get_global_id(0); int y = get_global_id(1); if (x < cols && y < rows) { - int src_index = mad24(y, src_step, (x << 2) + src_offset); + int src_index = mad24(y, src_step, x + src_offset); - int dst1_index = mad24(y, dst1_step, (x << 2) + dst1_offset); - int dst2_index = mad24(y, dst2_step, (x << 2) + dst2_offset); + int dst1_index = mad24(y, dst1_step, x + dst1_offset); + int dst2_index = mad24(y, dst2_step, x + dst2_offset); - float y = *((__global float *)((__global char *)src + src_index)); + float y = src[src_index]; +#ifdef DEGREE float ascale = CV_PI/180.0f; - float alpha = angInDegree == 1 ? 
y * ascale : y; + float alpha = y * ascale; +#else + float alpha = y; +#endif float a = cos(alpha); float b = sin(alpha); - *((__global float *)((__global char *)dst1 + dst1_index)) = a; - *((__global float *)((__global char *)dst2 + dst2_index)) = b; + dst1[dst1_index] = a; + dst2[dst2_index] = b; } } @@ -153,27 +167,31 @@ __kernel void arithm_polarToCart_D5 (__global float *src, int src_step, int sr __kernel void arithm_polarToCart_D6 (__global float *src, int src_step, int src_offset,//angle __global float *dst1, int dst1_step, int dst1_offset, __global float *dst2, int dst2_step, int dst2_offset, - int rows, int cols, int angInDegree) + int rows, int cols) { int x = get_global_id(0); int y = get_global_id(1); if (x < cols && y < rows) { - int src_index = mad24(y, src_step, (x << 3) + src_offset); + int src_index = mad24(y, src_step, x + src_offset); - int dst1_index = mad24(y, dst1_step, (x << 3) + dst1_offset); - int dst2_index = mad24(y, dst2_step, (x << 3) + dst2_offset); + int dst1_index = mad24(y, dst1_step, x + dst1_offset); + int dst2_index = mad24(y, dst2_step, x + dst2_offset); - double y = *((__global double *)((__global char *)src + src_index)); + double y = ((__global double *)src)[src_index]; /* buffer holds doubles despite the float * signature */ - float ascale = CV_PI/180.0; - double alpha = angInDegree == 1 ? 
y * ascale : y; +#ifdef DEGREE + float ascale = CV_PI/180.0f; + double alpha = y * ascale; +#else + double alpha = y; +#endif double a = cos(alpha); double b = sin(alpha); - *((__global double *)((__global char *)dst1 + dst1_index)) = a; - *((__global double *)((__global char *)dst2 + dst2_index)) = b; + ((__global double *)dst1)[dst1_index] = a; + ((__global double *)dst2)[dst2_index] = b; } } #endif diff --git a/modules/ocl/src/opencl/arithm_pow.cl b/modules/ocl/src/opencl/arithm_pow.cl index 36a22b6281..bb0673d4a3 100644 --- a/modules/ocl/src/opencl/arithm_pow.cl +++ b/modules/ocl/src/opencl/arithm_pow.cl @@ -56,45 +56,21 @@ /************************************** pow **************************************/ -__kernel void arithm_pow_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1, F p) +__kernel void arithm_pow(__global T * src, int src_step, int src_offset, + __global T * dst, int dst_step, int dst_offset, + int rows, int cols, F p) { - int x = get_global_id(0); int y = get_global_id(1); - if(x < cols && y < rows) + if (x < cols && y < rows) { - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); - int dst_index = mad24(y, dst_step, (x << 2) + dst_offset); - - float src1_data = *((__global float *)((__global char *)src1 + src1_index)); - float tmp = src1_data > 0 ? exp(p * log(src1_data)) : (src1_data == 0 ? 0 : exp(p * log(fabs(src1_data)))); - - *((__global float *)((__global char *)dst + dst_index)) = tmp; - } -} - -#if defined (DOUBLE_SUPPORT) - -__kernel void arithm_pow_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1, F p) -{ + int src_index = mad24(y, src_step, x + src_offset); + int dst_index = mad24(y, dst_step, x + dst_offset); - int x = get_global_id(0); - int y = get_global_id(1); + T src_data = src[src_index]; + T tmp = src_data > 0 ? exp(p * log(src_data)) : (src_data == 0 ? 
0 : exp(p * log(fabs(src_data)))); - if(x < cols && y < rows) - { - int src1_index = mad24(y, src1_step, (x << 3) + src1_offset); - int dst_index = mad24(y, dst_step, (x << 3) + dst_offset); - - double src1_data = *((__global double *)((__global char *)src1 + src1_index)); - double tmp = src1_data > 0 ? exp(p * log(src1_data)) : (src1_data == 0 ? 0 : exp(p * log(fabs(src1_data)))); - *((__global double *)((__global char *)dst + dst_index)) = tmp; + dst[dst_index] = tmp; } } - -#endif