From 28ac23c1a1e4f79ae68c6bbdecabbcdb57526c2a Mon Sep 17 00:00:00 2001 From: peng xiao Date: Thu, 26 Sep 2013 12:59:21 +0800 Subject: [PATCH 01/39] If device does not support double, returns immediately. Double floating points are not default supported by OpenCL. --- modules/ocl/test/test_matrix_operation.cpp | 30 ++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/modules/ocl/test/test_matrix_operation.cpp b/modules/ocl/test/test_matrix_operation.cpp index b70ee6ccd9..d1d24689b5 100644 --- a/modules/ocl/test/test_matrix_operation.cpp +++ b/modules/ocl/test/test_matrix_operation.cpp @@ -131,6 +131,11 @@ typedef ConvertToTestBase ConvertTo; TEST_P(ConvertTo, Accuracy) { + if((src_depth == CV_64F || dst_depth == CV_64F) && + !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::Context::CL_DOUBLE)) + { + return; // returns silently + } for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); @@ -222,6 +227,11 @@ typedef CopyToTestBase CopyTo; TEST_P(CopyTo, Without_mask) { + if((src.depth() == CV_64F) && + !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::Context::CL_DOUBLE)) + { + return; // returns silently + } for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); @@ -235,6 +245,11 @@ TEST_P(CopyTo, Without_mask) TEST_P(CopyTo, With_mask) { + if(src.depth() == CV_64F && + !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::Context::CL_DOUBLE)) + { + return; // returns silently + } for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); @@ -326,6 +341,11 @@ typedef SetToTestBase SetTo; TEST_P(SetTo, Without_mask) { + if(depth == CV_64F && + !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::Context::CL_DOUBLE)) + { + return; // returns silently + } for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); @@ -339,6 +359,11 @@ TEST_P(SetTo, Without_mask) TEST_P(SetTo, With_mask) { + if(depth == CV_64F && + !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::Context::CL_DOUBLE)) + { + return; // returns silently + } for (int j = 0; j 
< LOOP_TIMES; j++) { random_roi(); @@ -404,6 +429,11 @@ PARAM_TEST_CASE(convertC3C4, MatType, bool) TEST_P(convertC3C4, Accuracy) { + if(depth == CV_64F && + !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::Context::CL_DOUBLE)) + { + return; // returns silently + } for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); From 9dca7555b4eb8cd6eaa40c756631de0001985aef Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 27 Sep 2013 15:53:47 +0400 Subject: [PATCH 02/39] fixed ocl::minMax --- modules/ocl/src/arithm.cpp | 137 +++++++--------- modules/ocl/src/opencl/arithm_minMax.cl | 202 +++++++++--------------- modules/ocl/test/test_arithm.cpp | 2 +- 3 files changed, 135 insertions(+), 206 deletions(-) diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index 0dd695bfa6..0962f9256c 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -68,7 +68,6 @@ namespace cv extern const char *arithm_sum; extern const char *arithm_sum_3; extern const char *arithm_minMax; - extern const char *arithm_minMax_mask; extern const char *arithm_minMaxLoc; extern const char *arithm_minMaxLoc_mask; extern const char *arithm_LUT; @@ -455,139 +454,121 @@ void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev) //////////////////////////////////// minMax ///////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -static void arithmetic_minMax_run(const oclMat &src, const oclMat &mask, cl_mem &dst, int vlen , int groupnum, string kernelName) +template +static void arithmetic_minMax_run(const oclMat &src, const oclMat & mask, cl_mem &dst, int groupnum, string kernelName) { - vector > args; - int all_cols = src.step / (vlen * src.elemSize1()); - int pre_cols = (src.offset % src.step) / (vlen * src.elemSize1()); - int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / (vlen * src.elemSize1()) - 1; + int all_cols = src.step / src.elemSize(); + int 
pre_cols = (src.offset % src.step) / src.elemSize(); + int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1; int invalid_cols = pre_cols + sec_cols; - int cols = all_cols - invalid_cols , elemnum = cols * src.rows;; - int offset = src.offset / (vlen * src.elemSize1()); - int repeat_s = src.offset / src.elemSize1() - offset * vlen; - int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels(); - char build_options[50]; - sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d", src.depth(), repeat_s, repeat_e); + int cols = all_cols - invalid_cols , elemnum = cols * src.rows; + int offset = src.offset / src.elemSize(); + + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + const char * const channelMap[] = { " ", " ", "2", "4", "4" }; + + ostringstream stream; + stream << "-D T=" << typeMap[src.depth()] << channelMap[src.channels()]; + stream << " -D MAX_VAL=" << (WT)numeric_limits::max(); + stream << " -D MIN_VAL=" << (WT)numeric_limits::min(); + string buildOptions = stream.str(); + + vector > args; + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst )); args.push_back( make_pair( sizeof(cl_int) , (void *)&cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&offset)); args.push_back( make_pair( sizeof(cl_int) , (void *)&elemnum)); args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum)); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + + int minvalid_cols = 0, moffset = 0; if (!mask.empty()) { - int mall_cols = mask.step / (vlen * mask.elemSize1()); - int mpre_cols = (mask.offset % mask.step) / (vlen * mask.elemSize1()); - int msec_cols = mall_cols - (mask.offset % mask.step + mask.cols * mask.elemSize() - 1) / (vlen * mask.elemSize1()) - 1; - 
int minvalid_cols = mpre_cols + msec_cols; - int moffset = mask.offset / (vlen * mask.elemSize1()); + int mall_cols = mask.step / mask.elemSize(); + int mpre_cols = (mask.offset % mask.step) / mask.elemSize(); + int msec_cols = mall_cols - (mask.offset % mask.step + mask.cols * mask.elemSize() - 1) / mask.elemSize() - 1; + minvalid_cols = mpre_cols + msec_cols; + moffset = mask.offset / mask.elemSize(); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&minvalid_cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&moffset )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data )); + + kernelName += "_mask"; } - args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst )); - size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1}; - openCLExecuteKernel(src.clCxt, &arithm_minMax, kernelName, gt, lt, args, -1, -1, build_options); -} + size_t globalThreads[3] = {groupnum * 256, 1, 1}; + size_t localThreads[3] = {256, 1, 1}; -static void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl_mem &dst, int vlen, int groupnum, string kernelName) -{ - vector > args; - size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1}; - char build_options[50]; - if (src.oclchannels() == 1) - { - int cols = (src.cols - 1) / vlen + 1; - int invalid_cols = src.step / (vlen * src.elemSize1()) - cols; - int offset = src.offset / src.elemSize1(); - int repeat_me = vlen - (mask.cols % vlen == 0 ? 
vlen : mask.cols % vlen); - int minvalid_cols = mask.step / (vlen * mask.elemSize1()) - cols; - int moffset = mask.offset / mask.elemSize1(); - int elemnum = cols * src.rows; - sprintf(build_options, "-D DEPTH_%d -D REPEAT_E%d", src.depth(), repeat_me); - args.push_back( make_pair( sizeof(cl_int) , (void *)&cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&offset)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&elemnum)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum)); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&minvalid_cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&moffset )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst )); - openCLExecuteKernel(src.clCxt, &arithm_minMax_mask, kernelName, gt, lt, args, -1, -1, build_options); - } + openCLExecuteKernel(src.clCxt, &arithm_minMax, kernelName, globalThreads, localThreads, + args, -1, -1, buildOptions.c_str()); } -template void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, +template +void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf) { size_t groupnum = src.clCxt->computeUnits(); CV_Assert(groupnum != 0); - groupnum = groupnum * 2; - int vlen = 8; - int dbsize = groupnum * 2 * vlen * sizeof(T) ; + int dbsize = groupnum * 2 * src.elemSize(); ensureSizeIsEnough(1, dbsize, CV_8UC1, buf); cl_mem buf_data = reinterpret_cast(buf.data); - - if (mask.empty()) - { - arithmetic_minMax_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax"); - } - else - { - arithmetic_minMax_mask_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax_mask"); - } + arithmetic_minMax_run(src, mask, buf_data, groupnum, "arithm_op_minMax"); Mat matbuf = Mat(buf); T *p = 
matbuf.ptr(); if (minVal != NULL) { *minVal = std::numeric_limits::max(); - for (int i = 0; i < vlen * (int)groupnum; i++) - { + for (int i = 0, end = src.oclchannels() * (int)groupnum; i < end; i++) *minVal = *minVal < p[i] ? *minVal : p[i]; - } } if (maxVal != NULL) { *maxVal = -std::numeric_limits::max(); - for (int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++) - { + for (int i = src.oclchannels() * (int)groupnum, end = i << 1; i < end; i++) *maxVal = *maxVal > p[i] ? *maxVal : p[i]; - } } } -typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf); + void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) { oclMat buf; minMax_buf(src, minVal, maxVal, mask, buf); } +typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf); + void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf) { - CV_Assert(src.oclchannels() == 1); + CV_Assert(src.channels() == 1); + CV_Assert(src.size() == mask.size() || mask.empty()); + CV_Assert(src.step % src.elemSize() == 0); + + if (minVal == NULL && maxVal == NULL) + return; + if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } + static minMaxFunc functab[8] = { - arithmetic_minMax, - arithmetic_minMax, - arithmetic_minMax, - arithmetic_minMax, - arithmetic_minMax, - arithmetic_minMax, - arithmetic_minMax, + arithmetic_minMax, + arithmetic_minMax, + arithmetic_minMax, + arithmetic_minMax, + arithmetic_minMax, + arithmetic_minMax, + arithmetic_minMax, 0 }; + minMaxFunc func; func = functab[src.depth()]; func(src, minVal, maxVal, mask, buf); diff --git a/modules/ocl/src/opencl/arithm_minMax.cl b/modules/ocl/src/opencl/arithm_minMax.cl index 23b2933066..c5d3ec2abd 100644 --- a/modules/ocl/src/opencl/arithm_minMax.cl +++ 
b/modules/ocl/src/opencl/arithm_minMax.cl @@ -53,169 +53,117 @@ #endif #endif -#if defined (DEPTH_0) -#define VEC_TYPE uchar8 -#define CONVERT_TYPE convert_uchar8 -#define MIN_VAL 0 -#define MAX_VAL 255 -#endif -#if defined (DEPTH_1) -#define VEC_TYPE char8 -#define CONVERT_TYPE convert_char8 -#define MIN_VAL -128 -#define MAX_VAL 127 -#endif -#if defined (DEPTH_2) -#define VEC_TYPE ushort8 -#define CONVERT_TYPE convert_ushort8 -#define MIN_VAL 0 -#define MAX_VAL 65535 -#endif -#if defined (DEPTH_3) -#define VEC_TYPE short8 -#define CONVERT_TYPE convert_short8 -#define MIN_VAL -32768 -#define MAX_VAL 32767 -#endif -#if defined (DEPTH_4) -#define VEC_TYPE int8 -#define CONVERT_TYPE convert_int8 -#define MIN_VAL INT_MIN -#define MAX_VAL INT_MAX -#endif -#if defined (DEPTH_5) -#define VEC_TYPE float8 -#define CONVERT_TYPE convert_float8 -#define MIN_VAL (-FLT_MAX) -#define MAX_VAL FLT_MAX -#endif -#if defined (DEPTH_6) -#define VEC_TYPE double8 -#define CONVERT_TYPE convert_double8 -#define MIN_VAL (-DBL_MAX) -#define MAX_VAL DBL_MAX -#endif - -#if defined (REPEAT_S0) -#define repeat_s(a) a = a; -#endif -#if defined (REPEAT_S1) -#define repeat_s(a) a.s0 = a.s1; -#endif -#if defined (REPEAT_S2) -#define repeat_s(a) a.s0 = a.s2;a.s1 = a.s2; -#endif -#if defined (REPEAT_S3) -#define repeat_s(a) a.s0 = a.s3;a.s1 = a.s3;a.s2 = a.s3; -#endif -#if defined (REPEAT_S4) -#define repeat_s(a) a.s0 = a.s4;a.s1 = a.s4;a.s2 = a.s4;a.s3 = a.s4; -#endif -#if defined (REPEAT_S5) -#define repeat_s(a) a.s0 = a.s5;a.s1 = a.s5;a.s2 = a.s5;a.s3 = a.s5;a.s4 = a.s5; -#endif -#if defined (REPEAT_S6) -#define repeat_s(a) a.s0 = a.s6;a.s1 = a.s6;a.s2 = a.s6;a.s3 = a.s6;a.s4 = a.s6;a.s5 = a.s6; -#endif -#if defined (REPEAT_S7) -#define repeat_s(a) a.s0 = a.s7;a.s1 = a.s7;a.s2 = a.s7;a.s3 = a.s7;a.s4 = a.s7;a.s5 = a.s7;a.s6 = a.s7; -#endif - -#if defined (REPEAT_E0) -#define repeat_e(a) a = a; -#endif -#if defined (REPEAT_E1) -#define repeat_e(a) a.s7 = a.s6; -#endif -#if defined (REPEAT_E2) 
-#define repeat_e(a) a.s7 = a.s5;a.s6 = a.s5; -#endif -#if defined (REPEAT_E3) -#define repeat_e(a) a.s7 = a.s4;a.s6 = a.s4;a.s5 = a.s4; -#endif -#if defined (REPEAT_E4) -#define repeat_e(a) a.s7 = a.s3;a.s6 = a.s3;a.s5 = a.s3;a.s4 = a.s3; -#endif -#if defined (REPEAT_E5) -#define repeat_e(a) a.s7 = a.s2;a.s6 = a.s2;a.s5 = a.s2;a.s4 = a.s2;a.s3 = a.s2; -#endif -#if defined (REPEAT_E6) -#define repeat_e(a) a.s7 = a.s1;a.s6 = a.s1;a.s5 = a.s1;a.s4 = a.s1;a.s3 = a.s1;a.s2 = a.s1; -#endif -#if defined (REPEAT_E7) -#define repeat_e(a) a.s7 = a.s0;a.s6 = a.s0;a.s5 = a.s0;a.s4 = a.s0;a.s3 = a.s0;a.s2 = a.s0;a.s1 = a.s0; -#endif - #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable #pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable /**************************************Array minMax**************************************/ -__kernel void arithm_op_minMax (int cols,int invalid_cols,int offset,int elemnum,int groupnum, - __global VEC_TYPE *src, __global VEC_TYPE *dst) + +__kernel void arithm_op_minMax(__global const T * src, __global T * dst, + int cols, int invalid_cols, int offset, int elemnum, int groupnum) { unsigned int lid = get_local_id(0); unsigned int gid = get_group_id(0); - unsigned int id = get_global_id(0); + unsigned int id = get_global_id(0); + unsigned int idx = offset + id + (id / cols) * invalid_cols; - __local VEC_TYPE localmem_max[128],localmem_min[128]; - VEC_TYPE minval,maxval,temp; - if(id < elemnum) + + __local T localmem_max[128], localmem_min[128]; + T minval = (T)(MAX_VAL), maxval = (T)(MIN_VAL), temp; + + for (int grainSize = groupnum << 8; id < elemnum; id += grainSize) { + idx = offset + id + (id / cols) * invalid_cols; temp = src[idx]; - if(id % cols == 0 ) - { - repeat_s(temp); - } - if(id % cols == cols - 1) + minval = min(minval, temp); + maxval = max(maxval, temp); + } + + if(lid > 127) + { + localmem_min[lid - 128] = minval; + localmem_max[lid - 128] = maxval; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if(lid < 
128) + { + localmem_min[lid] = min(minval, localmem_min[lid]); + localmem_max[lid] = max(maxval, localmem_max[lid]); + } + barrier(CLK_LOCAL_MEM_FENCE); + + for (int lsize = 64; lsize > 0; lsize >>= 1) + { + if (lid < lsize) { - repeat_e(temp); + int lid2 = lsize + lid; + localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]); + localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]); } - minval = temp; - maxval = temp; + barrier(CLK_LOCAL_MEM_FENCE); } - else + + if (lid == 0) { - minval = MAX_VAL; - maxval = MIN_VAL; + dst[gid] = localmem_min[0]; + dst[gid + groupnum] = localmem_max[0]; } - for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8)) +} + +__kernel void arithm_op_minMax_mask(__global const T * src, __global T * dst, + int cols, int invalid_cols, int offset, + int elemnum, int groupnum, + const __global uchar * mask, int minvalid_cols, int moffset) +{ + unsigned int lid = get_local_id(0); + unsigned int gid = get_group_id(0); + unsigned int id = get_global_id(0); + + unsigned int idx = offset + id + (id / cols) * invalid_cols; + unsigned int midx = moffset + id + (id / cols) * minvalid_cols; + + __local T localmem_max[128], localmem_min[128]; + T minval = (T)(MAX_VAL), maxval = (T)(MIN_VAL), temp; + + for (int grainSize = groupnum << 8; id < elemnum; id += grainSize) { idx = offset + id + (id / cols) * invalid_cols; - temp = src[idx]; - if(id % cols == 0 ) - { - repeat_s(temp); - } - if(id % cols == cols - 1) + midx = moffset + id + (id / cols) * minvalid_cols; + + if (mask[midx]) { - repeat_e(temp); + temp = src[idx]; + minval = min(minval, temp); + maxval = max(maxval, temp); } - minval = min(minval,temp); - maxval = max(maxval,temp); } + if(lid > 127) { localmem_min[lid - 128] = minval; localmem_max[lid - 128] = maxval; } barrier(CLK_LOCAL_MEM_FENCE); + if(lid < 128) { - localmem_min[lid] = min(minval,localmem_min[lid]); - localmem_max[lid] = max(maxval,localmem_max[lid]); + localmem_min[lid] = min(minval, 
localmem_min[lid]); + localmem_max[lid] = max(maxval, localmem_max[lid]); } barrier(CLK_LOCAL_MEM_FENCE); - for(int lsize = 64; lsize > 0; lsize >>= 1) + + for (int lsize = 64; lsize > 0; lsize >>= 1) { - if(lid < lsize) + if (lid < lsize) { int lid2 = lsize + lid; - localmem_min[lid] = min(localmem_min[lid] , localmem_min[lid2]); - localmem_max[lid] = max(localmem_max[lid] , localmem_max[lid2]); + localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]); + localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]); } barrier(CLK_LOCAL_MEM_FENCE); } - if( lid == 0) + + if (lid == 0) { dst[gid] = localmem_min[0]; dst[gid + groupnum] = localmem_max[0]; diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp index 9b20dbf89c..acac38fea2 100644 --- a/modules/ocl/test/test_arithm.cpp +++ b/modules/ocl/test/test_arithm.cpp @@ -753,7 +753,7 @@ TEST_P(MinMax, MAT) } } -TEST_P(MinMax, DISABLED_MASK) +TEST_P(MinMax, MASK) { for (int j = 0; j < LOOP_TIMES; j++) { From b54228fb8332c8537738cd05be2a79cd334b54a2 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 27 Sep 2013 17:32:31 +0400 Subject: [PATCH 03/39] fixed ocl::countNonZero --- modules/ocl/src/arithm.cpp | 48 ++++--- modules/ocl/src/opencl/arithm_nonzero.cl | 174 +++++------------------ 2 files changed, 68 insertions(+), 154 deletions(-) diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index 0962f9256c..24420f477d 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -1209,21 +1209,22 @@ void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal, ///////////////////////////// countNonZero /////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int vlen , int groupnum, string kernelName) +static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int groupnum, string kernelName) { - 
vector > args; - int all_cols = src.step / (vlen * src.elemSize1()); - int pre_cols = (src.offset % src.step) / (vlen * src.elemSize1()); - int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / (vlen * src.elemSize1()) - 1; + int ochannels = src.oclchannels(); + int all_cols = src.step / src.elemSize(); + int pre_cols = (src.offset % src.step) / src.elemSize(); + int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1; int invalid_cols = pre_cols + sec_cols; int cols = all_cols - invalid_cols , elemnum = cols * src.rows;; - int offset = src.offset / (vlen * src.elemSize1()); - int repeat_s = src.offset / src.elemSize1() - offset * vlen; - int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels(); + int offset = src.offset / src.elemSize(); - char build_options[50]; - sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d", src.depth(), repeat_s, repeat_e); + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + const char * const channelMap[] = { " ", " ", "2", "4", "4" }; + string buildOptions = format("-D srcT=%s%s -D dstT=int%s", typeMap[src.depth()], channelMap[ochannels], + channelMap[ochannels]); + vector > args; args.push_back( make_pair( sizeof(cl_int) , (void *)&cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&offset)); @@ -1231,33 +1232,44 @@ static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int vlen args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst )); - size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1}; - openCLExecuteKernel(src.clCxt, &arithm_nonzero, kernelName, gt, lt, args, -1, -1, build_options); + + size_t globalThreads[3] = { 
groupnum * 256, 1, 1 }; + size_t localThreads[3] = { 256, 1, 1 }; + + openCLExecuteKernel(src.clCxt, &arithm_nonzero, kernelName, globalThreads, localThreads, + args, -1, -1, buildOptions.c_str()); } int cv::ocl::countNonZero(const oclMat &src) { - size_t groupnum = src.clCxt->computeUnits(); + CV_Assert(src.step % src.elemSize() == 0); + CV_Assert(src.channels() == 1); + + Context *clCxt = src.clCxt; if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "selected device doesn't support double"); } + + size_t groupnum = src.clCxt->computeUnits(); CV_Assert(groupnum != 0); - int vlen = 8 , dbsize = groupnum * vlen; - Context *clCxt = src.clCxt; + int dbsize = groupnum; + string kernelName = "arithm_op_nonzero"; AutoBuffer _buf(dbsize); int *p = (int*)_buf, nonzero = 0; - cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(int)); - arithmetic_countNonZero_run(src, dstBuffer, vlen, groupnum, kernelName); - memset(p, 0, dbsize * sizeof(int)); + + cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(int)); + arithmetic_countNonZero_run(src, dstBuffer, groupnum, kernelName); openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(int)); + for (int i = 0; i < dbsize; i++) nonzero += p[i]; openCLSafeCall(clReleaseMemObject(dstBuffer)); + return nonzero; } diff --git a/modules/ocl/src/opencl/arithm_nonzero.cl b/modules/ocl/src/opencl/arithm_nonzero.cl index e34207de36..921367b3df 100644 --- a/modules/ocl/src/opencl/arithm_nonzero.cl +++ b/modules/ocl/src/opencl/arithm_nonzero.cl @@ -41,151 +41,53 @@ // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. 
// -/// -/**************************************PUBLICFUNC*************************************/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) #pragma OPENCL EXTENSION cl_khr_fp64:enable #endif - -#if defined (DEPTH_0) -#define VEC_TYPE uchar8 -#endif -#if defined (DEPTH_1) -#define VEC_TYPE char8 -#endif -#if defined (DEPTH_2) -#define VEC_TYPE ushort8 -#endif -#if defined (DEPTH_3) -#define VEC_TYPE short8 -#endif -#if defined (DEPTH_4) -#define VEC_TYPE int8 -#endif -#if defined (DEPTH_5) -#define VEC_TYPE float8 -#endif -#if defined (DEPTH_6) -#define VEC_TYPE double8 #endif -#if defined (REPEAT_S0) -#define repeat_s(a) a = a; -#endif -#if defined (REPEAT_S1) -#define repeat_s(a) a.s0 = 0; -#endif -#if defined (REPEAT_S2) -#define repeat_s(a) a.s0 = 0;a.s1 = 0; -#endif -#if defined (REPEAT_S3) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0; -#endif -#if defined (REPEAT_S4) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0; -#endif -#if defined (REPEAT_S5) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0; -#endif -#if defined (REPEAT_S6) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0; -#endif -#if defined (REPEAT_S7) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;a.s6 = 0; -#endif +/**************************************Count NonZero**************************************/ -#if defined (REPEAT_E0) -#define repeat_e(a) a = a; -#endif -#if defined (REPEAT_E1) -#define repeat_e(a) a.s7 = 0; -#endif -#if defined (REPEAT_E2) -#define repeat_e(a) a.s7 = 0;a.s6 = 0; -#endif -#if defined (REPEAT_E3) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0; -#endif -#if defined (REPEAT_E4) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0; -#endif -#if defined (REPEAT_E5) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0; -#endif -#if defined (REPEAT_E6) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 
0;a.s4 = 0;a.s3 = 0;a.s2 = 0; -#endif -#if defined (REPEAT_E7) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;a.s1 = 0; -#endif +__kernel void arithm_op_nonzero(int cols, int invalid_cols, int offset, int elemnum, int groupnum, + __global srcT *src, __global dstT *dst) +{ + unsigned int lid = get_local_id(0); + unsigned int gid = get_group_id(0); + unsigned int id = get_global_id(0); -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable + unsigned int idx = offset + id + (id / cols) * invalid_cols; + __local dstT localmem_nonzero[128]; + dstT nonzero = (dstT)(0); + srcT zero = (srcT)(0), one = (srcT)(1); -/**************************************Count NonZero**************************************/ -__kernel void arithm_op_nonzero (int cols,int invalid_cols,int offset,int elemnum,int groupnum, - __global VEC_TYPE *src, __global int8 *dst) -{ - unsigned int lid = get_local_id(0); - unsigned int gid = get_group_id(0); - unsigned int id = get_global_id(0); - unsigned int idx = offset + id + (id / cols) * invalid_cols; - __local int8 localmem_nonzero[128]; - int8 nonzero; - VEC_TYPE zero=0,one=1,temp; - if(id < elemnum) - { - temp = src[idx]; - if(id % cols == 0 ) - { - repeat_s(temp); - } - if(id % cols == cols - 1) - { - repeat_e(temp); - } - nonzero = convert_int8(temp == zero ? zero:one); - } - else - { - nonzero = 0; - } - for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8)) - { - idx = offset + id + (id / cols) * invalid_cols; - temp = src[idx]; - if(id % cols == 0 ) - { - repeat_s(temp); - } - if(id % cols == cols - 1) - { - repeat_e(temp); - } - nonzero = nonzero + convert_int8(temp == zero ? 
zero:one); - } - if(lid > 127) - { - localmem_nonzero[lid - 128] = nonzero; - } - barrier(CLK_LOCAL_MEM_FENCE); - if(lid < 128) - { - localmem_nonzero[lid] = nonzero + localmem_nonzero[lid]; - } - barrier(CLK_LOCAL_MEM_FENCE); - for(int lsize = 64; lsize > 0; lsize >>= 1) - { - if(lid < lsize) - { + for (int grain = groupnum << 8; id < elemnum; id += grain) + { + idx = offset + id + (id / cols) * invalid_cols; + nonzero += src[idx] == zero ? zero : one; + } + + if (lid > 127) + localmem_nonzero[lid - 128] = nonzero; + barrier(CLK_LOCAL_MEM_FENCE); + + if (lid < 128) + localmem_nonzero[lid] = nonzero + localmem_nonzero[lid]; + barrier(CLK_LOCAL_MEM_FENCE); + + for (int lsize = 64; lsize > 0; lsize >>= 1) + { + if (lid < lsize) + { int lid2 = lsize + lid; localmem_nonzero[lid] = localmem_nonzero[lid] + localmem_nonzero[lid2]; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - if( lid == 0) - { - dst[gid] = localmem_nonzero[0]; - } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + + if (lid == 0) + dst[gid] = localmem_nonzero[0]; } From 308fbeb04baa579dde1ca2ec917b0d2375d85f35 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Mon, 30 Sep 2013 12:44:59 +0400 Subject: [PATCH 04/39] Make libv4l support optional for testing purposes. 
--- CMakeLists.txt | 1 + cmake/OpenCVFindLibsVideo.cmake | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 59547653fc..1a1598d132 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -153,6 +153,7 @@ OCV_OPTION(WITH_CSTRIPES "Include C= support" OFF OCV_OPTION(WITH_TIFF "Include TIFF support" ON IF (NOT IOS) ) OCV_OPTION(WITH_UNICAP "Include Unicap support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) ) OCV_OPTION(WITH_V4L "Include Video 4 Linux support" ON IF (UNIX AND NOT ANDROID) ) +OCV_OPTION(WITH_LIBV4L "Use libv4l for Video 4 Linux support" ON IF (UNIX AND NOT ANDROID) ) OCV_OPTION(WITH_DSHOW "Build HighGUI with DirectShow support" ON IF (WIN32 AND NOT ARM) ) OCV_OPTION(WITH_MSMF "Build HighGUI with Media Foundation support" OFF IF WIN32 ) OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF IF (NOT ANDROID AND NOT APPLE) ) diff --git a/cmake/OpenCVFindLibsVideo.cmake b/cmake/OpenCVFindLibsVideo.cmake index dcd5e87d85..00ed56ad31 100644 --- a/cmake/OpenCVFindLibsVideo.cmake +++ b/cmake/OpenCVFindLibsVideo.cmake @@ -125,7 +125,9 @@ endif(WITH_XINE) # --- V4L --- ocv_clear_vars(HAVE_LIBV4L HAVE_CAMV4L HAVE_CAMV4L2 HAVE_VIDEOIO) if(WITH_V4L) - CHECK_MODULE(libv4l1 HAVE_LIBV4L) + if(WITH_LIBV4L) + CHECK_MODULE(libv4l1 HAVE_LIBV4L) + endif() CHECK_INCLUDE_FILE(linux/videodev.h HAVE_CAMV4L) CHECK_INCLUDE_FILE(linux/videodev2.h HAVE_CAMV4L2) CHECK_INCLUDE_FILE(sys/videoio.h HAVE_VIDEOIO) From b864f48274378a91d1aaa35ba3468693b75d201f Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 27 Sep 2013 17:56:30 +0400 Subject: [PATCH 05/39] fixed ocl::sum, ocl::sqrSum, ocl::absSum --- modules/ocl/src/arithm.cpp | 106 +++++++---- modules/ocl/src/opencl/arithm_sum.cl | 159 +++------------- modules/ocl/src/opencl/arithm_sum_3.cl | 247 ------------------------- modules/ocl/test/test_arithm.cpp | 120 +++++++++++- 4 files changed, 212 insertions(+), 420 deletions(-) delete mode 100644 
modules/ocl/src/opencl/arithm_sum_3.cl diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index 24420f477d..deb5163a4a 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -66,7 +66,6 @@ namespace cv extern const char *arithm_nonzero; extern const char *arithm_sum; - extern const char *arithm_sum_3; extern const char *arithm_minMax; extern const char *arithm_minMaxLoc; extern const char *arithm_minMaxLoc_mask; @@ -317,21 +316,28 @@ void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int ////////////////////////////////// sum ////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -//type = 0 sum,type = 1 absSum,type = 2 sqrSum -static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int vlen , int groupnum, int type = 0) +enum { SUM = 0, ABS_SUM, SQR_SUM }; + +static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int groupnum, int type, int ddepth) { - vector > args; - int all_cols = src.step / (vlen * src.elemSize1()); - int pre_cols = (src.offset % src.step) / (vlen * src.elemSize1()); - int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / (vlen * src.elemSize1()) - 1; + int ochannels = src.oclchannels(); + int all_cols = src.step / src.elemSize(); + int pre_cols = (src.offset % src.step) / src.elemSize(); + int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1; int invalid_cols = pre_cols + sec_cols; int cols = all_cols - invalid_cols , elemnum = cols * src.rows;; - int offset = src.offset / (vlen * src.elemSize1()); - int repeat_s = src.offset / src.elemSize1() - offset * vlen; - int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels(); - char build_options[512]; - CV_Assert(type == 0 || type == 1 || type == 2); - sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d -D 
FUNC_TYPE_%d", src.depth(), repeat_s, repeat_e, type); + int offset = src.offset / src.elemSize(); + + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + const char * const funcMap[] = { "FUNC_SUM", "FUNC_ABS_SUM", "FUNC_SQR_SUM" }; + const char * const channelMap[] = { " ", " ", "2", "4", "4" }; + string buildOptions = format("-D srcT=%s%s -D dstT=%s%s -D convertToDstT=convert_%s%s -D %s", + typeMap[src.depth()], channelMap[ochannels], + typeMap[ddepth], channelMap[ochannels], + typeMap[ddepth], channelMap[ochannels], + funcMap[type]); + + vector > args; args.push_back( make_pair( sizeof(cl_int) , (void *)&cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&offset)); @@ -339,55 +345,63 @@ static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int vlen , args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst )); - size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1}; - if (src.oclchannels() != 3) - openCLExecuteKernel(src.clCxt, &arithm_sum, "arithm_op_sum", gt, lt, args, -1, -1, build_options); - else - openCLExecuteKernel(src.clCxt, &arithm_sum_3, "arithm_op_sum_3", gt, lt, args, -1, -1, build_options); + size_t globalThreads[3] = { groupnum * 256, 1, 1 }; + size_t localThreads[3] = { 256, 1, 1 }; + + openCLExecuteKernel(src.clCxt, &arithm_sum, "arithm_op_sum", globalThreads, localThreads, + args, -1, -1, buildOptions.c_str()); } template -Scalar arithmetic_sum(const oclMat &src, int type = 0) +Scalar arithmetic_sum(const oclMat &src, int type, int ddepth) { + CV_Assert(src.step % src.elemSize() == 0); + size_t groupnum = src.clCxt->computeUnits(); CV_Assert(groupnum != 0); - int vlen = src.oclchannels() == 3 ? 
12 : 8, dbsize = groupnum * vlen; + + int dbsize = groupnum * src.oclchannels(); Context *clCxt = src.clCxt; AutoBuffer _buf(dbsize); T *p = (T*)_buf; - cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(T)); - Scalar s = Scalar::all(0.0); - arithmetic_sum_buffer_run(src, dstBuffer, vlen, groupnum, type); - memset(p, 0, dbsize * sizeof(T)); + + cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(T)); + arithmetic_sum_buffer_run(src, dstBuffer, groupnum, type, ddepth); openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(T)); + openCLFree(dstBuffer); + + Scalar s = Scalar::all(0.0); for (int i = 0; i < dbsize;) - { - for (int j = 0; j < src.oclchannels(); j++, i++) + for (int j = 0; j < src.oclchannels(); j++, i++) s.val[j] += p[i]; - } - openCLFree(dstBuffer); return s; } -typedef Scalar (*sumFunc)(const oclMat &src, int type); +typedef Scalar (*sumFunc)(const oclMat &src, int type, int ddepth); + Scalar cv::ocl::sum(const oclMat &src) { if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } - static sumFunc functab[2] = + static sumFunc functab[3] = { + arithmetic_sum, arithmetic_sum, arithmetic_sum }; - sumFunc func; - func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)]; - return func(src, 0); + bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE); + int ddepth = std::max(src.depth(), CV_32S); + if (!hasDouble && ddepth == CV_64F) + ddepth = CV_32F; + + sumFunc func = functab[ddepth - CV_32S]; + return func(src, SUM, ddepth); } Scalar cv::ocl::absSum(const oclMat &src) @@ -396,15 +410,20 @@ Scalar cv::ocl::absSum(const oclMat &src) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } - static sumFunc functab[2] = + static sumFunc functab[3] = { + arithmetic_sum, arithmetic_sum, arithmetic_sum }; - sumFunc func; - func = 
functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)]; - return func(src, 1); + bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE); + int ddepth = std::max(src.depth(), CV_32S); + if (!hasDouble && ddepth == CV_64F) + ddepth = CV_32F; + + sumFunc func = functab[ddepth - CV_32S]; + return func(src, ABS_SUM, ddepth); } Scalar cv::ocl::sqrSum(const oclMat &src) @@ -413,15 +432,20 @@ Scalar cv::ocl::sqrSum(const oclMat &src) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } - static sumFunc functab[2] = + static sumFunc functab[3] = { + arithmetic_sum, arithmetic_sum, arithmetic_sum }; - sumFunc func; - func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)]; - return func(src, 2); + bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE); + int ddepth = std::max(src.depth(), CV_32S); + if (!hasDouble && ddepth == CV_64F) + ddepth = CV_32F; + + sumFunc func = functab[ddepth - CV_32S]; + return func(src, SQR_SUM, ddepth); } ////////////////////////////////////////////////////////////////////////////// diff --git a/modules/ocl/src/opencl/arithm_sum.cl b/modules/ocl/src/opencl/arithm_sum.cl index 280b0a5111..4011f03bea 100644 --- a/modules/ocl/src/opencl/arithm_sum.cl +++ b/modules/ocl/src/opencl/arithm_sum.cl @@ -43,163 +43,62 @@ // //M*/ -/**************************************PUBLICFUNC*************************************/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable -#define RES_TYPE double8 -#define CONVERT_RES_TYPE convert_double8 -#else -#define RES_TYPE float8 -#define CONVERT_RES_TYPE convert_float8 +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable #endif - -#if defined (DEPTH_0) -#define VEC_TYPE uchar8 -#endif -#if defined (DEPTH_1) -#define VEC_TYPE char8 -#endif -#if defined (DEPTH_2) -#define VEC_TYPE ushort8 -#endif -#if defined (DEPTH_3) -#define VEC_TYPE short8 -#endif -#if defined (DEPTH_4) -#define VEC_TYPE int8 
-#endif -#if defined (DEPTH_5) -#define VEC_TYPE float8 -#endif -#if defined (DEPTH_6) -#define VEC_TYPE double8 -#endif - -#if defined (FUNC_TYPE_0) -#define FUNC(a,b) b += a; -#endif -#if defined (FUNC_TYPE_1) -#define FUNC(a,b) b = b + (a >= 0 ? a : -a); -#endif -#if defined (FUNC_TYPE_2) -#define FUNC(a,b) b = b + a * a; -#endif - -#if defined (REPEAT_S0) -#define repeat_s(a) a = a; -#endif -#if defined (REPEAT_S1) -#define repeat_s(a) a.s0 = 0; -#endif -#if defined (REPEAT_S2) -#define repeat_s(a) a.s0 = 0;a.s1 = 0; -#endif -#if defined (REPEAT_S3) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0; -#endif -#if defined (REPEAT_S4) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0; -#endif -#if defined (REPEAT_S5) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0; -#endif -#if defined (REPEAT_S6) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0; -#endif -#if defined (REPEAT_S7) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;a.s6 = 0; #endif -#if defined (REPEAT_E0) -#define repeat_e(a) a = a; +#if defined (FUNC_SUM) +#define FUNC(a, b) b += a; #endif -#if defined (REPEAT_E1) -#define repeat_e(a) a.s7 = 0; +#if defined (FUNC_ABS_SUM) +#define FUNC(a, b) b += a >= 0 ? 
a : -a; #endif -#if defined (REPEAT_E2) -#define repeat_e(a) a.s7 = 0;a.s6 = 0; +#if defined (FUNC_SQR_SUM) +#define FUNC(a, b) b += a * a; #endif -#if defined (REPEAT_E3) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0; -#endif -#if defined (REPEAT_E4) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0; -#endif -#if defined (REPEAT_E5) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0; -#endif -#if defined (REPEAT_E6) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0; -#endif -#if defined (REPEAT_E7) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;a.s1 = 0; -#endif - -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable /**************************************Array buffer SUM**************************************/ -__kernel void arithm_op_sum (int cols,int invalid_cols,int offset,int elemnum,int groupnum, - __global VEC_TYPE *src, __global RES_TYPE *dst) + +__kernel void arithm_op_sum(int cols,int invalid_cols,int offset,int elemnum,int groupnum, + __global srcT *src, __global dstT *dst) { unsigned int lid = get_local_id(0); unsigned int gid = get_group_id(0); - unsigned int id = get_global_id(0); + unsigned int id = get_global_id(0); unsigned int idx = offset + id + (id / cols) * invalid_cols; - __local RES_TYPE localmem_sum[128]; - RES_TYPE sum = 0,temp; - if(id < elemnum) - { - temp = CONVERT_RES_TYPE(src[idx]); - if(id % cols == 0 ) - { - repeat_s(temp); - } - if(id % cols == cols - 1) - { - repeat_e(temp); - } - FUNC(temp,sum); - } - else - { - sum = 0; - } - for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8)) + + __local dstT localmem_sum[128]; + dstT sum = (dstT)(0), temp; + + for (int grainSize = groupnum << 8; id < elemnum; id += grainSize) { idx = offset + id + (id / cols) * invalid_cols; - temp = CONVERT_RES_TYPE(src[idx]); - if(id % cols == 0 ) - { - repeat_s(temp); - } - if(id % 
cols == cols - 1) - { - repeat_e(temp); - } - FUNC(temp,sum); + temp = convertToDstT(src[idx]); + FUNC(temp, sum); } - if(lid > 127) - { + + if (lid > 127) localmem_sum[lid - 128] = sum; - } barrier(CLK_LOCAL_MEM_FENCE); - if(lid < 128) - { + + if (lid < 128) localmem_sum[lid] = sum + localmem_sum[lid]; - } barrier(CLK_LOCAL_MEM_FENCE); - for(int lsize = 64; lsize > 0; lsize >>= 1) + + for (int lsize = 64; lsize > 0; lsize >>= 1) { - if(lid < lsize) + if (lid < lsize) { int lid2 = lsize + lid; localmem_sum[lid] = localmem_sum[lid] + localmem_sum[lid2]; } barrier(CLK_LOCAL_MEM_FENCE); } - if( lid == 0) - { + + if (lid == 0) dst[gid] = localmem_sum[0]; - } } diff --git a/modules/ocl/src/opencl/arithm_sum_3.cl b/modules/ocl/src/opencl/arithm_sum_3.cl deleted file mode 100644 index 3f6ed08803..0000000000 --- a/modules/ocl/src/opencl/arithm_sum_3.cl +++ /dev/null @@ -1,247 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Shengen Yan,yanshengen@gmail.com -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors as is and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -/**************************************PUBLICFUNC*************************************/ -#if defined (DOUBLE_SUPPORT) -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#define RES_TYPE double4 -#define CONVERT_RES_TYPE convert_double4 -#else -#define RES_TYPE float4 -#define CONVERT_RES_TYPE convert_float4 -#endif - -#if defined (DEPTH_0) -#define VEC_TYPE uchar4 -#endif -#if defined (DEPTH_1) -#define VEC_TYPE char4 -#endif -#if defined (DEPTH_2) -#define VEC_TYPE ushort4 -#endif -#if defined (DEPTH_3) -#define VEC_TYPE short4 -#endif -#if defined (DEPTH_4) -#define VEC_TYPE int4 -#endif -#if defined (DEPTH_5) -#define VEC_TYPE float4 -#endif -#if defined (DEPTH_6) -#define VEC_TYPE double4 -#endif - -#if defined (FUNC_TYPE_0) -#define FUNC(a,b) b += a; -#endif -#if defined (FUNC_TYPE_1) -#define FUNC(a,b) b = b + (a >= 0 ? a : -a); -#endif -#if defined (FUNC_TYPE_2) -#define FUNC(a,b) b = b + a * a; -#endif - -#if defined (REPEAT_S0) -#define repeat_s(a,b,c) a=a; b =b; c=c; -#endif -#if defined (REPEAT_S1) -#define repeat_s(a,b,c) a.s0=0; b=b; c=c; -#endif -#if defined (REPEAT_S2) -#define repeat_s(a,b,c) a.s0=0; a.s1=0; b=b; c=c; -#endif -#if defined (REPEAT_S3) -#define repeat_s(a,b,c) a.s0=0; a.s1=0; a.s2=0; b=b; c=c; -#endif -#if defined (REPEAT_S4) -#define repeat_s(a,b,c) a=0;b=b; c=c; -#endif -#if defined (REPEAT_S5) -#define repeat_s(a,b,c) a=0; b.s0=0;c=c; -#endif -#if defined (REPEAT_S6) -#define repeat_s(a,b,c) a=0; b.s0=0; b.s1=0; c=c; -#endif -#if defined (REPEAT_S7) -#define repeat_s(a,b,c) a=0; b.s0=0; b.s1=0; b.s2=0; c=c; -#endif -#if defined (REPEAT_S8) -#define repeat_s(a,b,c) a=0; b=0; c=c; -#endif -#if defined (REPEAT_S9) -#define repeat_s(a,b,c) a=0; b=0; c.s0=0; -#endif -#if defined (REPEAT_S10) -#define repeat_s(a,b,c) a=0; b=0; c.s0=0; c.s1=0; -#endif -#if defined (REPEAT_S11) -#define repeat_s(a,b,c) a=0; b=0; c.s0=0; c.s1=0; c.s2=0; -#endif - -#if defined (REPEAT_E0) -#define repeat_e(a,b,c) a=a; b =b; c=c; -#endif -#if defined 
(REPEAT_E1) -#define repeat_e(a,b,c) a=a; b=b; c.s3=0; -#endif -#if defined (REPEAT_E2) -#define repeat_e(a,b,c) a=a; b=b; c.s3=0; c.s2=0; -#endif -#if defined (REPEAT_E3) -#define repeat_e(a,b,c) a=a; b=b; c.s3=0; c.s2=0; c.s1=0; -#endif -#if defined (REPEAT_E4) -#define repeat_e(a,b,c) a=a; b=b; c=0; -#endif -#if defined (REPEAT_E5) -#define repeat_e(a,b,c) a=a; b.s3=0; c=0; -#endif -#if defined (REPEAT_E6) -#define repeat_e(a,b,c) a=a; b.s3=0; b.s2=0; c=0; -#endif -#if defined (REPEAT_E7) -#define repeat_e(a,b,c) a=a; b.s3=0; b.s2=0; b.s1=0; c=0; -#endif -#if defined (REPEAT_E8) -#define repeat_e(a,b,c) a=a; b=0; c=0; -#endif -#if defined (REPEAT_E9) -#define repeat_e(a,b,c) a.s3=0; b=0; c=0; -#endif -#if defined (REPEAT_E10) -#define repeat_e(a,b,c) a.s3=0; a.s2=0; b=0; c=0; -#endif -#if defined (REPEAT_E11) -#define repeat_e(a,b,c) a.s3=0; a.s2=0; a.s1=0; b=0; c=0; -#endif - -__kernel void arithm_op_sum_3 (int cols,int invalid_cols,int offset,int elemnum,int groupnum, - __global VEC_TYPE *src, __global RES_TYPE *dst) -{ - unsigned int lid = get_local_id(0); - unsigned int gid = get_group_id(0); - unsigned int id = get_global_id(0); - unsigned int idx = offset + id + (id / cols) * invalid_cols; - idx = idx * 3; - __local RES_TYPE localmem_sum1[128]; - __local RES_TYPE localmem_sum2[128]; - __local RES_TYPE localmem_sum3[128]; - RES_TYPE sum1 = 0,sum2 = 0,sum3 = 0,temp1,temp2,temp3; - if(id < elemnum) - { - temp1 = CONVERT_RES_TYPE(src[idx]); - temp2 = CONVERT_RES_TYPE(src[idx+1]); - temp3 = CONVERT_RES_TYPE(src[idx+2]); - if(id % cols == 0 ) - { - repeat_s(temp1,temp2,temp3); - } - if(id % cols == cols - 1) - { - repeat_e(temp1,temp2,temp3); - } - FUNC(temp1,sum1); - FUNC(temp2,sum2); - FUNC(temp3,sum3); - } - else - { - sum1 = 0; - sum2 = 0; - sum3 = 0; - } - for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8)) - { - idx = offset + id + (id / cols) * invalid_cols; - idx = idx * 3; - temp1 = CONVERT_RES_TYPE(src[idx]); - temp2 = 
CONVERT_RES_TYPE(src[idx+1]); - temp3 = CONVERT_RES_TYPE(src[idx+2]); - if(id % cols == 0 ) - { - repeat_s(temp1,temp2,temp3); - } - if(id % cols == cols - 1) - { - repeat_e(temp1,temp2,temp3); - } - FUNC(temp1,sum1); - FUNC(temp2,sum2); - FUNC(temp3,sum3); - } - if(lid > 127) - { - localmem_sum1[lid - 128] = sum1; - localmem_sum2[lid - 128] = sum2; - localmem_sum3[lid - 128] = sum3; - } - barrier(CLK_LOCAL_MEM_FENCE); - if(lid < 128) - { - localmem_sum1[lid] = sum1 + localmem_sum1[lid]; - localmem_sum2[lid] = sum2 + localmem_sum2[lid]; - localmem_sum3[lid] = sum3 + localmem_sum3[lid]; - } - barrier(CLK_LOCAL_MEM_FENCE); - for(int lsize = 64; lsize > 0; lsize >>= 1) - { - if(lid < lsize) - { - int lid2 = lsize + lid; - localmem_sum1[lid] = localmem_sum1[lid] + localmem_sum1[lid2]; - localmem_sum2[lid] = localmem_sum2[lid] + localmem_sum2[lid2]; - localmem_sum3[lid] = localmem_sum3[lid] + localmem_sum3[lid2]; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - if( lid == 0) - { - dst[gid*3] = localmem_sum1[0]; - dst[gid*3+1] = localmem_sum2[0]; - dst[gid*3+2] = localmem_sum3[0]; - } -} diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp index acac38fea2..1505419404 100644 --- a/modules/ocl/test/test_arithm.cpp +++ b/modules/ocl/test/test_arithm.cpp @@ -1022,7 +1022,7 @@ TEST_P(MinMaxLoc, MASK) typedef ArithmTestBase Sum; -TEST_P(Sum, DISABLED_MAT) +TEST_P(Sum, MAT) { for (int j = 0; j < LOOP_TIMES; j++) { @@ -1031,7 +1031,121 @@ TEST_P(Sum, DISABLED_MAT) Scalar cpures = cv::sum(src1_roi); Scalar gpures = cv::ocl::sum(gsrc1); - //check results + // check results + EXPECT_NEAR(cpures[0], gpures[0], 0.1); + EXPECT_NEAR(cpures[1], gpures[1], 0.1); + EXPECT_NEAR(cpures[2], gpures[2], 0.1); + EXPECT_NEAR(cpures[3], gpures[3], 0.1); + } +} + +typedef ArithmTestBase SqrSum; + +template +static Scalar sqrSum(const Mat & src) +{ + Scalar sum = Scalar::all(0); + int cn = src.channels(); + WT data[4] = { 0, 0, 0, 0 }; + + int cols = src.cols * cn; + for (int 
y = 0; y < src.rows; ++y) + { + const T * const sdata = src.ptr(y); + for (int x = 0; x < cols; ) + for (int i = 0; i < cn; ++i, ++x) + { + WT t = static_cast(sdata[x]); + data[i] += t * t; + } + } + + for (int i = 0; i < cn; ++i) + sum[i] = static_cast(data[i]); + + return sum; +} + +typedef Scalar (*sumFunc)(const Mat &); + +TEST_P(SqrSum, MAT) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + static sumFunc funcs[] = { sqrSum, + sqrSum, + sqrSum, + sqrSum, + sqrSum, + sqrSum, + sqrSum, + 0 }; + + sumFunc func = funcs[src1_roi.depth()]; + CV_Assert(func != 0); + + Scalar cpures = func(src1_roi); + Scalar gpures = cv::ocl::sqrSum(gsrc1); + + // check results + EXPECT_NEAR(cpures[0], gpures[0], 1.0); + EXPECT_NEAR(cpures[1], gpures[1], 1.0); + EXPECT_NEAR(cpures[2], gpures[2], 1.0); + EXPECT_NEAR(cpures[3], gpures[3], 1.0); + } +} + +typedef ArithmTestBase AbsSum; + +template +static Scalar absSum(const Mat & src) +{ + Scalar sum = Scalar::all(0); + int cn = src.channels(); + WT data[4] = { 0, 0, 0, 0 }; + + int cols = src.cols * cn; + for (int y = 0; y < src.rows; ++y) + { + const T * const sdata = src.ptr(y); + for (int x = 0; x < cols; ) + for (int i = 0; i < cn; ++i, ++x) + { + WT t = static_cast(sdata[x]); + data[i] += t >= 0 ? 
t : -t; + } + } + + for (int i = 0; i < cn; ++i) + sum[i] = static_cast(data[i]); + + return sum; +} + +TEST_P(AbsSum, MAT) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + static sumFunc funcs[] = { absSum, + absSum, + absSum, + absSum, + absSum, + absSum, + absSum, + 0 }; + + sumFunc func = funcs[src1_roi.depth()]; + CV_Assert(func != 0); + + Scalar cpures = func(src1_roi); + Scalar gpures = cv::ocl::absSum(gsrc1); + + // check results EXPECT_NEAR(cpures[0], gpures[0], 0.1); EXPECT_NEAR(cpures[1], gpures[1], 0.1); EXPECT_NEAR(cpures[2], gpures[2], 0.1); @@ -1319,6 +1433,8 @@ INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(testing::Range(CV_8U, CV_USRTYPE1) INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, Sum, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, SqrSum, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, AbsSum, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + From 7edcefb2be9f0372cd2f3a004fd6cd3c975cca41 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 30 Sep 2013 15:08:26 +0400 Subject: [PATCH 06/39] fixed ocl::phase --- modules/ocl/src/arithm.cpp | 50 ++++------ modules/ocl/src/opencl/arithm_phase.cl | 129 ++++++++++++++----------- modules/ocl/test/test_arithm.cpp | 30 +++--- 3 files changed, 111 insertions(+), 98 deletions(-) diff --git 
a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index deb5163a4a..6467040f1c 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -461,8 +461,8 @@ void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev) m2(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0)); oclMat dst1(m1), dst2(m2); - //arithmetic_sum_run(src, dst1,"arithm_op_sum"); - //arithmetic_sum_run(src, dst2,"arithm_op_squares_sum"); +// arithmetic_sum_run(src, dst1, "arithm_op_sum"); +// arithmetic_sum_run(src, dst2, "arithm_op_squares_sum"); m1 = (Mat)dst1; m2 = (Mat)dst2; @@ -558,7 +558,6 @@ void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, } } - void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) { oclMat buf; @@ -928,47 +927,38 @@ static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat return; } - CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols && src1.rows == src2.rows && src2.rows == dst.rows); - CV_Assert(src1.type() == src2.type() && src1.type() == dst.type()); - Context *clCxt = src1.clCxt; - int channels = dst.oclchannels(); - int depth = dst.depth(); - - size_t vector_length = 1; - int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1); - int cols = divUp(dst.cols * channels + offset_cols, vector_length); + int depth = dst.depth(), cols1 = src1.cols * src1.oclchannels(); + int src1step1 = src1.step / src1.elemSize1(), src1offset1 = src1.offset / src1.elemSize1(); + int src2step1 = src2.step / src2.elemSize1(), src2offset1 = src2.offset / src2.elemSize1(); + int dststep1 = dst.step / dst.elemSize1(), dstoffset1 = dst.offset / dst.elemSize1(); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { cols, dst.rows, 1 }; + size_t globalThreads[3] = { cols1, dst.rows, 1 }; - int dst_step1 = dst.cols * dst.elemSize(); vector > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); - 
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1step1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1offset1 )); args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2step1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dstoffset1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); } -void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle , bool angleInDegrees) +void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleInDegrees) { CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F)); + CV_Assert(x.step % x.elemSize() == 0 && y.step % y.elemSize() == 0); + Angle.create(x.size(), x.type()); - string kernelName = angleInDegrees ? 
"arithm_phase_indegrees" : "arithm_phase_inradians"; - if (angleInDegrees) - arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase); - else - arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase); + arithmetic_phase_run(x, y, Angle, angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians", &arithm_phase); } ////////////////////////////////////////////////////////////////////////////// @@ -1539,8 +1529,8 @@ oclMatExpr::operator oclMat() const /////////////////////////////// transpose //////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -#define TILE_DIM (32) -#define BLOCK_ROWS (256/TILE_DIM) +#define TILE_DIM (32) +#define BLOCK_ROWS (256 / TILE_DIM) static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, bool inplace = false) { diff --git a/modules/ocl/src/opencl/arithm_phase.cl b/modules/ocl/src/opencl/arithm_phase.cl index 9dda5e957a..a30eba4310 100644 --- a/modules/ocl/src/opencl/arithm_phase.cl +++ b/modules/ocl/src/opencl/arithm_phase.cl @@ -45,110 +45,125 @@ // #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable #endif +#endif + #define CV_PI 3.1415926535898 +#define CV_2PI 2*3.1415926535898 + /**************************************phase inradians**************************************/ -__kernel void arithm_phase_inradians_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global float *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ +__kernel void arithm_phase_inradians_D5(__global float *src1, int src1_step1, int src1_offset1, + __global float *src2, int src2_step1, int src2_offset1, + __global float *dst, int dst_step1, int dst_offset1, + int cols, int rows) +{ int x = get_global_id(0); int y = get_global_id(1); - if(x < cols && y < rows) + if 
(x < cols && y < rows) { - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset); - int dst_index = mad24(y, dst_step, (x << 2) + dst_offset); + int src1_index = mad24(y, src1_step1, x + src1_offset1); + int src2_index = mad24(y, src2_step1, x + src2_offset1); + int dst_index = mad24(y, dst_step1, x + dst_offset1); - float data1 = *((__global float *)((__global char *)src1 + src1_index)); - float data2 = *((__global float *)((__global char *)src2 + src2_index)); - float tmp = atan2(data2,data1); + float data1 = src1[src1_index]; + float data2 = src2[src2_index]; + float tmp = atan2(data2, data1); - *((__global float *)((__global char *)dst + dst_index)) = tmp; - } + if (tmp < 0) + tmp += CV_2PI; + dst[dst_index] = tmp; + } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_phase_inradians_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global double *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_phase_inradians_D6(__global double *src1, int src1_step1, int src1_offset1, + __global double *src2, int src2_step1, int src2_offset1, + __global double *dst, int dst_step1, int dst_offset1, + int cols, int rows) { - int x = get_global_id(0); int y = get_global_id(1); - if(x < cols && y < rows) + if (x < cols && y < rows) { - int src1_index = mad24(y, src1_step, (x << 3) + src1_offset); - int src2_index = mad24(y, src2_step, (x << 3) + src2_offset); - int dst_index = mad24(y, dst_step, (x << 3) + dst_offset); + int src1_index = mad24(y, src1_step1, x + src1_offset1); + int src2_index = mad24(y, src2_step1, x + src2_offset1); + int dst_index = mad24(y, dst_step1, x + dst_offset1); - double data1 = *((__global double *)((__global char *)src1 + src1_index)); - double data2 = *((__global double *)((__global char *)src2 + src2_index)); + double data1 = src1[src1_index]; + double data2 = 
src2[src2_index]; + double tmp = atan2(data2, data1); - *((__global double *)((__global char *)dst + dst_index)) = atan2(data2,data1); - } + if (tmp < 0) + tmp += CV_2PI; + dst[dst_index] = tmp; + } } + #endif /**************************************phase indegrees**************************************/ -__kernel void arithm_phase_indegrees_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global float *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ +__kernel void arithm_phase_indegrees_D5(__global float *src1, int src1_step1, int src1_offset1, + __global float *src2, int src2_step1, int src2_offset1, + __global float *dst, int dst_step1, int dst_offset1, + int cols, int rows) +{ int x = get_global_id(0); int y = get_global_id(1); - if(x < cols && y < rows) + if (x < cols && y < rows) { - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset); - int dst_index = mad24(y, dst_step, (x << 2) + dst_offset); + int src1_index = mad24(y, src1_step1, x + src1_offset1); + int src2_index = mad24(y, src2_step1, x + src2_offset1); + int dst_index = mad24(y, dst_step1, x + dst_offset1); - float data1 = *((__global float *)((__global char *)src1 + src1_index)); - float data2 = *((__global float *)((__global char *)src2 + src2_index)); - float tmp = atan2(data2,data1); - float tmp_data = 180*tmp/CV_PI; + float data1 = src1[src1_index]; + float data2 = src2[src2_index]; + float tmp = atan2(data2, data1); + tmp = 180 * tmp / CV_PI; - *((__global float *)((__global char *)dst + dst_index)) = tmp_data; - } + if (tmp < 0) + tmp += 360; + dst[dst_index] = tmp; + } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global double *dst, int dst_step, int dst_offset, - int rows, int cols, int 
dst_step1) +__kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1, int src1_offset1, + __global double *src2, int src2_step1, int src2_offset1, + __global double *dst, int dst_step1, int dst_offset1, + int cols, int rows) { - int x = get_global_id(0); int y = get_global_id(1); - if(x < cols && y < rows) + if (x < cols && y < rows) { - int src1_index = mad24(y, src1_step, (x << 3) + src1_offset); - int src2_index = mad24(y, src2_step, (x << 3) + src2_offset); - int dst_index = mad24(y, dst_step, (x << 3) + dst_offset); + int src1_index = mad24(y, src1_step1, x + src1_offset1); + int src2_index = mad24(y, src2_step1, x + src2_offset1); + int dst_index = mad24(y, dst_step1, x + dst_offset1); - double data1 = *((__global double *)((__global char *)src1 + src1_index)); - double data2 = *((__global double *)((__global char *)src2 + src2_index)); - double tmp = atan2(data2,data1); - double tmp_data = 180*tmp/CV_PI; + double data1 = src1[src1_index]; + double data2 = src2[src2_index]; + double tmp = atan2(src2[src2_index], src1[src1_index]); - *((__global double *)((__global char *)dst + dst_index)) = tmp_data; - } + tmp = 180 * tmp / CV_PI; + if (tmp < 0) + tmp += 360; + dst[dst_index] = tmp; + } } #endif diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp index 1505419404..2438148033 100644 --- a/modules/ocl/test/test_arithm.cpp +++ b/modules/ocl/test/test_arithm.cpp @@ -464,7 +464,6 @@ TEST_P(Mul, Scalar) } } - TEST_P(Mul, Mat_Scalar) { for (int j = 0; j < LOOP_TIMES; j++) @@ -507,7 +506,6 @@ TEST_P(Div, Scalar) } } - TEST_P(Div, Mat_Scalar) { for (int j = 0; j < LOOP_TIMES; j++) @@ -1173,17 +1171,27 @@ TEST_P(CountNonZero, MAT) typedef ArithmTestBase Phase; -TEST_P(Phase, DISABLED_Mat) +TEST_P(Phase, angleInDegrees) { - for (int angelInDegrees = 0; angelInDegrees < 2; angelInDegrees++) + for (int j = 0; j < LOOP_TIMES; j++) { - for (int j = 0; j < LOOP_TIMES; j++) - { - random_roi(); - cv::phase(src1_roi, src2_roi, 
dst1_roi, angelInDegrees ? true : false); - cv::ocl::phase(gsrc1, gsrc2, gdst1, angelInDegrees ? true : false); - Near(1e-2); - } + random_roi(); + cv::phase(src1_roi, src2_roi, dst1_roi, true); + cv::ocl::phase(gsrc1, gsrc2, gdst1, true); + + Near(1e-2); + } +} + +TEST_P(Phase, angleInRadians) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + cv::phase(src1_roi, src2_roi, dst1_roi); + cv::ocl::phase(gsrc1, gsrc2, gdst1); + + Near(1e-2); } } From 7379152afbc21f616d7a53cf34fe92607861a940 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 30 Sep 2013 16:54:09 +0400 Subject: [PATCH 07/39] fixed ocl::setIdentity --- modules/ocl/include/opencv2/ocl/ocl.hpp | 3 +- modules/ocl/src/arithm.cpp | 70 ++++++-------------- modules/ocl/src/opencl/arithm_setidentity.cl | 47 +++---------- modules/ocl/test/test_arithm.cpp | 60 +++++++++++------ 4 files changed, 70 insertions(+), 110 deletions(-) diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index d3dbded34d..bb23e1323f 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -584,7 +584,8 @@ namespace cv CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0); - CV_EXPORTS void setIdentity(oclMat& src, double val); + //! 
initializes a scaled identity matrix + CV_EXPORTS void setIdentity(oclMat& src, const Scalar & val = Scalar(1)); //////////////////////////////// Filter Engine //////////////////////////////// diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index 6467040f1c..883ac8f0da 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -1709,63 +1709,35 @@ void cv::ocl::pow(const oclMat &x, double p, oclMat &y) /////////////////////////////// setIdentity ////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -void cv::ocl::setIdentity(oclMat& src, double scalar) +void cv::ocl::setIdentity(oclMat& src, const Scalar & scalar) { - CV_Assert(src.empty() == false && src.rows == src.cols); - CV_Assert(src.type() == CV_32SC1 || src.type() == CV_32FC1); - int src_step = src.step/src.elemSize(); Context *clCxt = Context::getContext(); - size_t local_threads[] = {16, 16, 1}; - size_t global_threads[] = {src.cols, src.rows, 1}; - - string kernelName = "setIdentityKernel"; - if (src.type() == CV_32FC1) - kernelName += "_F1"; - else if (src.type() == CV_32SC1) - kernelName += "_I1"; - else + if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { - kernelName += "_D1"; - if (!(clCxt->supportsFeature(Context::CL_DOUBLE))) - { - oclMat temp; - src.convertTo(temp, CV_32FC1); - temp.copyTo(src); - } - + CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); + return; } + CV_Assert(src.step % src.elemSize() == 0); + + int src_step1 = src.step / src.elemSize(), src_offset1 = src.offset / src.elemSize(); + size_t local_threads[] = { 16, 16, 1 }; + size_t global_threads[] = { src.cols, src.rows, 1 }; + + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + const char * const channelMap[] = { "", "", "2", "4", "4" }; + string buildOptions = format("-D T=%s%s", typeMap[src.depth()], channelMap[src.oclchannels()]); + 
vector > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src_step1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset1 )); args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src_step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); - int scalar_i = 0; - float scalar_f = 0.0f; - if (clCxt->supportsFeature(Context::CL_DOUBLE)) - { - if (src.type() == CV_32SC1) - { - scalar_i = (int)scalar; - args.push_back(make_pair(sizeof(cl_int), (void*)&scalar_i)); - } - else - args.push_back(make_pair(sizeof(cl_double), (void*)&scalar)); - } - else - { - if (src.type() == CV_32SC1) - { - scalar_i = (int)scalar; - args.push_back(make_pair(sizeof(cl_int), (void*)&scalar_i)); - } - else - { - scalar_f = (float)scalar; - args.push_back(make_pair(sizeof(cl_float), (void*)&scalar_f)); - } - } + oclMat sc(1, 1, src.type(), scalar); + args.push_back( make_pair( sizeof(cl_mem), (void *)&sc.data )); - openCLExecuteKernel(clCxt, &arithm_setidentity, kernelName, global_threads, local_threads, args, -1, -1); + openCLExecuteKernel(clCxt, &arithm_setidentity, "setIdentity", global_threads, local_threads, + args, -1, -1, buildOptions.c_str()); } diff --git a/modules/ocl/src/opencl/arithm_setidentity.cl b/modules/ocl/src/opencl/arithm_setidentity.cl index 0604ae81dd..921026b40d 100644 --- a/modules/ocl/src/opencl/arithm_setidentity.cl +++ b/modules/ocl/src/opencl/arithm_setidentity.cl @@ -42,6 +42,7 @@ // the use of this software, even if advised of the possibility of such damage. 
// //M*/ + #if defined (DOUBLE_SUPPORT) #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable @@ -50,51 +51,19 @@ #endif #endif - -#if defined (DOUBLE_SUPPORT) -#define DATA_TYPE double -#else -#define DATA_TYPE float -#endif - -__kernel void setIdentityKernel_F1(__global float* src, int src_row, int src_col, int src_step, DATA_TYPE scalar) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < src_col && y < src_row) - { - if(x == y) - src[y * src_step + x] = scalar; - else - src[y * src_step + x] = 0 * scalar; - } -} - -__kernel void setIdentityKernel_D1(__global DATA_TYPE* src, int src_row, int src_col, int src_step, DATA_TYPE scalar) +__kernel void setIdentity(__global T * src, int src_step, int src_offset, + int cols, int rows, __global const T * scalar) { int x = get_global_id(0); int y = get_global_id(1); - if(x < src_col && y < src_row) + if (x < cols && y < rows) { - if(x == y) - src[y * src_step + x] = scalar; - else - src[y * src_step + x] = 0 * scalar; - } -} + int src_index = mad24(y, src_step, src_offset + x); -__kernel void setIdentityKernel_I1(__global int* src, int src_row, int src_col, int src_step, int scalar) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < src_col && y < src_row) - { - if(x == y) - src[y * src_step + x] = scalar; + if (x == y) + src[src_index] = *scalar; else - src[y * src_step + x] = 0 * scalar; + src[src_index] = 0; } } diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp index 2438148033..ee45cf5e3f 100644 --- a/modules/ocl/test/test_arithm.cpp +++ b/modules/ocl/test/test_arithm.cpp @@ -1423,34 +1423,52 @@ TEST_P(AddWeighted, Mat) } } +//////////////////////////////// setIdentity ///////////////////////////////////////////////// + +typedef ArithmTestBase SetIdentity; + +TEST_P(SetIdentity, Mat) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + cv::setIdentity(dst1_roi, val); + cv::ocl::setIdentity(gdst1, val); + + Near(0); + } +} + 
//////////////////////////////////////// Instantiation ///////////////////////////////////////// -INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool(), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(testing::Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(testing::Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Sub, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + +INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool(), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(testing::Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(testing::Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Sub, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); -INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, 
Transpose, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + +INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); -INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); // + +INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, Sum, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, SqrSum, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, AbsSum, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); -INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(testing::Range(CV_8U, 
CV_USRTYPE1), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + +INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, SetIdentity, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); #endif // HAVE_OPENCL From a562b5c985ffcad4390c867fa6c2dc064af4e8b0 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 30 Sep 2013 17:36:59 +0400 Subject: [PATCH 08/39] fixed 
ocl::meanStddev and created accuracy test for this function --- modules/ocl/src/arithm.cpp | 24 ++++++++---------------- modules/ocl/test/test_arithm.cpp | 25 +++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index 883ac8f0da..e81ee56a5a 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -454,23 +454,15 @@ Scalar cv::ocl::sqrSum(const oclMat &src) void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev) { - CV_Assert(src.depth() <= CV_32S); - cv::Size sz(1, 1); - int channels = src.oclchannels(); - Mat m1(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0)), - m2(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0)); - oclMat dst1(m1), dst2(m2); - -// arithmetic_sum_run(src, dst1, "arithm_op_sum"); -// arithmetic_sum_run(src, dst2, "arithm_op_squares_sum"); - - m1 = (Mat)dst1; - m2 = (Mat)dst2; - int i = 0, *p = (int *)m1.data, *q = (int *)m2.data; - for (; i < channels; i++) + double total = 1.0 / src.size().area(); + + mean = sum(src); + stddev = sqrSum(src); + + for (int i = 0; i < 4; ++i) { - mean.val[i] = (double)p[i] / (src.cols * src.rows); - stddev.val[i] = std::sqrt(std::max((double) q[i] / (src.cols * src.rows) - mean.val[i] * mean.val[i] , 0.)); + mean[i] *= total; + stddev[i] = std::sqrt(std::max(stddev[i] * total - mean.val[i] * mean.val[i] , 0.)); } } diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp index ee45cf5e3f..ac4842e23d 100644 --- a/modules/ocl/test/test_arithm.cpp +++ b/modules/ocl/test/test_arithm.cpp @@ -1440,6 +1440,30 @@ TEST_P(SetIdentity, Mat) } } +//////////////////////////////// setIdentity ///////////////////////////////////////////////// + +typedef ArithmTestBase MeanStdDev; + +TEST_P(MeanStdDev, Mat) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + Scalar cpu_mean, cpu_stddev; + Scalar gpu_mean, gpu_stddev; + + cv::meanStdDev(src1_roi, cpu_mean, 
cpu_stddev); + cv::ocl::meanStdDev(gsrc1, gpu_mean, gpu_stddev); + + for (int i = 0; i < 4; ++i) + { + EXPECT_NEAR(cpu_mean[i], gpu_mean[i], 1e-5); + EXPECT_NEAR(cpu_stddev[i], gpu_stddev[i], 0.1); + } + } +} + //////////////////////////////////////// Instantiation ///////////////////////////////////////// INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool(), Bool())); @@ -1470,5 +1494,6 @@ INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(testing::Range(CV_8U, CV_USRTYP INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, SetIdentity, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, MeanStdDev, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); #endif // HAVE_OPENCL From 958604a68e4ac7c1db36c331b519336737544b00 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sun, 29 Sep 2013 06:35:12 +0400 Subject: [PATCH 09/39] perf test: update error message about relative error --- modules/ts/src/ts_perf.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp index dc7f45320e..b4fa00b42a 100644 --- a/modules/ts/src/ts_perf.cpp +++ b/modules/ts/src/ts_perf.cpp @@ -476,6 +476,9 @@ void Regression::verify(cv::FileNode node, cv::InputArray array, double eps, ERR int violations = countViolations(expected, actual, diff, eps, &maxv, &maxa); if (violations > 0) { + if(expected.total() * expected.channels() < 12) + std::cout << " Expected: " << std::endl << expected << std::endl << " Actual:" << std::endl << actual << std::endl; + FAIL() << " Relative difference (" << maxv << " of " << maxa << " allowed) between argument \"" << node.name() << "[" << idx << "]\" and expected value is 
greater than " << eps << " in " << violations << " points"; } @@ -530,6 +533,9 @@ void Regression::verify(cv::FileNode node, cv::InputArray array, double eps, ERR int violations = countViolations(expected, actual, diff, eps, &maxv, &maxa); if (violations > 0) { + if(expected.total() * expected.channels() < 12) + std::cout << " Expected: " << std::endl << expected << std::endl << " Actual:" << std::endl << actual << std::endl; + FAIL() << " Relative difference (" << maxv << " of " << maxa << " allowed) between argument \"" << node.name() << "\" and expected value is greater than " << eps << " in " << violations << " points"; } From 065b40c6c3b18a95e502cb24e9db383603016f2b Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 30 Sep 2013 18:58:32 +0400 Subject: [PATCH 10/39] fixed and extended ocl::norm --- modules/ocl/src/arithm.cpp | 120 ++++++++++++------ .../src/opencl/arithm_absdiff_nonsaturate.cl | 93 ++++++++++++++ modules/ocl/test/test_arithm.cpp | 67 +++++++++- 3 files changed, 238 insertions(+), 42 deletions(-) create mode 100644 modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index e81ee56a5a..2a663b990a 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -64,6 +64,7 @@ namespace cv { //////////////////////////////// OpenCL kernel strings ///////////////////// + extern const char *arithm_absdiff_nonsaturate; extern const char *arithm_nonzero; extern const char *arithm_sum; extern const char *arithm_minMax; @@ -435,14 +436,12 @@ Scalar cv::ocl::sqrSum(const oclMat &src) static sumFunc functab[3] = { arithmetic_sum, - arithmetic_sum, + arithmetic_sum, arithmetic_sum }; bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE); - int ddepth = std::max(src.depth(), CV_32S); - if (!hasDouble && ddepth == CV_64F) - ddepth = CV_32F; + int ddepth = src.depth() <= CV_32S ? CV_32S : (hasDouble ? 
CV_64F : CV_32F); sumFunc func = functab[ddepth - CV_32S]; return func(src, SQR_SUM, ddepth); @@ -595,57 +594,102 @@ void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, cons double cv::ocl::norm(const oclMat &src1, int normType) { - return norm(src1, oclMat(src1.size(), src1.type(), Scalar::all(0)), normType); + CV_Assert((normType & NORM_RELATIVE) == 0); + return norm(src1, oclMat(), normType); +} + +static void arithm_absdiff_nonsaturate_run(const oclMat & src1, const oclMat & src2, oclMat & diff) +{ + CV_Assert(src1.step % src1.elemSize() == 0 && (src2.empty() || src2.step % src2.elemSize() == 0)); + Context *clCxt = src1.clCxt; + + int ddepth = CV_64F; + diff.create(src1.size(), CV_MAKE_TYPE(ddepth, src1.channels())); + + int oclChannels = src1.oclchannels(), sdepth = src1.depth(); + int src1step1 = src1.step / src1.elemSize(), src1offset1 = src1.offset / src1.elemSize(); + int src2step1 = src2.step / src2.elemSize(), src2offset1 = src2.offset / src2.elemSize(); + int diffstep1 = diff.step / diff.elemSize(), diffoffset1 = diff.offset / diff.elemSize(); + + string kernelName = "arithm_absdiff_nonsaturate"; + size_t localThreads[3] = { 16, 16, 1 }; + size_t globalThreads[3] = { diff.cols, diff.rows, 1 }; + + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + const char * const channelMap[] = { "", "", "2", "4", "4" }; + + std::string buildOptions = format("-D srcT=%s%s -D dstT=%s%s -D convertToDstT=convert_%s%s", + typeMap[sdepth], channelMap[oclChannels], + typeMap[ddepth], channelMap[oclChannels], + typeMap[ddepth], channelMap[oclChannels]); + + vector > args; + args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1step1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1offset1 )); + + if (!src2.empty()) + { + args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); + args.push_back( make_pair( 
sizeof(cl_int), (void *)&src2step1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 )); + + kernelName += "_binary"; + } + + args.push_back( make_pair( sizeof(cl_mem), (void *)&diff.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&diffstep1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&diffoffset1 )); + + args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows )); + + openCLExecuteKernel(clCxt, &arithm_absdiff_nonsaturate, + kernelName, globalThreads, localThreads, + args, -1, -1, buildOptions.c_str()); } double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) { + CV_Assert(!src1.empty()); + CV_Assert(src2.empty() || (src1.type() == src2.type() && src1.size() == src2.size())); + + if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F) + { + CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); + } + bool isRelative = (normType & NORM_RELATIVE) != 0; - normType &= 7; - CV_Assert(src1.depth() <= CV_32S && src1.type() == src2.type() && ( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2)); - int channels = src1.oclchannels(), i = 0, *p; + normType &= NORM_TYPE_MASK; + CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2); + + Scalar s; + int cn = src1.channels(); double r = 0; - oclMat gm1(src1.size(), src1.type()); - int min_int = (normType == NORM_INF ? 
CL_INT_MIN : 0); - Mat m(1, 1, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(min_int)); - oclMat gm2(m), emptyMat; - switch(normType) + oclMat diff; + arithm_absdiff_nonsaturate_run(src1, src2, diff); + + switch (normType) { case NORM_INF: - // arithmetic_run(src1, src2, gm1, "arithm_op_absdiff"); - //arithmetic_minMax_run(gm1,emptyMat, gm2,"arithm_op_max"); - m = (gm2); - p = (int *)m.data; - r = -std::numeric_limits::max(); - for (i = 0; i < channels; i++) - { - r = std::max(r, (double)p[i]); - } + diff = diff.reshape(1); + minMax(diff, NULL, &r); break; case NORM_L1: - //arithmetic_run(src1, src2, gm1, "arithm_op_absdiff"); - //arithmetic_sum_run(gm1, gm2,"arithm_op_sum"); - m = (gm2); - p = (int *)m.data; - for (i = 0; i < channels; i++) - { - r = r + (double)p[i]; - } + s = sum(diff); + for (int i = 0; i < cn; ++i) + r += s[i]; break; case NORM_L2: - //arithmetic_run(src1, src2, gm1, "arithm_op_absdiff"); - //arithmetic_sum_run(gm1, gm2,"arithm_op_squares_sum"); - m = (gm2); - p = (int *)m.data; - for (i = 0; i < channels; i++) - { - r = r + (double)p[i]; - } + s = sqrSum(diff); + for (int i = 0; i < cn; ++i) + r += s[i]; r = std::sqrt(r); break; } if (isRelative) r = r / norm(src2, normType); + return r; } diff --git a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl new file mode 100644 index 0000000000..e5d8271394 --- /dev/null +++ b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl @@ -0,0 +1,93 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. 
+// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jia Haipeng, jiahaipeng95@gmail.com +// +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif +#endif + +__kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_step, int src1_offset, + __global srcT *src2, int src2_step, int src2_offset, + __global dstT *dst, int dst_step, int dst_offset, + int cols, int rows) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < cols && y < rows) + { + int src1_index = mad24(y, src1_step, x + src1_offset); + int src2_index = mad24(y, src2_step, x + src2_offset); + int dst_index = mad24(y, dst_step, x + dst_offset); + + dstT t0 = convertToDstT(src1[src1_index]); + dstT t1 = convertToDstT(src2[src2_index]); + dstT t2 = t0 - t1; + + dst[dst_index] = t2 >= 0 ? t2 : -t2; + } +} + +__kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int src1_offset, + __global dstT *dst, int dst_step, int dst_offset, + int cols, int rows) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < cols && y < rows) + { + int src1_index = mad24(y, src1_step, x + src1_offset); + int dst_index = mad24(y, dst_step, x + dst_offset); + + dstT t0 = convertToDstT(src1[src1_index]); + + dst[dst_index] = t0 >= 0 ? t0 : -t0; + } +} diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp index ac4842e23d..db01d95036 100644 --- a/modules/ocl/test/test_arithm.cpp +++ b/modules/ocl/test/test_arithm.cpp @@ -220,8 +220,8 @@ PARAM_TEST_CASE(ArithmTestBase, int, int, bool) cv::RNG &rng = TS::ptr()->get_rng(); - src1 = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); - src2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, -15440, 14450, false); + src1 = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 2, 11, false); + src2 = randomMat(rng, !use_roi ? 
src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, -1540, 1740, false); dst1 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); dst2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); mask = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), CV_8UC1, 0, 2, false); @@ -1440,7 +1440,7 @@ TEST_P(SetIdentity, Mat) } } -//////////////////////////////// setIdentity ///////////////////////////////////////////////// +//////////////////////////////// meanStdDev ///////////////////////////////////////////////// typedef ArithmTestBase MeanStdDev; @@ -1458,12 +1458,70 @@ TEST_P(MeanStdDev, Mat) for (int i = 0; i < 4; ++i) { - EXPECT_NEAR(cpu_mean[i], gpu_mean[i], 1e-5); + EXPECT_NEAR(cpu_mean[i], gpu_mean[i], 0.1); EXPECT_NEAR(cpu_stddev[i], gpu_stddev[i], 0.1); } } } +//////////////////////////////// Norm ///////////////////////////////////////////////// + +typedef ArithmTestBase Norm; + +TEST_P(Norm, NORM_INF) +{ + for (int relative = 0; relative < 2; ++relative) + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + int type = NORM_INF; + if (relative == 1) + type |= NORM_RELATIVE; + + const double cpuRes = cv::norm(src1_roi, src2_roi, type); + const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type); + + EXPECT_NEAR(cpuRes, gpuRes, 0.1); + } +} + +TEST_P(Norm, NORM_L1) +{ + for (int relative = 0; relative < 2; ++relative) + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + int type = NORM_L1; + if (relative == 1) + type |= NORM_RELATIVE; + + const double cpuRes = cv::norm(src1_roi, src2_roi, type); + const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type); + + EXPECT_NEAR(cpuRes, gpuRes, 0.1); + } +} + +TEST_P(Norm, NORM_L2) +{ + for (int relative = 0; relative < 2; ++relative) + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + int type = NORM_L2; + if (relative == 1) + type |= NORM_RELATIVE; + + const double cpuRes = 
cv::norm(src1_roi, src2_roi, type); + const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type); + + EXPECT_NEAR(cpuRes, gpuRes, 0.1); + } +} + //////////////////////////////////////// Instantiation ///////////////////////////////////////// INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool(), Bool())); @@ -1495,5 +1553,6 @@ INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32F, CV_64F), testing::Ra INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, SetIdentity, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, MeanStdDev, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); #endif // HAVE_OPENCL From 66e4aead97e3692d85fdb34909f2a8441a71c110 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 30 Sep 2013 16:07:50 +0400 Subject: [PATCH 11/39] perf tests: added SANITY_CHECK_NOTHING() --- modules/ts/include/opencv2/ts/ts_perf.hpp | 4 +++- modules/ts/src/ts_perf.cpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/ts/include/opencv2/ts/ts_perf.hpp b/modules/ts/include/opencv2/ts/ts_perf.hpp index fa88dadb8c..70ad571e09 100644 --- a/modules/ts/include/opencv2/ts/ts_perf.hpp +++ b/modules/ts/include/opencv2/ts/ts_perf.hpp @@ -209,6 +209,7 @@ private: #define SANITY_CHECK(array, ...) ::perf::Regression::add(this, #array, array , ## __VA_ARGS__) #define SANITY_CHECK_KEYPOINTS(array, ...) ::perf::Regression::addKeypoints(this, #array, array , ## __VA_ARGS__) #define SANITY_CHECK_MATCHES(array, ...) 
::perf::Regression::addMatches(this, #array, array , ## __VA_ARGS__) +#define SANITY_CHECK_NOTHING() this->setVerified(); class CV_EXPORTS GpuPerf { @@ -345,12 +346,13 @@ private: friend class TestBase; }; friend class _declareHelper; - friend class Regression; bool verified; public: _declareHelper declare; + + void setVerified() { this->verified = true; } }; template class TestBaseWithParam: public TestBase, public ::testing::WithParamInterface {}; diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp index dc7f45320e..7adc246f48 100644 --- a/modules/ts/src/ts_perf.cpp +++ b/modules/ts/src/ts_perf.cpp @@ -107,7 +107,7 @@ Regression& Regression::instance() Regression& Regression::add(TestBase* test, const std::string& name, cv::InputArray array, double eps, ERROR_TYPE err) { - if(test) test->verified = true; + if(test) test->setVerified(); return instance()(name, array, eps, err); } From a8ac3ec72aaa4fb142365626934c0f7c4904f677 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 30 Sep 2013 16:08:08 +0400 Subject: [PATCH 12/39] ocl: perf test: disable MOG2 (random hungs), enable upload/download --- modules/ocl/perf/perf_bgfg.cpp | 2 +- modules/ocl/perf/perf_matrix_operation.cpp | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp index cd84509bc0..3180f1bbbe 100644 --- a/modules/ocl/perf/perf_bgfg.cpp +++ b/modules/ocl/perf/perf_bgfg.cpp @@ -167,7 +167,7 @@ PERF_TEST_P(VideoMOGFixture, MOG, typedef tuple VideoMOG2ParamType; typedef TestBaseWithParam VideoMOG2Fixture; -PERF_TEST_P(VideoMOG2Fixture, MOG2, +PERF_TEST_P(VideoMOG2Fixture, DISABLED_MOG2, // TODO Disabled: random hungs on buildslave ::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"), ::testing::Values(1, 3))) { diff --git a/modules/ocl/perf/perf_matrix_operation.cpp b/modules/ocl/perf/perf_matrix_operation.cpp index 13ce47a1bc..8266f0930d 100644 --- 
a/modules/ocl/perf/perf_matrix_operation.cpp +++ b/modules/ocl/perf/perf_matrix_operation.cpp @@ -161,7 +161,7 @@ PERF_TEST_P(setToFixture, setTo, typedef tuple uploadParams; typedef TestBaseWithParam uploadFixture; -PERF_TEST_P(uploadFixture, DISABLED_upload, +PERF_TEST_P(uploadFixture, upload, testing::Combine( OCL_TYPICAL_MAT_SIZES, testing::Range(CV_8U, CV_64F), @@ -190,15 +190,14 @@ PERF_TEST_P(uploadFixture, DISABLED_upload, else OCL_PERF_ELSE - int value = 0; - SANITY_CHECK(value); + SANITY_CHECK_NOTHING(); } /////////////////// download /////////////////////////// typedef TestBaseWithParam downloadFixture; -PERF_TEST_P(downloadFixture, DISABLED_download, +PERF_TEST_P(downloadFixture, download, testing::Combine( OCL_TYPICAL_MAT_SIZES, testing::Range(CV_8U, CV_64F), @@ -227,6 +226,5 @@ PERF_TEST_P(downloadFixture, DISABLED_download, else OCL_PERF_ELSE - int value = 0; - SANITY_CHECK(value); + SANITY_CHECK_NOTHING(); } From 4322c47bb2ebffdc48a9046cf48e13beabed621c Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 1 Oct 2013 17:53:14 +0400 Subject: [PATCH 13/39] removed test_norm.cpp from ocl tests --- modules/ocl/test/test_norm.cpp | 63 ---------------------------------- 1 file changed, 63 deletions(-) delete mode 100644 modules/ocl/test/test_norm.cpp diff --git a/modules/ocl/test/test_norm.cpp b/modules/ocl/test/test_norm.cpp deleted file mode 100644 index 2bd847068e..0000000000 --- a/modules/ocl/test/test_norm.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. 
-// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "test_precomp.hpp" - -typedef ::testing::TestWithParam normFixture; - -TEST_P(normFixture, DISABLED_accuracy) -{ - const cv::Size srcSize = GetParam(); - - cv::Mat src1(srcSize, CV_8UC1), src2(srcSize, CV_8UC1); - cv::randu(src1, 0, 2); - cv::randu(src2, 0, 2); - - cv::ocl::oclMat oclSrc1(src1), oclSrc2(src2); - - double value = cv::norm(src1, src2, cv::NORM_INF); - double oclValue = cv::ocl::norm(oclSrc1, oclSrc2, cv::NORM_INF); - - ASSERT_EQ(value, oclValue); -} - -INSTANTIATE_TEST_CASE_P(oclNormTest, normFixture, - ::testing::Values(cv::Size(500, 500), cv::Size(1000, 1000))); From e65eb4e04278a9dc9e30097a254c8b211849b2fa Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Wed, 2 Oct 2013 14:41:00 +0400 Subject: [PATCH 14/39] Updated Google Test to 1.7.0. --- modules/ts/include/opencv2/ts/ts_gtest.h | 48 +++++++- modules/ts/src/ts_gtest.cpp | 136 +++++++++++++++++++---- 2 files changed, 162 insertions(+), 22 deletions(-) diff --git a/modules/ts/include/opencv2/ts/ts_gtest.h b/modules/ts/include/opencv2/ts/ts_gtest.h index 80b410bb3c..75453d204e 100644 --- a/modules/ts/include/opencv2/ts/ts_gtest.h +++ b/modules/ts/include/opencv2/ts/ts_gtest.h @@ -3116,6 +3116,7 @@ const char* StringFromGTestEnv(const char* flag, const char* default_val); #endif #include +#include #include #include #include @@ -7315,6 +7316,9 @@ class FloatingPoint { return ReinterpretBits(kExponentBitMask); } + // Returns the maximum representable finite floating-point number. + static RawType Max(); + // Non-static methods // Returns the bits that represents this number. @@ -7395,6 +7399,13 @@ class FloatingPoint { FloatingPointUnion u_; }; +// We cannot use std::numeric_limits::max() as it clashes with the max() +// macro defined by . +template <> +inline float FloatingPoint::Max() { return FLT_MAX; } +template <> +inline double FloatingPoint::Max() { return DBL_MAX; } + // Typedefs the instances of the FloatingPoint template class that we // care to use. 
typedef FloatingPoint Float; @@ -18112,9 +18123,9 @@ class GTEST_API_ TestInfo { return NULL; } - // Returns true if this test should run, that is if the test is not disabled - // (or it is disabled but the also_run_disabled_tests flag has been specified) - // and its full name matches the user-specified filter. + // Returns true if this test should run, that is if the test is not + // disabled (or it is disabled but the also_run_disabled_tests flag has + // been specified) and its full name matches the user-specified filter. // // Google Test allows the user to filter the tests by their full names. // The full name of a test Bar in test case Foo is defined as @@ -18130,6 +18141,14 @@ class GTEST_API_ TestInfo { // contains the character 'A' or starts with "Foo.". bool should_run() const { return should_run_; } + // Returns true iff this test will appear in the XML report. + bool is_reportable() const { + // For now, the XML report includes all tests matching the filter. + // In the future, we may trim tests that are excluded because of + // sharding. + return matches_filter_; + } + // Returns the result of the test. const TestResult* result() const { return &result_; } @@ -18242,9 +18261,15 @@ class GTEST_API_ TestCase { // Gets the number of failed tests in this test case. int failed_test_count() const; + // Gets the number of disabled tests that will be reported in the XML report. + int reportable_disabled_test_count() const; + // Gets the number of disabled tests in this test case. int disabled_test_count() const; + // Gets the number of tests to be printed in the XML report. + int reportable_test_count() const; + // Get the number of tests in this test case that should run. int test_to_run_count() const; @@ -18320,11 +18345,22 @@ class GTEST_API_ TestCase { return test_info->should_run() && test_info->result()->Failed(); } + // Returns true iff the test is disabled and will be reported in the XML + // report. 
+ static bool TestReportableDisabled(const TestInfo* test_info) { + return test_info->is_reportable() && test_info->is_disabled_; + } + // Returns true iff test is disabled. static bool TestDisabled(const TestInfo* test_info) { return test_info->is_disabled_; } + // Returns true iff this test will appear in the XML report. + static bool TestReportable(const TestInfo* test_info) { + return test_info->is_reportable(); + } + // Returns true if the given test should run. static bool ShouldRunTest(const TestInfo* test_info) { return test_info->should_run(); @@ -18617,9 +18653,15 @@ class GTEST_API_ UnitTest { // Gets the number of failed tests. int failed_test_count() const; + // Gets the number of disabled tests that will be reported in the XML report. + int reportable_disabled_test_count() const; + // Gets the number of disabled tests. int disabled_test_count() const; + // Gets the number of tests to be printed in the XML report. + int reportable_test_count() const; + // Gets the number of all tests. int total_test_count() const; diff --git a/modules/ts/src/ts_gtest.cpp b/modules/ts/src/ts_gtest.cpp index 48870913c3..50c8808aff 100644 --- a/modules/ts/src/ts_gtest.cpp +++ b/modules/ts/src/ts_gtest.cpp @@ -960,9 +960,15 @@ class GTEST_API_ UnitTestImpl { // Gets the number of failed tests. int failed_test_count() const; + // Gets the number of disabled tests that will be reported in the XML report. + int reportable_disabled_test_count() const; + // Gets the number of disabled tests. int disabled_test_count() const; + // Gets the number of tests to be printed in the XML report. + int reportable_test_count() const; + // Gets the number of all tests. 
int total_test_count() const; @@ -1670,6 +1676,10 @@ bool g_help_flag = false; } // namespace internal +static const char* GetDefaultFilter() { + return kUniversalFilter; +} + GTEST_DEFINE_bool_( also_run_disabled_tests, internal::BoolFromGTestEnv("also_run_disabled_tests", false), @@ -1696,7 +1706,7 @@ GTEST_DEFINE_string_( GTEST_DEFINE_string_( filter, - internal::StringFromGTestEnv("filter", kUniversalFilter), + internal::StringFromGTestEnv("filter", GetDefaultFilter()), "A colon-separated list of glob (not regex) patterns " "for filtering the tests to run, optionally followed by a " "'-' and a : separated list of negative patterns (tests to " @@ -1705,7 +1715,7 @@ GTEST_DEFINE_string_( GTEST_DEFINE_string_( param_filter, - internal::StringFromGTestEnv("param_filter", kUniversalFilter), + internal::StringFromGTestEnv("param_filter", GetDefaultFilter()), "Same syntax and semantics as for param, but these patterns " "have to match the test's parameters."); @@ -2225,11 +2235,22 @@ int UnitTestImpl::failed_test_count() const { return SumOverTestCaseList(test_cases_, &TestCase::failed_test_count); } +// Gets the number of disabled tests that will be reported in the XML report. +int UnitTestImpl::reportable_disabled_test_count() const { + return SumOverTestCaseList(test_cases_, + &TestCase::reportable_disabled_test_count); +} + // Gets the number of disabled tests. int UnitTestImpl::disabled_test_count() const { return SumOverTestCaseList(test_cases_, &TestCase::disabled_test_count); } +// Gets the number of tests to be printed in the XML report. +int UnitTestImpl::reportable_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::reportable_test_count); +} + // Gets the number of all tests. 
int UnitTestImpl::total_test_count() const { return SumOverTestCaseList(test_cases_, &TestCase::total_test_count); @@ -3832,10 +3853,21 @@ int TestCase::failed_test_count() const { return CountIf(test_info_list_, TestFailed); } +// Gets the number of disabled tests that will be reported in the XML report. +int TestCase::reportable_disabled_test_count() const { + return CountIf(test_info_list_, TestReportableDisabled); +} + +// Gets the number of disabled tests in this test case. int TestCase::disabled_test_count() const { return CountIf(test_info_list_, TestDisabled); } +// Gets the number of tests to be printed in the XML report. +int TestCase::reportable_test_count() const { + return CountIf(test_info_list_, TestReportable); +} + // Get the number of tests in this test case that should run. int TestCase::test_to_run_count() const { return CountIf(test_info_list_, ShouldRunTest); @@ -4353,7 +4385,7 @@ void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test, num_failures == 1 ? "TEST" : "TESTS"); } - int num_disabled = unit_test.disabled_test_count(); + int num_disabled = unit_test.reportable_disabled_test_count(); if (num_disabled && !GTEST_FLAG(also_run_disabled_tests)) { if (!num_failures) { printf("\n"); // Add a spacer if no FAILURE banner is displayed. 
@@ -4812,19 +4844,22 @@ void XmlUnitTestResultPrinter::PrintXmlTestCase(std::ostream* stream, *stream << " <" << kTestsuite; OutputXmlAttribute(stream, kTestsuite, "name", test_case.name()); OutputXmlAttribute(stream, kTestsuite, "tests", - StreamableToString(test_case.total_test_count())); + StreamableToString(test_case.reportable_test_count())); OutputXmlAttribute(stream, kTestsuite, "failures", StreamableToString(test_case.failed_test_count())); - OutputXmlAttribute(stream, kTestsuite, "disabled", - StreamableToString(test_case.disabled_test_count())); + OutputXmlAttribute( + stream, kTestsuite, "disabled", + StreamableToString(test_case.reportable_disabled_test_count())); OutputXmlAttribute(stream, kTestsuite, "errors", "0"); OutputXmlAttribute(stream, kTestsuite, "time", FormatTimeInMillisAsSeconds(test_case.elapsed_time())); *stream << TestPropertiesAsXmlAttributes(test_case.ad_hoc_test_result()) << ">\n"; - for (int i = 0; i < test_case.total_test_count(); ++i) - OutputXmlTestInfo(stream, test_case.name(), *test_case.GetTestInfo(i)); + for (int i = 0; i < test_case.total_test_count(); ++i) { + if (test_case.GetTestInfo(i)->is_reportable()) + OutputXmlTestInfo(stream, test_case.name(), *test_case.GetTestInfo(i)); + } *stream << " \n"; } @@ -4837,11 +4872,12 @@ void XmlUnitTestResultPrinter::PrintXmlUnitTest(std::ostream* stream, *stream << "<" << kTestsuites; OutputXmlAttribute(stream, kTestsuites, "tests", - StreamableToString(unit_test.total_test_count())); + StreamableToString(unit_test.reportable_test_count())); OutputXmlAttribute(stream, kTestsuites, "failures", StreamableToString(unit_test.failed_test_count())); - OutputXmlAttribute(stream, kTestsuites, "disabled", - StreamableToString(unit_test.disabled_test_count())); + OutputXmlAttribute( + stream, kTestsuites, "disabled", + StreamableToString(unit_test.reportable_disabled_test_count())); OutputXmlAttribute(stream, kTestsuites, "errors", "0"); OutputXmlAttribute( stream, kTestsuites, "timestamp", @@ 
-4859,9 +4895,9 @@ void XmlUnitTestResultPrinter::PrintXmlUnitTest(std::ostream* stream, OutputXmlAttribute(stream, kTestsuites, "name", "AllTests"); *stream << ">\n"; - for (int i = 0; i < unit_test.total_test_case_count(); ++i) { - PrintXmlTestCase(stream, *unit_test.GetTestCase(i)); + if (unit_test.GetTestCase(i)->reportable_test_count() > 0) + PrintXmlTestCase(stream, *unit_test.GetTestCase(i)); } *stream << "\n"; } @@ -4995,6 +5031,35 @@ const char* const OsStackTraceGetter::kElidedFramesMarker = "... " GTEST_NAME_ " internal frames ..."; +// A helper class that creates the premature-exit file in its +// constructor and deletes the file in its destructor. +class ScopedPrematureExitFile { + public: + explicit ScopedPrematureExitFile(const char* premature_exit_filepath) + : premature_exit_filepath_(premature_exit_filepath) { + // If a path to the premature-exit file is specified... + if (premature_exit_filepath != NULL && *premature_exit_filepath != '\0') { + // create the file with a single "0" character in it. I/O + // errors are ignored as there's nothing better we can do and we + // don't want to fail the test because of this. + FILE* pfile = posix::FOpen(premature_exit_filepath, "w"); + fwrite("0", 1, 1, pfile); + fclose(pfile); + } + } + + ~ScopedPrematureExitFile() { + if (premature_exit_filepath_ != NULL && *premature_exit_filepath_ != '\0') { + remove(premature_exit_filepath_); + } + } + + private: + const char* const premature_exit_filepath_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedPrematureExitFile); +}; + } // namespace internal // class TestEventListeners @@ -5131,11 +5196,21 @@ int UnitTest::successful_test_count() const { // Gets the number of failed tests. int UnitTest::failed_test_count() const { return impl()->failed_test_count(); } +// Gets the number of disabled tests that will be reported in the XML report. 
+int UnitTest::reportable_disabled_test_count() const { + return impl()->reportable_disabled_test_count(); +} + // Gets the number of disabled tests. int UnitTest::disabled_test_count() const { return impl()->disabled_test_count(); } +// Gets the number of tests to be printed in the XML report. +int UnitTest::reportable_test_count() const { + return impl()->reportable_test_count(); +} + // Gets the number of all tests. int UnitTest::total_test_count() const { return impl()->total_test_count(); } @@ -5207,13 +5282,12 @@ Environment* UnitTest::AddEnvironment(Environment* env) { // assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc) eventually call // this to report their results. The user code should use the // assertion macros instead of calling this directly. -GTEST_LOCK_EXCLUDED_(mutex_) void UnitTest::AddTestPartResult( TestPartResult::Type result_type, const char* file_name, int line_number, const std::string& message, - const std::string& os_stack_trace) { + const std::string& os_stack_trace) GTEST_LOCK_EXCLUDED_(mutex_) { Message msg; msg << message; @@ -5286,14 +5360,39 @@ void UnitTest::RecordProperty(const std::string& key, // We don't protect this under mutex_, as we only support calling it // from the main thread. int UnitTest::Run() { + const bool in_death_test_child_process = + internal::GTEST_FLAG(internal_run_death_test).length() > 0; + + // Google Test implements this protocol for catching that a test + // program exits before returning control to Google Test: + // + // 1. Upon start, Google Test creates a file whose absolute path + // is specified by the environment variable + // TEST_PREMATURE_EXIT_FILE. + // 2. When Google Test has finished its work, it deletes the file. + // + // This allows a test runner to set TEST_PREMATURE_EXIT_FILE before + // running a Google-Test-based test program and check the existence + // of the file at the end of the test execution to see if it has + // exited prematurely. 
+ + // If we are in the child process of a death test, don't + // create/delete the premature exit file, as doing so is unnecessary + // and will confuse the parent process. Otherwise, create/delete + // the file upon entering/leaving this function. If the program + // somehow exits before this function has a chance to return, the + // premature-exit file will be left undeleted, causing a test runner + // that understands the premature-exit-file protocol to report the + // test as having failed. + const internal::ScopedPrematureExitFile premature_exit_file( + in_death_test_child_process ? + NULL : internal::posix::GetEnv("TEST_PREMATURE_EXIT_FILE")); + // Captures the value of GTEST_FLAG(catch_exceptions). This value will be // used for the duration of the program. impl()->set_catch_exceptions(GTEST_FLAG(catch_exceptions)); #if GTEST_HAS_SEH - const bool in_death_test_child_process = - internal::GTEST_FLAG(internal_run_death_test).length() > 0; - // Either the user wants Google Test to catch exceptions thrown by the // tests or this is executing in the context of death test child // process. In either case the user does not want to see pop-up dialogs @@ -5432,7 +5531,6 @@ UnitTestImpl::UnitTestImpl(UnitTest* parent) start_timestamp_(0), elapsed_time_(0), #if GTEST_HAS_DEATH_TEST - internal_run_death_test_flag_(NULL), death_test_factory_(new DefaultDeathTestFactory), #endif // Will be overridden by the flag before first use. 
From a613a1a1da2b90cc8204509bfdaf566508280658 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 2 Oct 2013 16:06:15 +0400 Subject: [PATCH 15/39] fixed ocl::bitwise_not in case of CV_8UC1, CV_8UC2 --- modules/ocl/src/opencl/arithm_bitwise_not.cl | 69 ++++++++------------ 1 file changed, 26 insertions(+), 43 deletions(-) diff --git a/modules/ocl/src/opencl/arithm_bitwise_not.cl b/modules/ocl/src/opencl/arithm_bitwise_not.cl index 9905130013..714220ddf4 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_not.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_not.cl @@ -42,6 +42,7 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ + #if defined (DOUBLE_SUPPORT) #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable @@ -49,10 +50,11 @@ #pragma OPENCL EXTENSION cl_amd_fp64:enable #endif #endif -////////////////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////BITWISE_NOT//////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////BITWISE_NOT//////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////// -/**************************************bitwise_not without mask**************************************/ + __kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int src1_offset, __global uchar *dst, int dst_step, int dst_offset, int rows, int cols, int dst_step1) @@ -63,35 +65,22 @@ __kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align (dst_offset & 3) - int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); + int src1_index = mad24(y, src1_step, x + 
src1_offset); int dst_start = mad24(y, dst_step, dst_offset); int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - uchar4 src1_data = vload4(0, src1 + src1_index_fix); - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = ~ src1_data; - - /* if(src1_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - */ - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; + int dst_index = mad24(y, dst_step, dst_offset + x); - *((__global uchar4 *)(dst + dst_index)) = dst_data; + uchar4 src1_data = vload4(0, src1 + src1_index); + uchar4 dst_data = vload4(0, dst + dst_index); + uchar4 tmp_data = ~src1_data; + + dst_data.x = dst_index + 0 < dst_end ? tmp_data.x : dst_data.x; + dst_data.y = dst_index + 1 < dst_end ? tmp_data.y : dst_data.y; + dst_data.z = dst_index + 2 < dst_end ? tmp_data.z : dst_data.z; + dst_data.w = dst_index + 3 < dst_end ? 
tmp_data.w : dst_data.w; + + vstore4(dst_data, 0, dst + dst_index); } } @@ -106,28 +95,22 @@ __kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align (dst_offset & 3) - int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); + int src1_index = mad24(y, src1_step, x + src1_offset); int dst_start = mad24(y, dst_step, dst_offset); int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); + int dst_index = mad24(y, dst_step, dst_offset + x); char4 src1_data = vload4(0, src1 + src1_index); + char4 dst_data = vload4(0, dst + dst_index); + char4 tmp_data = ~src1_data; - char4 dst_data = *((__global char4 *)(dst + dst_index)); - char4 tmp_data = ~ src1_data; - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; + dst_data.x = dst_index + 0 < dst_end ? tmp_data.x : dst_data.x; + dst_data.y = dst_index + 1 < dst_end ? tmp_data.y : dst_data.y; + dst_data.z = dst_index + 2 < dst_end ? tmp_data.z : dst_data.z; + dst_data.w = dst_index + 3 < dst_end ? 
tmp_data.w : dst_data.w; - *((__global char4 *)(dst + dst_index)) = dst_data; + vstore4(dst_data, 0, dst + dst_index); } } From e8b93040d6afbc9c0d42502cbd9832ad127e0bb7 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 2 Oct 2013 16:39:05 +0400 Subject: [PATCH 16/39] enabled ocl::norm perf test --- modules/ocl/perf/perf_norm.cpp | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/modules/ocl/perf/perf_norm.cpp b/modules/ocl/perf/perf_norm.cpp index 363bcd2b33..35ac006394 100644 --- a/modules/ocl/perf/perf_norm.cpp +++ b/modules/ocl/perf/perf_norm.cpp @@ -51,18 +51,21 @@ using std::tr1::get; ///////////// norm//////////////////////// -typedef TestBaseWithParam normFixture; +typedef tuple normParams; +typedef TestBaseWithParam normFixture; -PERF_TEST_P(normFixture, DISABLED_norm, OCL_TYPICAL_MAT_SIZES) // TODO doesn't work properly +PERF_TEST_P(normFixture, norm, testing::Combine( + OCL_TYPICAL_MAT_SIZES, + OCL_PERF_ENUM(CV_8UC1, CV_32FC1))) { - const Size srcSize = GetParam(); - const std::string impl = getSelectedImpl(); + const normParams params = GetParam(); + const Size srcSize = get<0>(params); + const int type = get<1>(params); double value = 0.0; + const double eps = CV_MAT_DEPTH(type) == CV_8U ? 
DBL_EPSILON : 1e-3; - Mat src1(srcSize, CV_8UC1), src2(srcSize, CV_8UC1); - declare.in(src1, src2); - randu(src1, 0, 1); - randu(src2, 0, 1); + Mat src1(srcSize, type), src2(srcSize, type); + declare.in(src1, src2, WARMUP_RNG); if (RUN_OCL_IMPL) { @@ -70,7 +73,7 @@ PERF_TEST_P(normFixture, DISABLED_norm, OCL_TYPICAL_MAT_SIZES) // TODO doesn't w OCL_TEST_CYCLE() value = cv::ocl::norm(oclSrc1, oclSrc2, NORM_INF); - SANITY_CHECK(value); + SANITY_CHECK(value, eps); } else if (RUN_PLAIN_IMPL) { From 212ad971003e69645c7ee64d9e120fdee6d8ccbf Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sun, 15 Sep 2013 02:45:02 +0400 Subject: [PATCH 17/39] ocl: dynamic load of OpenCL libraries Support functions from OpenCL 1.1 / OpenCL 1.2 --- CMakeLists.txt | 9 +- cmake/OpenCVDetectOpenCL.cmake | 67 +++--- cmake/checks/opencl11.cpp | 14 ++ cmake/checks/opencl12.cpp | 14 ++ cmake/templates/cvconfig.h.cmake | 2 + .../opencv2/ocl/cl_runtime/cl_runtime.hpp | 19 ++ .../ocl/include/opencv2/ocl/private/util.hpp | 6 +- modules/ocl/src/cl_runtime/cl_runtime.cpp | 105 +++++++++ .../ocl/src/cl_runtime/generator/common.py | 199 ++++++++++++++++++ .../ocl/src/cl_runtime/generator/generate.sh | 7 + .../ocl/src/cl_runtime/generator/parser_cl.py | 112 ++++++++++ .../cl_runtime/generator/parser_clamdblas.py | 107 ++++++++++ .../cl_runtime/generator/parser_clamdfft.py | 104 +++++++++ .../template/cl_runtime_impl_opencl.hpp.in | 10 + .../template/cl_runtime_opencl.hpp.in | 34 +++ .../template/clamdblas_runtime.cpp.in | 75 +++++++ .../template/clamdblas_runtime.hpp.in | 25 +++ .../template/clamdfft_runtime.cpp.in | 75 +++++++ .../template/clamdfft_runtime.hpp.in | 25 +++ modules/ocl/src/fft.cpp | 2 +- modules/ocl/src/gemm.cpp | 2 +- modules/ocl/src/precomp.hpp | 10 + modules/ocl/src/safe_call.hpp | 6 +- 23 files changed, 975 insertions(+), 54 deletions(-) create mode 100644 cmake/checks/opencl11.cpp create mode 100644 cmake/checks/opencl12.cpp create mode 100644 
modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime.hpp create mode 100644 modules/ocl/src/cl_runtime/cl_runtime.cpp create mode 100644 modules/ocl/src/cl_runtime/generator/common.py create mode 100644 modules/ocl/src/cl_runtime/generator/generate.sh create mode 100644 modules/ocl/src/cl_runtime/generator/parser_cl.py create mode 100644 modules/ocl/src/cl_runtime/generator/parser_clamdblas.py create mode 100644 modules/ocl/src/cl_runtime/generator/parser_clamdfft.py create mode 100644 modules/ocl/src/cl_runtime/generator/template/cl_runtime_impl_opencl.hpp.in create mode 100644 modules/ocl/src/cl_runtime/generator/template/cl_runtime_opencl.hpp.in create mode 100644 modules/ocl/src/cl_runtime/generator/template/clamdblas_runtime.cpp.in create mode 100644 modules/ocl/src/cl_runtime/generator/template/clamdblas_runtime.hpp.in create mode 100644 modules/ocl/src/cl_runtime/generator/template/clamdfft_runtime.cpp.in create mode 100644 modules/ocl/src/cl_runtime/generator/template/clamdfft_runtime.hpp.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a1598d132..532fd88207 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -824,7 +824,14 @@ endif() if(HAVE_OPENCL) status("") - status(" OpenCL") + status(" OpenCL:") + set(__opencl_ver "invalid") + if(HAVE_OPENCL12) + set(__opencl_ver "1.2") + elseif(HAVE_OPENCL11) + set(__opencl_ver "1.1") + endif() + status(" Version:" ${__opencl_ver}) if(OPENCL_INCLUDE_DIR) status(" Include path:" ${OPENCL_INCLUDE_DIRS}) endif() diff --git a/cmake/OpenCVDetectOpenCL.cmake b/cmake/OpenCVDetectOpenCL.cmake index 2c96274a8c..7ed54b565d 100644 --- a/cmake/OpenCVDetectOpenCL.cmake +++ b/cmake/OpenCVDetectOpenCL.cmake @@ -6,7 +6,7 @@ if(APPLE) else(APPLE) #find_package(OpenCL QUIET) - if (NOT OPENCL_FOUND) + if(NOT OPENCL_FOUND) find_path(OPENCL_ROOT_DIR NAMES OpenCL/cl.h CL/cl.h include/CL/cl.h include/nvidia-current/CL/cl.h PATHS ENV OCLROOT ENV AMDAPPSDKROOT ENV CUDA_PATH ENV INTELOCLSDKROOT @@ -20,18 +20,7 @@ else(APPLE) DOC 
"OpenCL include directory" NO_DEFAULT_PATH) - if (X86_64) - set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win64 lib/x86_64 lib/x64) - elseif (X86) - set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win32 lib/x86) - endif() - - find_library(OPENCL_LIBRARY - NAMES OpenCL - HINTS ${OPENCL_ROOT_DIR} - PATH_SUFFIXES ${OPENCL_POSSIBLE_LIB_SUFFIXES} - DOC "OpenCL library" - NO_DEFAULT_PATH) + set(OPENCL_LIBRARY "OPENCL_DYNAMIC_LOAD") mark_as_advanced(OPENCL_INCLUDE_DIR OPENCL_LIBRARY) include(FindPackageHandleStandardArgs) @@ -40,20 +29,30 @@ else(APPLE) endif(APPLE) if(OPENCL_FOUND) - set(HAVE_OPENCL 1) - set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR}) - set(OPENCL_LIBRARIES ${OPENCL_LIBRARY}) - - if(WIN32 AND X86_64) - set(CLAMD_POSSIBLE_LIB_SUFFIXES lib64/import) - elseif(WIN32) - set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32/import) + try_compile(HAVE_OPENCL11 + "${OpenCV_BINARY_DIR}" + "${OpenCV_SOURCE_DIR}/cmake/checks/opencl11.cpp" + CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${OPENCL_INCLUDE_DIR}" + ) + if(NOT HAVE_OPENCL11) + message(STATUS "OpenCL 1.1 not found, ignore OpenCL SDK") + return() + endif() + try_compile(HAVE_OPENCL12 + "${OpenCV_BINARY_DIR}" + "${OpenCV_SOURCE_DIR}/cmake/checks/opencl12.cpp" + CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${OPENCL_INCLUDE_DIR}" + ) + if(NOT HAVE_OPENCL12) + message(STATUS "OpenCL 1.2 not found, will use OpenCL 1.1") endif() - if(X86_64 AND UNIX) - set(CLAMD_POSSIBLE_LIB_SUFFIXES lib64) - elseif(X86 AND UNIX) - set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32) + set(HAVE_OPENCL 1) + set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR}) + if(OPENCL_LIBRARY MATCHES "OPENCL_DYNAMIC_LOAD") + unset(OPENCL_LIBRARIES) + else() + set(OPENCL_LIBRARIES "${OPENCL_LIBRARY}") endif() if(WITH_OPENCLAMDFFT) @@ -70,16 +69,9 @@ if(OPENCL_FOUND) PATH_SUFFIXES include DOC "clAmdFft include directory") - find_library(CLAMDFFT_LIBRARY - NAMES clAmdFft.Runtime - HINTS ${CLAMDFFT_ROOT_DIR} - PATH_SUFFIXES ${CLAMD_POSSIBLE_LIB_SUFFIXES} - DOC "clAmdFft library") - - if(CLAMDFFT_LIBRARY AND 
CLAMDFFT_INCLUDE_DIR) + if(CLAMDFFT_INCLUDE_DIR) set(HAVE_CLAMDFFT 1) list(APPEND OPENCL_INCLUDE_DIRS "${CLAMDFFT_INCLUDE_DIR}") - list(APPEND OPENCL_LIBRARIES "${CLAMDFFT_LIBRARY}") endif() endif() @@ -97,16 +89,9 @@ if(OPENCL_FOUND) PATH_SUFFIXES include DOC "clAmdFft include directory") - find_library(CLAMDBLAS_LIBRARY - NAMES clAmdBlas - HINTS ${CLAMDBLAS_ROOT_DIR} - PATH_SUFFIXES ${CLAMD_POSSIBLE_LIB_SUFFIXES} - DOC "clAmdBlas library") - - if(CLAMDBLAS_LIBRARY AND CLAMDBLAS_INCLUDE_DIR) + if(CLAMDBLAS_INCLUDE_DIR) set(HAVE_CLAMDBLAS 1) list(APPEND OPENCL_INCLUDE_DIRS "${CLAMDBLAS_INCLUDE_DIR}") - list(APPEND OPENCL_LIBRARIES "${CLAMDBLAS_LIBRARY}") endif() endif() endif() diff --git a/cmake/checks/opencl11.cpp b/cmake/checks/opencl11.cpp new file mode 100644 index 0000000000..b17889d1d4 --- /dev/null +++ b/cmake/checks/opencl11.cpp @@ -0,0 +1,14 @@ +#if defined __APPLE__ +#include +#else +#include +#endif + +int main(int argc, char** argv) +{ +#ifdef CL_VERSION_1_1 +#else +#error OpenCL 1.1 not found +#endif + return 0; +} diff --git a/cmake/checks/opencl12.cpp b/cmake/checks/opencl12.cpp new file mode 100644 index 0000000000..639502e02a --- /dev/null +++ b/cmake/checks/opencl12.cpp @@ -0,0 +1,14 @@ +#if defined __APPLE__ +#include +#else +#include +#endif + +int main(int argc, char** argv) +{ +#ifdef CL_VERSION_1_2 +#else +#error OpenCL 1.2 not found +#endif + return 0; +} diff --git a/cmake/templates/cvconfig.h.cmake b/cmake/templates/cvconfig.h.cmake index 960becf381..14eb519d01 100644 --- a/cmake/templates/cvconfig.h.cmake +++ b/cmake/templates/cvconfig.h.cmake @@ -108,6 +108,8 @@ /* OpenCL Support */ #cmakedefine HAVE_OPENCL +#cmakedefine HAVE_OPENCL11 +#cmakedefine HAVE_OPENCL12 /* OpenEXR codec */ #cmakedefine HAVE_OPENEXR diff --git a/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime.hpp b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime.hpp new file mode 100644 index 0000000000..7b31f457e7 --- /dev/null +++ 
b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime.hpp @@ -0,0 +1,19 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef __OPENCV_OCL_CL_RUNTIME_HPP__ +#define __OPENCV_OCL_CL_RUNTIME_HPP__ + +#ifdef HAVE_OPENCL + +#if defined(HAVE_OPENCL12) +#include "cl_runtime_opencl12.hpp" +#elif defined(HAVE_OPENCL11) +#include "cl_runtime_opencl11.hpp" +#else +#error Invalid OpenCL configuration +#endif + +#endif + +#endif // __OPENCV_OCL_CL_RUNTIME_HPP__ diff --git a/modules/ocl/include/opencv2/ocl/private/util.hpp b/modules/ocl/include/opencv2/ocl/private/util.hpp index 9adae38230..3de0d438d4 100644 --- a/modules/ocl/include/opencv2/ocl/private/util.hpp +++ b/modules/ocl/include/opencv2/ocl/private/util.hpp @@ -48,11 +48,7 @@ #include "opencv2/ocl/ocl.hpp" -#if defined __APPLE__ -#include -#else -#include -#endif +#include "opencv2/ocl/cl_runtime/cl_runtime.hpp" namespace cv { diff --git a/modules/ocl/src/cl_runtime/cl_runtime.cpp b/modules/ocl/src/cl_runtime/cl_runtime.cpp new file mode 100644 index 0000000000..c237afe74c --- /dev/null +++ b/modules/ocl/src/cl_runtime/cl_runtime.cpp @@ -0,0 +1,105 @@ +#include "precomp.hpp" + +#if defined(HAVE_OPENCL) && (!defined(__APPLE__) || defined(IOS)) + +#if defined(BUILD_SHARED_LIBS) && (defined WIN32 || defined _WIN32 || defined WINCE) +#define CL_RUNTIME_EXPORT __declspec(dllexport) +#else +#define CL_RUNTIME_EXPORT +#endif + + +#include "opencv2/ocl/cl_runtime/cl_runtime.hpp" + +#if defined(__APPLE__) + #include + + static void* AppleCLGetProcAddress(const char* name) + { + static void * image = NULL; + if (!image) + { + image = dlopen("/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL", RTLD_LAZY | RTLD_GLOBAL); + if (!image) + return NULL; + } + + return dlsym(image, name); + } + #define CV_CL_GET_PROC_ADDRESS(name) AppleCLGetProcAddress(name) +#endif // __APPLE__ + +#if defined(_WIN32) + static void* WinGetProcAddress(const char* name) + { + static HMODULE opencl_module = NULL; + if (!opencl_module) + { 
+ opencl_module = GetModuleHandleA("OpenCL.dll"); + if (!opencl_module) + { + const char* name = "OpenCL.dll"; + const char* envOpenCLBinary = getenv("OPENCV_OPENCL_BINARY"); + if (envOpenCLBinary) + name = envOpenCLBinary; + opencl_module = LoadLibraryA(name); + if (!opencl_module) + return NULL; + } + } + return (void*)GetProcAddress(opencl_module, name); + } + #define CV_CL_GET_PROC_ADDRESS(name) WinGetProcAddress(name) +#endif // _WIN32 + +#if defined(linux) + #include + #include + + static void* GetProcAddress (const char* name) + { + static void* h = NULL; + if (!h) + { + const char* name = "libOpenCL.so"; + const char* envOpenCLBinary = getenv("OPENCV_OPENCL_BINARY"); + if (envOpenCLBinary) + name = envOpenCLBinary; + h = dlopen(name, RTLD_LAZY | RTLD_GLOBAL); + if (!h) + return NULL; + } + + return dlsym(h, name); + } + #define CV_CL_GET_PROC_ADDRESS(name) GetProcAddress(name) +#endif + +#ifndef CV_CL_GET_PROC_ADDRESS +#define CV_CL_GET_PROC_ADDRESS(name) NULL +#endif + +static void* opencl_check_fn(int ID) +{ + extern const char* opencl_fn_names[]; + void* func = CV_CL_GET_PROC_ADDRESS(opencl_fn_names[ID]); + if (!func) + { + std::ostringstream msg; + msg << "OpenCL function is not available: [" << opencl_fn_names[ID] << "]"; + CV_Error(CV_StsBadFunc, msg.str()); + } + extern void* opencl_fn_ptrs[]; + *(void**)(opencl_fn_ptrs[ID]) = func; + return func; +} + +#if defined(HAVE_OPENCL12) +#include "cl_runtime_opencl12_impl.hpp" +#elif defined(HAVE_OPENCL11) +#include "cl_runtime_opencl11_impl.hpp" +#else +#error Invalid OpenCL configuration +#endif + +#endif diff --git a/modules/ocl/src/cl_runtime/generator/common.py b/modules/ocl/src/cl_runtime/generator/common.py new file mode 100644 index 0000000000..99a56096b0 --- /dev/null +++ b/modules/ocl/src/cl_runtime/generator/common.py @@ -0,0 +1,199 @@ +import sys, os, re + +# +# Parser helpers +# + +def remove_comments(s): + def replacer(match): + s = match.group(0) + if s.startswith('/'): + return "" + else: + 
return s + pattern = re.compile( + r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', + re.DOTALL | re.MULTILINE + ) + return re.sub(pattern, replacer, s) + + +def getTokens(s): + return re.findall(r'[a-z_A-Z0-9_]+|[^[a-z_A-Z0-9_ \n\r\t]', s) + + +def getParameter(pos, tokens): + deep = 0 + p = [] + while True: + if pos >= len(tokens): + break + if (tokens[pos] == ')' or tokens[pos] == ',') and deep == 0: + if tokens[pos] == ')': + pos = len(tokens) + else: + pos += 1 + break + if tokens[pos] == '(': + deep += 1 + if tokens[pos] == ')': + deep -= 1 + p.append(tokens[pos]) + pos += 1 + return (' '.join(p), pos) + + +def getParameters(i, tokens): + assert tokens[i] == '(' + i += 1 + + params = [] + while True: + if i >= len(tokens) or tokens[i] == ')': + break + + (param, i) = getParameter(i, tokens) + if len(param) > 0: + params.append(param) + else: + assert False + break + + if len(params) > 0 and params[0] == 'void': + del params[0] + + return params + +def postProcessParameters(fns): + for fn in fns: + fn['params_full'] = list(fn['params']) + for i in range(len(fn['params'])): + p = fn['params'][i] + if p.find('(') != -1: + p = re.sub(r'\* *([a-zA-Z0-9_]*) ?\)', '*)', p, 1) + fn['params'][i] = p + continue + parts = re.findall(r'[a-z_A-Z0-9]+|\*', p) + if len(parts) > 1: + if parts[-1].find('*') == -1: + del parts[-1] + fn['params'][i] = ' '.join(parts) + +# +# Generator helpers +# + +def outputToString(f): + def wrapped(*args, **kwargs): + from cStringIO import StringIO + old_stdout = sys.stdout + sys.stdout = str_stdout = StringIO() + res = f(*args, **kwargs) + assert res is None + sys.stdout = old_stdout + result = str_stdout.getvalue() + result = re.sub(r'([^\n ]) [ ]+', r'\1 ', result) # don't remove spaces at start of line + result = re.sub(r' ,', ',', result) + result = re.sub(r' \*', '*', result) + result = re.sub(r'\( ', '(', result) + result = re.sub(r' \)', ')', result) + return result + return wrapped + +@outputToString +def 
generateEnums(fns, prefix='OPENCL_FN'): + print '// generated by %s' % os.path.basename(sys.argv[0]) + print 'enum %s_ID {' % prefix + first = True + for fn in fns: + print ' %s_%s%s,' % (prefix, fn['name'], ' = 0' if first else '') + first = False + print '};' + +@outputToString +def generateNames(fns, prefix='opencl_fn'): + print '// generated by %s' % os.path.basename(sys.argv[0]) + print 'const char* %s_names[] = {' % prefix + for fn in fns: + print ' "%s",' % (fn['name']) + print '};' + +@outputToString +def generatePtrs(fns, prefix='opencl_fn'): + print '// generated by %s' % os.path.basename(sys.argv[0]) + print 'void* %s_ptrs[] = {' % prefix + for fn in fns: + print ' &%s,' % (fn['name']) + print '};' + +@outputToString +def generateRemapOrigin(fns): + print '// generated by %s' % os.path.basename(sys.argv[0]) + for fn in fns: + print '#define %s %s_' % (fn['name'], fn['name']) + +@outputToString +def generateRemapDynamic(fns): + print '// generated by %s' % os.path.basename(sys.argv[0]) + for fn in fns: + print '#undef %s' % (fn['name']) + print '#define %s %s_pfn' % (fn['name'], fn['name']) + +@outputToString +def generateParamsCfg(fns): + for fn in fns: + print '%s %d' % (fn['name'], len(fn['params'])) + +@outputToString +def generateFnDeclaration(fns): + print '// generated by %s' % os.path.basename(sys.argv[0]) + for fn in fns: + print 'extern CL_RUNTIME_EXPORT %s %s (%s *%s)(%s);' % (' '.join(fn['modifiers']), ' '.join(fn['ret']), ' '.join(fn['calling']), + fn['name'], ', '.join(fn['params'] if not fn.has_key('params_full') else fn['params_full'])) + +@outputToString +def generateFnDefinition(fns, lprefix='opencl_fn', uprefix='OPENCL_FN'): + print '// generated by %s' % os.path.basename(sys.argv[0]) + for fn in fns: + print '%s%s (%s *%s)(%s) = %s%d<%s_%s, %s%s>::switch_fn;' % \ + ((' '.join(fn['modifiers'] + ' ') if len(fn['modifiers']) > 0 else ''), + ' '.join(fn['ret']), ' '.join(fn['calling']), fn['name'], ', '.join(fn['params']), \ + lprefix, 
len(fn['params']), uprefix, fn['name'], ' '.join(fn['ret']), ('' if len(fn['params']) == 0 else ', ' + ', '.join(fn['params']))) + +@outputToString +def generateTemplates(sz, lprefix, switch_name, calling_convention=''): + print '// generated by %s' % os.path.basename(sys.argv[0]) + for sz in range(sz): + template_params = ['int ID', 'typename _R'] + types = [] + types_with_params = [] + params = [] + for i in range(1, sz + 1): + template_params.append('typename _T%d' % i) + types.append('_T%d' % i) + types_with_params.append('_T%d p%d' % (i, i)) + params.append('p%d' % i) + print 'template <%s>' % ', '.join(template_params) + print 'struct %s%d' % (lprefix, sz) + print '{' + print ' typedef _R (%s *FN)(%s);' % (calling_convention, ', '.join(types)) + print ' static _R %s switch_fn(%s)' % (calling_convention, ', '.join(types_with_params)) + print ' { return ((FN)%s(ID))(%s); }' % (switch_name, ', '.join(params)) + print '};' + print '' + + +def ProcessTemplate(inputFile, ctx, noteLine='//\n// AUTOGENERATED, DO NOT EDIT\n//'): + f = open(inputFile, "r") + if noteLine: + print noteLine + for line in f: + if line.startswith('@'): + assert line[-1] == '\n' + line = line[:-1] # remove '\n' + assert line[-1] == '@' + name = line[1:-1] + assert ctx.has_key(name), name + line = ctx[name] + print line, + f.close() diff --git a/modules/ocl/src/cl_runtime/generator/generate.sh b/modules/ocl/src/cl_runtime/generator/generate.sh new file mode 100644 index 0000000000..5be039dfef --- /dev/null +++ b/modules/ocl/src/cl_runtime/generator/generate.sh @@ -0,0 +1,7 @@ +#!/bin/bash -e +echo "Generate files for CL runtime..." +cat sources/opencl11/cl.h | python parser_cl.py cl_runtime_opencl11 +cat sources/opencl12/cl.h | python parser_cl.py cl_runtime_opencl12 +cat sources/clAmdBlas.h | python parser_clamdblas.py +cat sources/clAmdFft.h | python parser_clamdfft.py +echo "Generate files for CL runtime... 
Done" diff --git a/modules/ocl/src/cl_runtime/generator/parser_cl.py b/modules/ocl/src/cl_runtime/generator/parser_cl.py new file mode 100644 index 0000000000..e711e4cf09 --- /dev/null +++ b/modules/ocl/src/cl_runtime/generator/parser_cl.py @@ -0,0 +1,112 @@ +#!/bin/python +# usage: +# cat opencl11/cl.h | $0 cl_runtime_opencl11 +# cat opencl12/cl.h | $0 cl_runtime_opencl12 +import sys, re; + +from common import remove_comments, getTokens, getParameters, postProcessParameters + +try: + if len(sys.argv) > 1: + outfile = open('../../../include/opencv2/ocl/cl_runtime/' + sys.argv[1] + '.hpp', "w") + outfile_impl = open('../' + sys.argv[1] + '_impl.hpp', "w") + if len(sys.argv) > 2: + f = open(sys.argv[2], "r") + else: + f = sys.stdin + else: + sys.exit("ERROR. Specify output file") +except: + sys.exit("ERROR. Can't open input/output file, check parameters") + +fns = [] + +while True: + line = f.readline() + if len(line) == 0: + break + assert isinstance(line, str) + parts = line.split(); + if line.startswith('extern') and line.find('CL_API_CALL') != -1: + # read block of lines + while True: + nl = f.readline() + nl = nl.strip() + nl = re.sub(r'\n', r'', nl) + if len(nl) == 0: + break; + line += ' ' + nl + + line = remove_comments(line) + + parts = getTokens(line) + + fn = {} + modifiers = [] + ret = [] + calling = [] + i = 1 + while (i < len(parts)): + if parts[i].startswith('CL_'): + modifiers.append(parts[i]) + else: + break + i += 1 + while (i < len(parts)): + if not parts[i].startswith('CL_'): + ret.append(parts[i]) + else: + break + i += 1 + while (i < len(parts)): + calling.append(parts[i]) + i += 1 + if parts[i - 1] == 'CL_API_CALL': + break + + fn['modifiers'] = [] # modifiers + fn['ret'] = ret + fn['calling'] = [] # calling + + # print 'modifiers='+' '.join(modifiers) + # print 'ret='+' '.join(type) + # print 'calling='+' '.join(calling) + + name = parts[i]; i += 1; + fn['name'] = name + print 'name=' + name + + params = getParameters(i, parts) + + 
fn['params'] = params + # print 'params="'+','.join(params)+'"' + + fns.append(fn) + +f.close() + +print 'Found %d functions' % len(fns) + +postProcessParameters(fns) + +from pprint import pprint +pprint(fns) + +from common import * + +ctx = {} +ctx['CL_REMAP_ORIGIN'] = generateRemapOrigin(fns) +ctx['CL_REMAP_DYNAMIC'] = generateRemapDynamic(fns) +ctx['CL_FN_DECLARATIONS'] = generateFnDeclaration(fns) + +sys.stdout = outfile +ProcessTemplate('template/cl_runtime_opencl.hpp.in', ctx) + +ctx['CL_FN_ENUMS'] = generateEnums(fns) +ctx['CL_FN_NAMES'] = generateNames(fns) +ctx['CL_FN_DEFINITIONS'] = generateFnDefinition(fns) +ctx['CL_FN_PTRS'] = generatePtrs(fns) +ctx['CL_FN_SWITCH'] = generateTemplates(15, 'opencl_fn', 'opencl_check_fn') + +sys.stdout = outfile_impl +ProcessTemplate('template/cl_runtime_impl_opencl.hpp.in', ctx) diff --git a/modules/ocl/src/cl_runtime/generator/parser_clamdblas.py b/modules/ocl/src/cl_runtime/generator/parser_clamdblas.py new file mode 100644 index 0000000000..52e62c5de8 --- /dev/null +++ b/modules/ocl/src/cl_runtime/generator/parser_clamdblas.py @@ -0,0 +1,107 @@ +#!/bin/python +# usage: +# cat clAmdBlas.h | $0 +import sys, re; + +from common import remove_comments, getTokens, getParameters, postProcessParameters + +try: + if len(sys.argv) > 1: + f = open(sys.argv[1], "r") + else: + f = sys.stdin +except: + sys.exit("ERROR. 
Can't open input file") + +fns = [] + +while True: + line = f.readline() + if len(line) == 0: + break + assert isinstance(line, str) + line = line.strip() + parts = line.split(); + if (line.startswith('clAmd') or line.startswith('cl_') or line == 'void') and len(line.split()) == 1 and line.find('(') == -1: + fn = {} + modifiers = [] + ret = [] + calling = [] + i = 0 + while (i < len(parts)): + if parts[i].startswith('CL_'): + modifiers.append(parts[i]) + else: + break + i += 1 + while (i < len(parts)): + if not parts[i].startswith('CL_'): + ret.append(parts[i]) + else: + break + i += 1 + while (i < len(parts)): + calling.append(parts[i]) + i += 1 + fn['modifiers'] = [] # modifiers + fn['ret'] = ret + fn['calling'] = calling + + # print 'modifiers='+' '.join(modifiers) + # print 'ret='+' '.join(type) + # print 'calling='+' '.join(calling) + + # read block of lines + line = f.readline() + while True: + nl = f.readline() + nl = nl.strip() + nl = re.sub(r'\n', r'', nl) + if len(nl) == 0: + break; + line += ' ' + nl + + line = remove_comments(line) + + parts = getTokens(line) + + i = 0; + + name = parts[i]; i += 1; + fn['name'] = name + print 'name=' + name + + params = getParameters(i, parts) + + fn['params'] = params + # print 'params="'+','.join(params)+'"' + + fns.append(fn) + +f.close() + +print 'Found %d functions' % len(fns) + +postProcessParameters(fns) + +from pprint import pprint +pprint(fns) + +from common import * + +ctx = {} +ctx['CLAMDBLAS_REMAP_ORIGIN'] = generateRemapOrigin(fns) +ctx['CLAMDBLAS_REMAP_DYNAMIC'] = generateRemapDynamic(fns) +ctx['CLAMDBLAS_FN_DECLARATIONS'] = generateFnDeclaration(fns) + +sys.stdout = open('../../../include/opencv2/ocl/cl_runtime/clamdblas_runtime.hpp', 'w') +ProcessTemplate('template/clamdblas_runtime.hpp.in', ctx) + +ctx['CL_FN_ENUMS'] = generateEnums(fns, 'OPENCLAMDBLAS_FN') +ctx['CL_FN_NAMES'] = generateNames(fns, 'openclamdblas_fn') +ctx['CL_FN_DEFINITIONS'] = generateFnDefinition(fns, 'openclamdblas_fn', 
'OPENCLAMDBLAS_FN') +ctx['CL_FN_PTRS'] = generatePtrs(fns, 'openclamdblas_fn') +ctx['CL_FN_SWITCH'] = generateTemplates(23, 'openclamdblas_fn', 'openclamdblas_check_fn', '') + +sys.stdout = open('../clamdblas_runtime.cpp', 'w') +ProcessTemplate('template/clamdblas_runtime.cpp.in', ctx) diff --git a/modules/ocl/src/cl_runtime/generator/parser_clamdfft.py b/modules/ocl/src/cl_runtime/generator/parser_clamdfft.py new file mode 100644 index 0000000000..35b78cad43 --- /dev/null +++ b/modules/ocl/src/cl_runtime/generator/parser_clamdfft.py @@ -0,0 +1,104 @@ +#!/bin/python +# usage: +# cat clAmdFft.h | $0 +import sys, re; + +from common import remove_comments, getTokens, getParameters, postProcessParameters + + +try: + if len(sys.argv) > 1: + f = open(sys.argv[1], "r") + else: + f = sys.stdin +except: + sys.exit("ERROR. Can't open input file") + +fns = [] + +while True: + line = f.readline() + if len(line) == 0: + break + assert isinstance(line, str) + line = line.strip() + if line.startswith('CLAMDFFTAPI'): + line = re.sub(r'\n', r'', line) + while True: + nl = f.readline() + nl = nl.strip() + nl = re.sub(r'\n', r'', nl) + if len(nl) == 0: + break; + line += ' ' + nl + + line = remove_comments(line) + + parts = getTokens(line) + + fn = {} + modifiers = [] + ret = [] + calling = [] + + i = 0 + while True: + if parts[i] == "CLAMDFFTAPI": + modifiers.append(parts[i]) + else: + break + i += 1 + while (i < len(parts)): + if not parts[i] == '(': + ret.append(parts[i]) + else: + del ret[-1] + i -= 1 + break + i += 1 + + fn['modifiers'] = [] # modifiers + fn['ret'] = ret + fn['calling'] = calling + + name = parts[i]; i += 1; + fn['name'] = name + print 'name=' + name + + params = getParameters(i, parts) + + if len(params) > 0 and params[0] == 'void': + del params[0] + + fn['params'] = params + # print 'params="'+','.join(params)+'"' + + fns.append(fn) + +f.close() + +print 'Found %d functions' % len(fns) + +postProcessParameters(fns) + +from pprint import pprint +pprint(fns) + 
+from common import * + +ctx = {} +ctx['CLAMDFFT_REMAP_ORIGIN'] = generateRemapOrigin(fns) +ctx['CLAMDFFT_REMAP_DYNAMIC'] = generateRemapDynamic(fns) +ctx['CLAMDFFT_FN_DECLARATIONS'] = generateFnDeclaration(fns) + +sys.stdout = open('../../../include/opencv2/ocl/cl_runtime/clamdfft_runtime.hpp', 'w') +ProcessTemplate('template/clamdfft_runtime.hpp.in', ctx) + +ctx['CL_FN_ENUMS'] = generateEnums(fns, 'OPENCLAMDFFT_FN') +ctx['CL_FN_NAMES'] = generateNames(fns, 'openclamdfft_fn') +ctx['CL_FN_DEFINITIONS'] = generateFnDefinition(fns, 'openclamdfft_fn', 'OPENCLAMDFFT_FN') +ctx['CL_FN_PTRS'] = generatePtrs(fns, 'openclamdfft_fn') +ctx['CL_FN_SWITCH'] = generateTemplates(23, 'openclamdfft_fn', 'openclamdfft_check_fn', '') + +sys.stdout = open('../clamdfft_runtime.cpp', 'w') +ProcessTemplate('template/clamdfft_runtime.cpp.in', ctx) diff --git a/modules/ocl/src/cl_runtime/generator/template/cl_runtime_impl_opencl.hpp.in b/modules/ocl/src/cl_runtime/generator/template/cl_runtime_impl_opencl.hpp.in new file mode 100644 index 0000000000..ff0395dcd3 --- /dev/null +++ b/modules/ocl/src/cl_runtime/generator/template/cl_runtime_impl_opencl.hpp.in @@ -0,0 +1,10 @@ +@CL_FN_ENUMS@ +@CL_FN_NAMES@ + +namespace { +@CL_FN_SWITCH@ +} + +@CL_FN_DEFINITIONS@ + +@CL_FN_PTRS@ diff --git a/modules/ocl/src/cl_runtime/generator/template/cl_runtime_opencl.hpp.in b/modules/ocl/src/cl_runtime/generator/template/cl_runtime_opencl.hpp.in new file mode 100644 index 0000000000..0079cb696d --- /dev/null +++ b/modules/ocl/src/cl_runtime/generator/template/cl_runtime_opencl.hpp.in @@ -0,0 +1,34 @@ +#ifndef __OPENCV_OCL_CL_RUNTIME_OPENCL_HPP__ +#define __OPENCV_OCL_CL_RUNTIME_OPENCL_HPP__ + +#ifdef HAVE_OPENCL + +#if defined __APPLE__ && !defined(IOS) +#include +#else + +@CL_REMAP_ORIGIN@ + +#if defined __APPLE__ +#include +#else +#include +#endif + +@CL_REMAP_DYNAMIC@ + +#ifndef CL_RUNTIME_EXPORT +#if (defined(BUILD_SHARED_LIBS) || defined(OPENCV_OCL_SHARED)) && (defined WIN32 || defined _WIN32 || defined 
WINCE) +#define CL_RUNTIME_EXPORT __declspec(dllimport) +#else +#define CL_RUNTIME_EXPORT +#endif +#endif + +@CL_FN_DECLARATIONS@ + +#endif + +#endif + +#endif // __OPENCV_OCL_CL_RUNTIME_OPENCL_HPP__ diff --git a/modules/ocl/src/cl_runtime/generator/template/clamdblas_runtime.cpp.in b/modules/ocl/src/cl_runtime/generator/template/clamdblas_runtime.cpp.in new file mode 100644 index 0000000000..8492edda9e --- /dev/null +++ b/modules/ocl/src/cl_runtime/generator/template/clamdblas_runtime.cpp.in @@ -0,0 +1,75 @@ +#include "precomp.hpp" + +#ifdef HAVE_CLAMDBLAS + +#include "opencv2/ocl/cl_runtime/cl_runtime.hpp" +#include "opencv2/ocl/cl_runtime/clamdblas_runtime.hpp" + +#if defined(_WIN32) + static void* WinGetProcAddress(const char* name) + { + static HMODULE opencl_module = NULL; + if (!opencl_module) + { + opencl_module = GetModuleHandleA("clAmdBlas.dll"); + if (!opencl_module) + { + opencl_module = LoadLibraryA("clAmdBlas.dll"); + if (!opencl_module) + return NULL; + } + } + return (void*)GetProcAddress(opencl_module, name); + } + #define CV_CL_GET_PROC_ADDRESS(name) WinGetProcAddress(name) +#endif // _WIN32 + +#if defined(linux) + #include + #include + + static void* GetProcAddress (const char* name) + { + static void* h = NULL; + if (!h) + { + h = dlopen("libclAmdBlas.so", RTLD_LAZY | RTLD_GLOBAL); + if (!h) + return NULL; + } + + return dlsym(h, name); + } + #define CV_CL_GET_PROC_ADDRESS(name) GetProcAddress(name) +#endif + +#ifndef CV_CL_GET_PROC_ADDRESS +#define CV_CL_GET_PROC_ADDRESS(name) NULL +#endif + +@CL_FN_ENUMS@ +@CL_FN_NAMES@ + +static void* openclamdblas_check_fn(int ID) +{ + void* func = CV_CL_GET_PROC_ADDRESS(openclamdblas_fn_names[ID]); + if (!func) + { + std::ostringstream msg; + msg << "OpenCL AMD BLAS function is not available: [" << openclamdblas_fn_names[ID] << "]"; + CV_Error(CV_StsBadFunc, msg.str()); + } + extern void* openclamdblas_fn_ptrs[]; + *(void**)(openclamdblas_fn_ptrs[ID]) = func; + return func; +} + +namespace { +@CL_FN_SWITCH@ 
+} + +@CL_FN_DEFINITIONS@ + +@CL_FN_PTRS@ + +#endif diff --git a/modules/ocl/src/cl_runtime/generator/template/clamdblas_runtime.hpp.in b/modules/ocl/src/cl_runtime/generator/template/clamdblas_runtime.hpp.in new file mode 100644 index 0000000000..cbffb0861a --- /dev/null +++ b/modules/ocl/src/cl_runtime/generator/template/clamdblas_runtime.hpp.in @@ -0,0 +1,25 @@ +#ifndef __OPENCV_OCL_CLAMDBLAS_RUNTIME_HPP__ +#define __OPENCV_OCL_CLAMDBLAS_RUNTIME_HPP__ + +#ifdef HAVE_CLAMDBLAS + +@CLAMDBLAS_REMAP_ORIGIN@ + +#include + +@CLAMDBLAS_REMAP_DYNAMIC@ + +#ifndef CL_RUNTIME_EXPORT +#if (defined(BUILD_SHARED_LIBS) || defined(OPENCV_OCL_SHARED)) && (defined WIN32 || defined _WIN32 || defined WINCE) +#define CL_RUNTIME_EXPORT __declspec(dllimport) +#else +#define CL_RUNTIME_EXPORT +#endif +#endif + + +@CLAMDBLAS_FN_DECLARATIONS@ + +#endif + +#endif // __OPENCV_OCL_CLAMDBLAS_RUNTIME_HPP__ diff --git a/modules/ocl/src/cl_runtime/generator/template/clamdfft_runtime.cpp.in b/modules/ocl/src/cl_runtime/generator/template/clamdfft_runtime.cpp.in new file mode 100644 index 0000000000..aee6bd8ab6 --- /dev/null +++ b/modules/ocl/src/cl_runtime/generator/template/clamdfft_runtime.cpp.in @@ -0,0 +1,75 @@ +#include "precomp.hpp" + +#ifdef HAVE_CLAMDFFT + +#include "opencv2/ocl/cl_runtime/cl_runtime.hpp" +#include "opencv2/ocl/cl_runtime/clamdfft_runtime.hpp" + +#if defined(_WIN32) + static void* WinGetProcAddress(const char* name) + { + static HMODULE opencl_module = NULL; + if (!opencl_module) + { + opencl_module = GetModuleHandleA("clAmdFft.Runtime.dll"); + if (!opencl_module) + { + opencl_module = LoadLibraryA("clAmdFft.Runtime.dll"); + if (!opencl_module) + return NULL; + } + } + return (void*)GetProcAddress(opencl_module, name); + } + #define CV_CL_GET_PROC_ADDRESS(name) WinGetProcAddress(name) +#endif // _WIN32 + +#if defined(linux) + #include + #include + + static void* GetProcAddress (const char* name) + { + static void* h = NULL; + if (!h) + { + h = 
dlopen("libclAmdFft.Runtime.so", RTLD_LAZY | RTLD_GLOBAL); + if (!h) + return NULL; + } + + return dlsym(h, name); + } + #define CV_CL_GET_PROC_ADDRESS(name) GetProcAddress(name) +#endif + +#ifndef CV_CL_GET_PROC_ADDRESS +#define CV_CL_GET_PROC_ADDRESS(name) NULL +#endif + +@CL_FN_ENUMS@ +@CL_FN_NAMES@ + +static void* openclamdfft_check_fn(int ID) +{ + void* func = CV_CL_GET_PROC_ADDRESS(openclamdfft_fn_names[ID]); + if (!func) + { + std::ostringstream msg; + msg << "OpenCL AMD FFT function is not available: [" << openclamdfft_fn_names[ID] << "]"; + CV_Error(CV_StsBadFunc, msg.str()); + } + extern void* openclamdfft_fn_ptrs[]; + *(void**)(openclamdfft_fn_ptrs[ID]) = func; + return func; +} + +namespace { +@CL_FN_SWITCH@ +} + +@CL_FN_DEFINITIONS@ + +@CL_FN_PTRS@ + +#endif diff --git a/modules/ocl/src/cl_runtime/generator/template/clamdfft_runtime.hpp.in b/modules/ocl/src/cl_runtime/generator/template/clamdfft_runtime.hpp.in new file mode 100644 index 0000000000..5e26d0154e --- /dev/null +++ b/modules/ocl/src/cl_runtime/generator/template/clamdfft_runtime.hpp.in @@ -0,0 +1,25 @@ +#ifndef __OPENCV_OCL_CLAMDFFT_RUNTIME_HPP__ +#define __OPENCV_OCL_CLAMDFFT_RUNTIME_HPP__ + +#ifdef HAVE_CLAMDFFT + +@CLAMDFFT_REMAP_ORIGIN@ + +#include + +@CLAMDFFT_REMAP_DYNAMIC@ + +#ifndef CL_RUNTIME_EXPORT +#if (defined(BUILD_SHARED_LIBS) || defined(OPENCV_OCL_SHARED)) && (defined WIN32 || defined _WIN32 || defined WINCE) +#define CL_RUNTIME_EXPORT __declspec(dllimport) +#else +#define CL_RUNTIME_EXPORT +#endif +#endif + + +@CLAMDFFT_FN_DECLARATIONS@ + +#endif + +#endif // __OPENCV_OCL_CLAMDFFT_RUNTIME_HPP__ diff --git a/modules/ocl/src/fft.cpp b/modules/ocl/src/fft.cpp index 7aa40e8b7b..b6cc070fb5 100644 --- a/modules/ocl/src/fft.cpp +++ b/modules/ocl/src/fft.cpp @@ -59,7 +59,7 @@ namespace cv { namespace ocl { }} void cv::ocl::fft_teardown(){} #else -#include "clAmdFft.h" +#include "opencv2/ocl/cl_runtime/clamdfft_runtime.hpp" namespace cv { namespace ocl diff --git 
a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp index 7e31cdbf4f..ec03c2f932 100644 --- a/modules/ocl/src/gemm.cpp +++ b/modules/ocl/src/gemm.cpp @@ -73,7 +73,7 @@ void cv::ocl::clBlasTeardown() } #else -#include "clAmdBlas.h" +#include "opencv2/ocl/cl_runtime/clamdblas_runtime.hpp" using namespace cv; static bool clBlasInitialized = false; diff --git a/modules/ocl/src/precomp.hpp b/modules/ocl/src/precomp.hpp index a50ab900ab..039e7ff061 100644 --- a/modules/ocl/src/precomp.hpp +++ b/modules/ocl/src/precomp.hpp @@ -52,8 +52,18 @@ #pragma warning( disable: 4267 4324 4244 4251 4710 4711 4514 4996 ) #endif +#if defined(_WIN32) +#include +#endif + #include "cvconfig.h" +#if defined(BUILD_SHARED_LIBS) && (defined WIN32 || defined _WIN32 || defined WINCE) +#define CL_RUNTIME_EXPORT __declspec(dllexport) +#else +#define CL_RUNTIME_EXPORT +#endif + #include #include #include diff --git a/modules/ocl/src/safe_call.hpp b/modules/ocl/src/safe_call.hpp index ba36cabd32..574400eefd 100644 --- a/modules/ocl/src/safe_call.hpp +++ b/modules/ocl/src/safe_call.hpp @@ -46,11 +46,7 @@ #ifndef __OPENCV_OPENCL_SAFE_CALL_HPP__ #define __OPENCV_OPENCL_SAFE_CALL_HPP__ -#if defined __APPLE__ -#include -#else -#include -#endif +#include "opencv2/ocl/cl_runtime/cl_runtime.hpp" #if defined(__GNUC__) #define openCLSafeCall(expr) ___openCLSafeCall(expr, __FILE__, __LINE__, __func__) From 5ab49c4cc222f5c76c83407e8ac0e0704a9e55c2 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sun, 15 Sep 2013 03:06:47 +0400 Subject: [PATCH 18/39] ocl: cl_runtime: added autogenerated code --- .../ocl/cl_runtime/cl_runtime_opencl11.hpp | 333 ++++++ .../ocl/cl_runtime/cl_runtime_opencl12.hpp | 389 +++++++ .../ocl/cl_runtime/clamdblas_runtime.hpp | 728 +++++++++++++ .../ocl/cl_runtime/clamdfft_runtime.hpp | 156 +++ .../cl_runtime/cl_runtime_opencl11_impl.hpp | 435 ++++++++ .../cl_runtime/cl_runtime_opencl12_impl.hpp | 491 +++++++++ .../ocl/src/cl_runtime/clamdblas_runtime.cpp | 968 ++++++++++++++++++ 
.../ocl/src/cl_runtime/clamdfft_runtime.cpp | 396 +++++++ 8 files changed, 3896 insertions(+) create mode 100644 modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl11.hpp create mode 100644 modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl12.hpp create mode 100644 modules/ocl/include/opencv2/ocl/cl_runtime/clamdblas_runtime.hpp create mode 100644 modules/ocl/include/opencv2/ocl/cl_runtime/clamdfft_runtime.hpp create mode 100644 modules/ocl/src/cl_runtime/cl_runtime_opencl11_impl.hpp create mode 100644 modules/ocl/src/cl_runtime/cl_runtime_opencl12_impl.hpp create mode 100644 modules/ocl/src/cl_runtime/clamdblas_runtime.cpp create mode 100644 modules/ocl/src/cl_runtime/clamdfft_runtime.cpp diff --git a/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl11.hpp b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl11.hpp new file mode 100644 index 0000000000..4155dce963 --- /dev/null +++ b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl11.hpp @@ -0,0 +1,333 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef __OPENCV_OCL_CL_RUNTIME_OPENCL_HPP__ +#define __OPENCV_OCL_CL_RUNTIME_OPENCL_HPP__ + +#ifdef HAVE_OPENCL + +#if defined __APPLE__ && !defined(IOS) +#include +#else + +// generated by parser_cl.py +#define clGetPlatformIDs clGetPlatformIDs_ +#define clGetPlatformInfo clGetPlatformInfo_ +#define clGetDeviceIDs clGetDeviceIDs_ +#define clGetDeviceInfo clGetDeviceInfo_ +#define clCreateContext clCreateContext_ +#define clCreateContextFromType clCreateContextFromType_ +#define clRetainContext clRetainContext_ +#define clReleaseContext clReleaseContext_ +#define clGetContextInfo clGetContextInfo_ +#define clCreateCommandQueue clCreateCommandQueue_ +#define clRetainCommandQueue clRetainCommandQueue_ +#define clReleaseCommandQueue clReleaseCommandQueue_ +#define clGetCommandQueueInfo clGetCommandQueueInfo_ +#define clSetCommandQueueProperty clSetCommandQueueProperty_ +#define clCreateBuffer clCreateBuffer_ +#define 
clCreateSubBuffer clCreateSubBuffer_ +#define clCreateImage2D clCreateImage2D_ +#define clCreateImage3D clCreateImage3D_ +#define clRetainMemObject clRetainMemObject_ +#define clReleaseMemObject clReleaseMemObject_ +#define clGetSupportedImageFormats clGetSupportedImageFormats_ +#define clGetMemObjectInfo clGetMemObjectInfo_ +#define clGetImageInfo clGetImageInfo_ +#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_ +#define clCreateSampler clCreateSampler_ +#define clRetainSampler clRetainSampler_ +#define clReleaseSampler clReleaseSampler_ +#define clGetSamplerInfo clGetSamplerInfo_ +#define clCreateProgramWithSource clCreateProgramWithSource_ +#define clCreateProgramWithBinary clCreateProgramWithBinary_ +#define clRetainProgram clRetainProgram_ +#define clReleaseProgram clReleaseProgram_ +#define clBuildProgram clBuildProgram_ +#define clUnloadCompiler clUnloadCompiler_ +#define clGetProgramInfo clGetProgramInfo_ +#define clGetProgramBuildInfo clGetProgramBuildInfo_ +#define clCreateKernel clCreateKernel_ +#define clCreateKernelsInProgram clCreateKernelsInProgram_ +#define clRetainKernel clRetainKernel_ +#define clReleaseKernel clReleaseKernel_ +#define clSetKernelArg clSetKernelArg_ +#define clGetKernelInfo clGetKernelInfo_ +#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_ +#define clWaitForEvents clWaitForEvents_ +#define clGetEventInfo clGetEventInfo_ +#define clCreateUserEvent clCreateUserEvent_ +#define clRetainEvent clRetainEvent_ +#define clReleaseEvent clReleaseEvent_ +#define clSetUserEventStatus clSetUserEventStatus_ +#define clSetEventCallback clSetEventCallback_ +#define clGetEventProfilingInfo clGetEventProfilingInfo_ +#define clFlush clFlush_ +#define clFinish clFinish_ +#define clEnqueueReadBuffer clEnqueueReadBuffer_ +#define clEnqueueReadBufferRect clEnqueueReadBufferRect_ +#define clEnqueueWriteBuffer clEnqueueWriteBuffer_ +#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_ +#define clEnqueueCopyBuffer 
clEnqueueCopyBuffer_ +#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_ +#define clEnqueueReadImage clEnqueueReadImage_ +#define clEnqueueWriteImage clEnqueueWriteImage_ +#define clEnqueueCopyImage clEnqueueCopyImage_ +#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_ +#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_ +#define clEnqueueMapBuffer clEnqueueMapBuffer_ +#define clEnqueueMapImage clEnqueueMapImage_ +#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_ +#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_ +#define clEnqueueTask clEnqueueTask_ +#define clEnqueueNativeKernel clEnqueueNativeKernel_ +#define clEnqueueMarker clEnqueueMarker_ +#define clEnqueueWaitForEvents clEnqueueWaitForEvents_ +#define clEnqueueBarrier clEnqueueBarrier_ +#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_ + +#if defined __APPLE__ +#include +#else +#include +#endif + +// generated by parser_cl.py +#undef clGetPlatformIDs +#define clGetPlatformIDs clGetPlatformIDs_pfn +#undef clGetPlatformInfo +#define clGetPlatformInfo clGetPlatformInfo_pfn +#undef clGetDeviceIDs +#define clGetDeviceIDs clGetDeviceIDs_pfn +#undef clGetDeviceInfo +#define clGetDeviceInfo clGetDeviceInfo_pfn +#undef clCreateContext +#define clCreateContext clCreateContext_pfn +#undef clCreateContextFromType +#define clCreateContextFromType clCreateContextFromType_pfn +#undef clRetainContext +#define clRetainContext clRetainContext_pfn +#undef clReleaseContext +#define clReleaseContext clReleaseContext_pfn +#undef clGetContextInfo +#define clGetContextInfo clGetContextInfo_pfn +#undef clCreateCommandQueue +#define clCreateCommandQueue clCreateCommandQueue_pfn +#undef clRetainCommandQueue +#define clRetainCommandQueue clRetainCommandQueue_pfn +#undef clReleaseCommandQueue +#define clReleaseCommandQueue clReleaseCommandQueue_pfn +#undef clGetCommandQueueInfo +#define clGetCommandQueueInfo clGetCommandQueueInfo_pfn +#undef clSetCommandQueueProperty +#define 
clSetCommandQueueProperty clSetCommandQueueProperty_pfn +#undef clCreateBuffer +#define clCreateBuffer clCreateBuffer_pfn +#undef clCreateSubBuffer +#define clCreateSubBuffer clCreateSubBuffer_pfn +#undef clCreateImage2D +#define clCreateImage2D clCreateImage2D_pfn +#undef clCreateImage3D +#define clCreateImage3D clCreateImage3D_pfn +#undef clRetainMemObject +#define clRetainMemObject clRetainMemObject_pfn +#undef clReleaseMemObject +#define clReleaseMemObject clReleaseMemObject_pfn +#undef clGetSupportedImageFormats +#define clGetSupportedImageFormats clGetSupportedImageFormats_pfn +#undef clGetMemObjectInfo +#define clGetMemObjectInfo clGetMemObjectInfo_pfn +#undef clGetImageInfo +#define clGetImageInfo clGetImageInfo_pfn +#undef clSetMemObjectDestructorCallback +#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_pfn +#undef clCreateSampler +#define clCreateSampler clCreateSampler_pfn +#undef clRetainSampler +#define clRetainSampler clRetainSampler_pfn +#undef clReleaseSampler +#define clReleaseSampler clReleaseSampler_pfn +#undef clGetSamplerInfo +#define clGetSamplerInfo clGetSamplerInfo_pfn +#undef clCreateProgramWithSource +#define clCreateProgramWithSource clCreateProgramWithSource_pfn +#undef clCreateProgramWithBinary +#define clCreateProgramWithBinary clCreateProgramWithBinary_pfn +#undef clRetainProgram +#define clRetainProgram clRetainProgram_pfn +#undef clReleaseProgram +#define clReleaseProgram clReleaseProgram_pfn +#undef clBuildProgram +#define clBuildProgram clBuildProgram_pfn +#undef clUnloadCompiler +#define clUnloadCompiler clUnloadCompiler_pfn +#undef clGetProgramInfo +#define clGetProgramInfo clGetProgramInfo_pfn +#undef clGetProgramBuildInfo +#define clGetProgramBuildInfo clGetProgramBuildInfo_pfn +#undef clCreateKernel +#define clCreateKernel clCreateKernel_pfn +#undef clCreateKernelsInProgram +#define clCreateKernelsInProgram clCreateKernelsInProgram_pfn +#undef clRetainKernel +#define clRetainKernel clRetainKernel_pfn 
+#undef clReleaseKernel +#define clReleaseKernel clReleaseKernel_pfn +#undef clSetKernelArg +#define clSetKernelArg clSetKernelArg_pfn +#undef clGetKernelInfo +#define clGetKernelInfo clGetKernelInfo_pfn +#undef clGetKernelWorkGroupInfo +#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_pfn +#undef clWaitForEvents +#define clWaitForEvents clWaitForEvents_pfn +#undef clGetEventInfo +#define clGetEventInfo clGetEventInfo_pfn +#undef clCreateUserEvent +#define clCreateUserEvent clCreateUserEvent_pfn +#undef clRetainEvent +#define clRetainEvent clRetainEvent_pfn +#undef clReleaseEvent +#define clReleaseEvent clReleaseEvent_pfn +#undef clSetUserEventStatus +#define clSetUserEventStatus clSetUserEventStatus_pfn +#undef clSetEventCallback +#define clSetEventCallback clSetEventCallback_pfn +#undef clGetEventProfilingInfo +#define clGetEventProfilingInfo clGetEventProfilingInfo_pfn +#undef clFlush +#define clFlush clFlush_pfn +#undef clFinish +#define clFinish clFinish_pfn +#undef clEnqueueReadBuffer +#define clEnqueueReadBuffer clEnqueueReadBuffer_pfn +#undef clEnqueueReadBufferRect +#define clEnqueueReadBufferRect clEnqueueReadBufferRect_pfn +#undef clEnqueueWriteBuffer +#define clEnqueueWriteBuffer clEnqueueWriteBuffer_pfn +#undef clEnqueueWriteBufferRect +#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_pfn +#undef clEnqueueCopyBuffer +#define clEnqueueCopyBuffer clEnqueueCopyBuffer_pfn +#undef clEnqueueCopyBufferRect +#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_pfn +#undef clEnqueueReadImage +#define clEnqueueReadImage clEnqueueReadImage_pfn +#undef clEnqueueWriteImage +#define clEnqueueWriteImage clEnqueueWriteImage_pfn +#undef clEnqueueCopyImage +#define clEnqueueCopyImage clEnqueueCopyImage_pfn +#undef clEnqueueCopyImageToBuffer +#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_pfn +#undef clEnqueueCopyBufferToImage +#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_pfn +#undef clEnqueueMapBuffer +#define 
clEnqueueMapBuffer clEnqueueMapBuffer_pfn +#undef clEnqueueMapImage +#define clEnqueueMapImage clEnqueueMapImage_pfn +#undef clEnqueueUnmapMemObject +#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_pfn +#undef clEnqueueNDRangeKernel +#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_pfn +#undef clEnqueueTask +#define clEnqueueTask clEnqueueTask_pfn +#undef clEnqueueNativeKernel +#define clEnqueueNativeKernel clEnqueueNativeKernel_pfn +#undef clEnqueueMarker +#define clEnqueueMarker clEnqueueMarker_pfn +#undef clEnqueueWaitForEvents +#define clEnqueueWaitForEvents clEnqueueWaitForEvents_pfn +#undef clEnqueueBarrier +#define clEnqueueBarrier clEnqueueBarrier_pfn +#undef clGetExtensionFunctionAddress +#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_pfn + +#ifndef CL_RUNTIME_EXPORT +#if (defined(BUILD_SHARED_LIBS) || defined(OPENCV_OCL_SHARED)) && (defined WIN32 || defined _WIN32 || defined WINCE) +#define CL_RUNTIME_EXPORT __declspec(dllimport) +#else +#define CL_RUNTIME_EXPORT +#endif +#endif + +// generated by parser_cl.py +extern CL_RUNTIME_EXPORT cl_int (*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (*clGetPlatformInfo)(cl_platform_id, cl_platform_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (*clGetDeviceIDs)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (*clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_context (*clCreateContext)(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_context (*clCreateContextFromType)(const cl_context_properties*, cl_device_type, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (*clRetainContext)(cl_context); +extern CL_RUNTIME_EXPORT cl_int 
(*clReleaseContext)(cl_context); +extern CL_RUNTIME_EXPORT cl_int (*clGetContextInfo)(cl_context, cl_context_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_command_queue (*clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (*clRetainCommandQueue)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (*clReleaseCommandQueue)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (*clGetCommandQueueInfo)(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (*clSetCommandQueueProperty)(cl_command_queue, cl_command_queue_properties, cl_bool, cl_command_queue_properties*); +extern CL_RUNTIME_EXPORT cl_mem (*clCreateBuffer)(cl_context, cl_mem_flags, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (*clCreateSubBuffer)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (*clCreateImage2D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (*clCreateImage3D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, size_t, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (*clRetainMemObject)(cl_mem); +extern CL_RUNTIME_EXPORT cl_int (*clReleaseMemObject)(cl_mem); +extern CL_RUNTIME_EXPORT cl_int (*clGetSupportedImageFormats)(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (*clGetMemObjectInfo)(cl_mem, cl_mem_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (*clGetImageInfo)(cl_mem, cl_image_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (*clSetMemObjectDestructorCallback)(cl_mem, void (CL_CALLBACK*) (cl_mem, void*), void*); +extern CL_RUNTIME_EXPORT cl_sampler (*clCreateSampler)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int*); +extern CL_RUNTIME_EXPORT cl_int 
(*clRetainSampler)(cl_sampler); +extern CL_RUNTIME_EXPORT cl_int (*clReleaseSampler)(cl_sampler); +extern CL_RUNTIME_EXPORT cl_int (*clGetSamplerInfo)(cl_sampler, cl_sampler_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_program (*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*); +extern CL_RUNTIME_EXPORT cl_program (*clCreateProgramWithBinary)(cl_context, cl_uint, const cl_device_id*, const size_t*, const unsigned char**, cl_int*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (*clRetainProgram)(cl_program); +extern CL_RUNTIME_EXPORT cl_int (*clReleaseProgram)(cl_program); +extern CL_RUNTIME_EXPORT cl_int (*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void (CL_CALLBACK*) (cl_program, void*), void*); +extern CL_RUNTIME_EXPORT cl_int (*clUnloadCompiler)(); +extern CL_RUNTIME_EXPORT cl_int (*clGetProgramInfo)(cl_program, cl_program_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_kernel (*clCreateKernel)(cl_program, const char*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (*clCreateKernelsInProgram)(cl_program, cl_uint, cl_kernel*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (*clRetainKernel)(cl_kernel); +extern CL_RUNTIME_EXPORT cl_int (*clReleaseKernel)(cl_kernel); +extern CL_RUNTIME_EXPORT cl_int (*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*); +extern CL_RUNTIME_EXPORT cl_int (*clGetKernelInfo)(cl_kernel, cl_kernel_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (*clGetKernelWorkGroupInfo)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (*clWaitForEvents)(cl_uint, const cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clGetEventInfo)(cl_event, cl_event_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_event (*clCreateUserEvent)(cl_context, cl_int*); +extern 
CL_RUNTIME_EXPORT cl_int (*clRetainEvent)(cl_event); +extern CL_RUNTIME_EXPORT cl_int (*clReleaseEvent)(cl_event); +extern CL_RUNTIME_EXPORT cl_int (*clSetUserEventStatus)(cl_event, cl_int); +extern CL_RUNTIME_EXPORT cl_int (*clSetEventCallback)(cl_event, cl_int, void (CL_CALLBACK*) (cl_event, cl_int, void*), void*); +extern CL_RUNTIME_EXPORT cl_int (*clGetEventProfilingInfo)(cl_event, cl_profiling_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (*clFlush)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (*clFinish)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueReadBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueWriteBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyBuffer)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyBufferRect)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueReadImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueWriteImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, const 
void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyImage)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyImageToBuffer)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyBufferToImage)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT void* (*clEnqueueMapBuffer)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event*, cl_event*, cl_int*); +extern CL_RUNTIME_EXPORT void* (*clEnqueueMapImage)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t*, const size_t*, size_t*, size_t*, cl_uint, const cl_event*, cl_event*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueUnmapMemObject)(cl_command_queue, cl_mem, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueTask)(cl_command_queue, cl_kernel, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK* user_func) (void*), void*, size_t, cl_uint, const cl_mem*, const void**, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueMarker)(cl_command_queue, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueBarrier)(cl_command_queue); +extern CL_RUNTIME_EXPORT void* (*clGetExtensionFunctionAddress)(const char*); + +#endif + +#endif + +#endif // __OPENCV_OCL_CL_RUNTIME_OPENCL_HPP__ diff --git 
a/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl12.hpp b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl12.hpp new file mode 100644 index 0000000000..8d03fbff6a --- /dev/null +++ b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl12.hpp @@ -0,0 +1,389 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef __OPENCV_OCL_CL_RUNTIME_OPENCL_HPP__ +#define __OPENCV_OCL_CL_RUNTIME_OPENCL_HPP__ + +#ifdef HAVE_OPENCL + +#if defined __APPLE__ && !defined(IOS) +#include <OpenCL/cl.h> +#else + +// generated by parser_cl.py +#define clGetPlatformIDs clGetPlatformIDs_ +#define clGetPlatformInfo clGetPlatformInfo_ +#define clGetDeviceIDs clGetDeviceIDs_ +#define clGetDeviceInfo clGetDeviceInfo_ +#define clCreateSubDevices clCreateSubDevices_ +#define clRetainDevice clRetainDevice_ +#define clReleaseDevice clReleaseDevice_ +#define clCreateContext clCreateContext_ +#define clCreateContextFromType clCreateContextFromType_ +#define clRetainContext clRetainContext_ +#define clReleaseContext clReleaseContext_ +#define clGetContextInfo clGetContextInfo_ +#define clCreateCommandQueue clCreateCommandQueue_ +#define clRetainCommandQueue clRetainCommandQueue_ +#define clReleaseCommandQueue clReleaseCommandQueue_ +#define clGetCommandQueueInfo clGetCommandQueueInfo_ +#define clCreateBuffer clCreateBuffer_ +#define clCreateSubBuffer clCreateSubBuffer_ +#define clCreateImage clCreateImage_ +#define clRetainMemObject clRetainMemObject_ +#define clReleaseMemObject clReleaseMemObject_ +#define clGetSupportedImageFormats clGetSupportedImageFormats_ +#define clGetMemObjectInfo clGetMemObjectInfo_ +#define clGetImageInfo clGetImageInfo_ +#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_ +#define clCreateSampler clCreateSampler_ +#define clRetainSampler clRetainSampler_ +#define clReleaseSampler clReleaseSampler_ +#define clGetSamplerInfo clGetSamplerInfo_ +#define clCreateProgramWithSource clCreateProgramWithSource_ +#define clCreateProgramWithBinary
clCreateProgramWithBinary_ +#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_ +#define clRetainProgram clRetainProgram_ +#define clReleaseProgram clReleaseProgram_ +#define clBuildProgram clBuildProgram_ +#define clCompileProgram clCompileProgram_ +#define clLinkProgram clLinkProgram_ +#define clUnloadPlatformCompiler clUnloadPlatformCompiler_ +#define clGetProgramInfo clGetProgramInfo_ +#define clGetProgramBuildInfo clGetProgramBuildInfo_ +#define clCreateKernel clCreateKernel_ +#define clCreateKernelsInProgram clCreateKernelsInProgram_ +#define clRetainKernel clRetainKernel_ +#define clReleaseKernel clReleaseKernel_ +#define clSetKernelArg clSetKernelArg_ +#define clGetKernelInfo clGetKernelInfo_ +#define clGetKernelArgInfo clGetKernelArgInfo_ +#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_ +#define clWaitForEvents clWaitForEvents_ +#define clGetEventInfo clGetEventInfo_ +#define clCreateUserEvent clCreateUserEvent_ +#define clRetainEvent clRetainEvent_ +#define clReleaseEvent clReleaseEvent_ +#define clSetUserEventStatus clSetUserEventStatus_ +#define clSetEventCallback clSetEventCallback_ +#define clGetEventProfilingInfo clGetEventProfilingInfo_ +#define clFlush clFlush_ +#define clFinish clFinish_ +#define clEnqueueReadBuffer clEnqueueReadBuffer_ +#define clEnqueueReadBufferRect clEnqueueReadBufferRect_ +#define clEnqueueWriteBuffer clEnqueueWriteBuffer_ +#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_ +#define clEnqueueFillBuffer clEnqueueFillBuffer_ +#define clEnqueueCopyBuffer clEnqueueCopyBuffer_ +#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_ +#define clEnqueueReadImage clEnqueueReadImage_ +#define clEnqueueWriteImage clEnqueueWriteImage_ +#define clEnqueueFillImage clEnqueueFillImage_ +#define clEnqueueCopyImage clEnqueueCopyImage_ +#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_ +#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_ +#define clEnqueueMapBuffer 
clEnqueueMapBuffer_ +#define clEnqueueMapImage clEnqueueMapImage_ +#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_ +#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_ +#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_ +#define clEnqueueTask clEnqueueTask_ +#define clEnqueueNativeKernel clEnqueueNativeKernel_ +#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_ +#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_ +#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_ +#define clCreateImage2D clCreateImage2D_ +#define clCreateImage3D clCreateImage3D_ +#define clEnqueueMarker clEnqueueMarker_ +#define clEnqueueWaitForEvents clEnqueueWaitForEvents_ +#define clEnqueueBarrier clEnqueueBarrier_ +#define clUnloadCompiler clUnloadCompiler_ +#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_ + +#if defined __APPLE__ +#include <OpenCL/cl.h> +#else +#include <CL/cl.h> +#endif + +// generated by parser_cl.py +#undef clGetPlatformIDs +#define clGetPlatformIDs clGetPlatformIDs_pfn +#undef clGetPlatformInfo +#define clGetPlatformInfo clGetPlatformInfo_pfn +#undef clGetDeviceIDs +#define clGetDeviceIDs clGetDeviceIDs_pfn +#undef clGetDeviceInfo +#define clGetDeviceInfo clGetDeviceInfo_pfn +#undef clCreateSubDevices +#define clCreateSubDevices clCreateSubDevices_pfn +#undef clRetainDevice +#define clRetainDevice clRetainDevice_pfn +#undef clReleaseDevice +#define clReleaseDevice clReleaseDevice_pfn +#undef clCreateContext +#define clCreateContext clCreateContext_pfn +#undef clCreateContextFromType +#define clCreateContextFromType clCreateContextFromType_pfn +#undef clRetainContext +#define clRetainContext clRetainContext_pfn +#undef clReleaseContext +#define clReleaseContext clReleaseContext_pfn +#undef clGetContextInfo +#define clGetContextInfo clGetContextInfo_pfn +#undef clCreateCommandQueue +#define clCreateCommandQueue clCreateCommandQueue_pfn +#undef clRetainCommandQueue +#define
clRetainCommandQueue clRetainCommandQueue_pfn +#undef clReleaseCommandQueue +#define clReleaseCommandQueue clReleaseCommandQueue_pfn +#undef clGetCommandQueueInfo +#define clGetCommandQueueInfo clGetCommandQueueInfo_pfn +#undef clCreateBuffer +#define clCreateBuffer clCreateBuffer_pfn +#undef clCreateSubBuffer +#define clCreateSubBuffer clCreateSubBuffer_pfn +#undef clCreateImage +#define clCreateImage clCreateImage_pfn +#undef clRetainMemObject +#define clRetainMemObject clRetainMemObject_pfn +#undef clReleaseMemObject +#define clReleaseMemObject clReleaseMemObject_pfn +#undef clGetSupportedImageFormats +#define clGetSupportedImageFormats clGetSupportedImageFormats_pfn +#undef clGetMemObjectInfo +#define clGetMemObjectInfo clGetMemObjectInfo_pfn +#undef clGetImageInfo +#define clGetImageInfo clGetImageInfo_pfn +#undef clSetMemObjectDestructorCallback +#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_pfn +#undef clCreateSampler +#define clCreateSampler clCreateSampler_pfn +#undef clRetainSampler +#define clRetainSampler clRetainSampler_pfn +#undef clReleaseSampler +#define clReleaseSampler clReleaseSampler_pfn +#undef clGetSamplerInfo +#define clGetSamplerInfo clGetSamplerInfo_pfn +#undef clCreateProgramWithSource +#define clCreateProgramWithSource clCreateProgramWithSource_pfn +#undef clCreateProgramWithBinary +#define clCreateProgramWithBinary clCreateProgramWithBinary_pfn +#undef clCreateProgramWithBuiltInKernels +#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_pfn +#undef clRetainProgram +#define clRetainProgram clRetainProgram_pfn +#undef clReleaseProgram +#define clReleaseProgram clReleaseProgram_pfn +#undef clBuildProgram +#define clBuildProgram clBuildProgram_pfn +#undef clCompileProgram +#define clCompileProgram clCompileProgram_pfn +#undef clLinkProgram +#define clLinkProgram clLinkProgram_pfn +#undef clUnloadPlatformCompiler +#define clUnloadPlatformCompiler clUnloadPlatformCompiler_pfn +#undef 
clGetProgramInfo +#define clGetProgramInfo clGetProgramInfo_pfn +#undef clGetProgramBuildInfo +#define clGetProgramBuildInfo clGetProgramBuildInfo_pfn +#undef clCreateKernel +#define clCreateKernel clCreateKernel_pfn +#undef clCreateKernelsInProgram +#define clCreateKernelsInProgram clCreateKernelsInProgram_pfn +#undef clRetainKernel +#define clRetainKernel clRetainKernel_pfn +#undef clReleaseKernel +#define clReleaseKernel clReleaseKernel_pfn +#undef clSetKernelArg +#define clSetKernelArg clSetKernelArg_pfn +#undef clGetKernelInfo +#define clGetKernelInfo clGetKernelInfo_pfn +#undef clGetKernelArgInfo +#define clGetKernelArgInfo clGetKernelArgInfo_pfn +#undef clGetKernelWorkGroupInfo +#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_pfn +#undef clWaitForEvents +#define clWaitForEvents clWaitForEvents_pfn +#undef clGetEventInfo +#define clGetEventInfo clGetEventInfo_pfn +#undef clCreateUserEvent +#define clCreateUserEvent clCreateUserEvent_pfn +#undef clRetainEvent +#define clRetainEvent clRetainEvent_pfn +#undef clReleaseEvent +#define clReleaseEvent clReleaseEvent_pfn +#undef clSetUserEventStatus +#define clSetUserEventStatus clSetUserEventStatus_pfn +#undef clSetEventCallback +#define clSetEventCallback clSetEventCallback_pfn +#undef clGetEventProfilingInfo +#define clGetEventProfilingInfo clGetEventProfilingInfo_pfn +#undef clFlush +#define clFlush clFlush_pfn +#undef clFinish +#define clFinish clFinish_pfn +#undef clEnqueueReadBuffer +#define clEnqueueReadBuffer clEnqueueReadBuffer_pfn +#undef clEnqueueReadBufferRect +#define clEnqueueReadBufferRect clEnqueueReadBufferRect_pfn +#undef clEnqueueWriteBuffer +#define clEnqueueWriteBuffer clEnqueueWriteBuffer_pfn +#undef clEnqueueWriteBufferRect +#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_pfn +#undef clEnqueueFillBuffer +#define clEnqueueFillBuffer clEnqueueFillBuffer_pfn +#undef clEnqueueCopyBuffer +#define clEnqueueCopyBuffer clEnqueueCopyBuffer_pfn +#undef clEnqueueCopyBufferRect 
+#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_pfn +#undef clEnqueueReadImage +#define clEnqueueReadImage clEnqueueReadImage_pfn +#undef clEnqueueWriteImage +#define clEnqueueWriteImage clEnqueueWriteImage_pfn +#undef clEnqueueFillImage +#define clEnqueueFillImage clEnqueueFillImage_pfn +#undef clEnqueueCopyImage +#define clEnqueueCopyImage clEnqueueCopyImage_pfn +#undef clEnqueueCopyImageToBuffer +#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_pfn +#undef clEnqueueCopyBufferToImage +#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_pfn +#undef clEnqueueMapBuffer +#define clEnqueueMapBuffer clEnqueueMapBuffer_pfn +#undef clEnqueueMapImage +#define clEnqueueMapImage clEnqueueMapImage_pfn +#undef clEnqueueUnmapMemObject +#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_pfn +#undef clEnqueueMigrateMemObjects +#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_pfn +#undef clEnqueueNDRangeKernel +#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_pfn +#undef clEnqueueTask +#define clEnqueueTask clEnqueueTask_pfn +#undef clEnqueueNativeKernel +#define clEnqueueNativeKernel clEnqueueNativeKernel_pfn +#undef clEnqueueMarkerWithWaitList +#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_pfn +#undef clEnqueueBarrierWithWaitList +#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_pfn +#undef clGetExtensionFunctionAddressForPlatform +#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_pfn +#undef clCreateImage2D +#define clCreateImage2D clCreateImage2D_pfn +#undef clCreateImage3D +#define clCreateImage3D clCreateImage3D_pfn +#undef clEnqueueMarker +#define clEnqueueMarker clEnqueueMarker_pfn +#undef clEnqueueWaitForEvents +#define clEnqueueWaitForEvents clEnqueueWaitForEvents_pfn +#undef clEnqueueBarrier +#define clEnqueueBarrier clEnqueueBarrier_pfn +#undef clUnloadCompiler +#define clUnloadCompiler clUnloadCompiler_pfn +#undef 
clGetExtensionFunctionAddress +#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_pfn + +#ifndef CL_RUNTIME_EXPORT +#if (defined(BUILD_SHARED_LIBS) || defined(OPENCV_OCL_SHARED)) && (defined WIN32 || defined _WIN32 || defined WINCE) +#define CL_RUNTIME_EXPORT __declspec(dllimport) +#else +#define CL_RUNTIME_EXPORT +#endif +#endif + +// generated by parser_cl.py +extern CL_RUNTIME_EXPORT cl_int (*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (*clGetPlatformInfo)(cl_platform_id, cl_platform_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (*clGetDeviceIDs)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (*clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (*clCreateSubDevices)(cl_device_id, const cl_device_partition_property*, cl_uint, cl_device_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (*clRetainDevice)(cl_device_id); +extern CL_RUNTIME_EXPORT cl_int (*clReleaseDevice)(cl_device_id); +extern CL_RUNTIME_EXPORT cl_context (*clCreateContext)(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_context (*clCreateContextFromType)(const cl_context_properties*, cl_device_type, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (*clRetainContext)(cl_context); +extern CL_RUNTIME_EXPORT cl_int (*clReleaseContext)(cl_context); +extern CL_RUNTIME_EXPORT cl_int (*clGetContextInfo)(cl_context, cl_context_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_command_queue (*clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (*clRetainCommandQueue)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (*clReleaseCommandQueue)(cl_command_queue); +extern 
CL_RUNTIME_EXPORT cl_int (*clGetCommandQueueInfo)(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_mem (*clCreateBuffer)(cl_context, cl_mem_flags, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (*clCreateSubBuffer)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (*clCreateImage)(cl_context, cl_mem_flags, const cl_image_format*, const cl_image_desc*, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (*clRetainMemObject)(cl_mem); +extern CL_RUNTIME_EXPORT cl_int (*clReleaseMemObject)(cl_mem); +extern CL_RUNTIME_EXPORT cl_int (*clGetSupportedImageFormats)(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (*clGetMemObjectInfo)(cl_mem, cl_mem_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (*clGetImageInfo)(cl_mem, cl_image_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (*clSetMemObjectDestructorCallback)(cl_mem, void (CL_CALLBACK*) (cl_mem, void*), void*); +extern CL_RUNTIME_EXPORT cl_sampler (*clCreateSampler)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (*clRetainSampler)(cl_sampler); +extern CL_RUNTIME_EXPORT cl_int (*clReleaseSampler)(cl_sampler); +extern CL_RUNTIME_EXPORT cl_int (*clGetSamplerInfo)(cl_sampler, cl_sampler_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_program (*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*); +extern CL_RUNTIME_EXPORT cl_program (*clCreateProgramWithBinary)(cl_context, cl_uint, const cl_device_id*, const size_t*, const unsigned char**, cl_int*, cl_int*); +extern CL_RUNTIME_EXPORT cl_program (*clCreateProgramWithBuiltInKernels)(cl_context, cl_uint, const cl_device_id*, const char*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (*clRetainProgram)(cl_program); +extern CL_RUNTIME_EXPORT cl_int (*clReleaseProgram)(cl_program); 
+extern CL_RUNTIME_EXPORT cl_int (*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void (CL_CALLBACK*) (cl_program, void*), void*); +extern CL_RUNTIME_EXPORT cl_int (*clCompileProgram)(cl_program, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, const char**, void (CL_CALLBACK*) (cl_program, void*), void*); +extern CL_RUNTIME_EXPORT cl_program (*clLinkProgram)(cl_context, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, void (CL_CALLBACK*) (cl_program, void*), void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (*clUnloadPlatformCompiler)(cl_platform_id); +extern CL_RUNTIME_EXPORT cl_int (*clGetProgramInfo)(cl_program, cl_program_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_kernel (*clCreateKernel)(cl_program, const char*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (*clCreateKernelsInProgram)(cl_program, cl_uint, cl_kernel*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (*clRetainKernel)(cl_kernel); +extern CL_RUNTIME_EXPORT cl_int (*clReleaseKernel)(cl_kernel); +extern CL_RUNTIME_EXPORT cl_int (*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*); +extern CL_RUNTIME_EXPORT cl_int (*clGetKernelInfo)(cl_kernel, cl_kernel_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (*clGetKernelArgInfo)(cl_kernel, cl_uint, cl_kernel_arg_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (*clGetKernelWorkGroupInfo)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (*clWaitForEvents)(cl_uint, const cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clGetEventInfo)(cl_event, cl_event_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_event (*clCreateUserEvent)(cl_context, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (*clRetainEvent)(cl_event); +extern CL_RUNTIME_EXPORT cl_int 
(*clReleaseEvent)(cl_event); +extern CL_RUNTIME_EXPORT cl_int (*clSetUserEventStatus)(cl_event, cl_int); +extern CL_RUNTIME_EXPORT cl_int (*clSetEventCallback)(cl_event, cl_int, void (CL_CALLBACK*) (cl_event, cl_int, void*), void*); +extern CL_RUNTIME_EXPORT cl_int (*clGetEventProfilingInfo)(cl_event, cl_profiling_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (*clFlush)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (*clFinish)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueReadBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueWriteBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueFillBuffer)(cl_command_queue, cl_mem, const void*, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyBuffer)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyBufferRect)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueReadImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueWriteImage)(cl_command_queue, 
cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueFillImage)(cl_command_queue, cl_mem, const void*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyImage)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyImageToBuffer)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyBufferToImage)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT void* (*clEnqueueMapBuffer)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event*, cl_event*, cl_int*); +extern CL_RUNTIME_EXPORT void* (*clEnqueueMapImage)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t*, const size_t*, size_t*, size_t*, cl_uint, const cl_event*, cl_event*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueUnmapMemObject)(cl_command_queue, cl_mem, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueMigrateMemObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_mem_migration_flags, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueTask)(cl_command_queue, cl_kernel, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK*) (void*), void*, size_t, cl_uint, const cl_mem*, const void**, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int 
(*clEnqueueMarkerWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueBarrierWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT void* (*clGetExtensionFunctionAddressForPlatform)(cl_platform_id, const char*); +extern CL_RUNTIME_EXPORT cl_mem (*clCreateImage2D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (*clCreateImage3D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, size_t, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueMarker)(cl_command_queue, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event*); +extern CL_RUNTIME_EXPORT cl_int (*clEnqueueBarrier)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (*clUnloadCompiler)(); +extern CL_RUNTIME_EXPORT void* (*clGetExtensionFunctionAddress)(const char*); + +#endif + +#endif + +#endif // __OPENCV_OCL_CL_RUNTIME_OPENCL_HPP__ diff --git a/modules/ocl/include/opencv2/ocl/cl_runtime/clamdblas_runtime.hpp b/modules/ocl/include/opencv2/ocl/cl_runtime/clamdblas_runtime.hpp new file mode 100644 index 0000000000..ae28a158e9 --- /dev/null +++ b/modules/ocl/include/opencv2/ocl/cl_runtime/clamdblas_runtime.hpp @@ -0,0 +1,728 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef __OPENCV_OCL_CLAMDBLAS_RUNTIME_HPP__ +#define __OPENCV_OCL_CLAMDBLAS_RUNTIME_HPP__ + +#ifdef HAVE_CLAMDBLAS + +// generated by parser_clamdblas.py +#define clAmdBlasGetVersion clAmdBlasGetVersion_ +#define clAmdBlasSetup clAmdBlasSetup_ +#define clAmdBlasTeardown clAmdBlasTeardown_ +#define clAmdBlasAddScratchImage clAmdBlasAddScratchImage_ +#define clAmdBlasRemoveScratchImage clAmdBlasRemoveScratchImage_ +#define clAmdBlasSswap clAmdBlasSswap_ +#define clAmdBlasDswap clAmdBlasDswap_ +#define clAmdBlasCswap clAmdBlasCswap_ +#define clAmdBlasZswap clAmdBlasZswap_ 
+#define clAmdBlasSscal clAmdBlasSscal_ +#define clAmdBlasDscal clAmdBlasDscal_ +#define clAmdBlasCscal clAmdBlasCscal_ +#define clAmdBlasZscal clAmdBlasZscal_ +#define clAmdBlasCsscal clAmdBlasCsscal_ +#define clAmdBlasZdscal clAmdBlasZdscal_ +#define clAmdBlasScopy clAmdBlasScopy_ +#define clAmdBlasDcopy clAmdBlasDcopy_ +#define clAmdBlasCcopy clAmdBlasCcopy_ +#define clAmdBlasZcopy clAmdBlasZcopy_ +#define clAmdBlasSaxpy clAmdBlasSaxpy_ +#define clAmdBlasDaxpy clAmdBlasDaxpy_ +#define clAmdBlasCaxpy clAmdBlasCaxpy_ +#define clAmdBlasZaxpy clAmdBlasZaxpy_ +#define clAmdBlasSdot clAmdBlasSdot_ +#define clAmdBlasDdot clAmdBlasDdot_ +#define clAmdBlasCdotu clAmdBlasCdotu_ +#define clAmdBlasZdotu clAmdBlasZdotu_ +#define clAmdBlasCdotc clAmdBlasCdotc_ +#define clAmdBlasZdotc clAmdBlasZdotc_ +#define clAmdBlasSrotg clAmdBlasSrotg_ +#define clAmdBlasDrotg clAmdBlasDrotg_ +#define clAmdBlasCrotg clAmdBlasCrotg_ +#define clAmdBlasZrotg clAmdBlasZrotg_ +#define clAmdBlasSrotmg clAmdBlasSrotmg_ +#define clAmdBlasDrotmg clAmdBlasDrotmg_ +#define clAmdBlasSrot clAmdBlasSrot_ +#define clAmdBlasDrot clAmdBlasDrot_ +#define clAmdBlasCsrot clAmdBlasCsrot_ +#define clAmdBlasZdrot clAmdBlasZdrot_ +#define clAmdBlasSrotm clAmdBlasSrotm_ +#define clAmdBlasDrotm clAmdBlasDrotm_ +#define clAmdBlasSnrm2 clAmdBlasSnrm2_ +#define clAmdBlasDnrm2 clAmdBlasDnrm2_ +#define clAmdBlasScnrm2 clAmdBlasScnrm2_ +#define clAmdBlasDznrm2 clAmdBlasDznrm2_ +#define clAmdBlasiSamax clAmdBlasiSamax_ +#define clAmdBlasiDamax clAmdBlasiDamax_ +#define clAmdBlasiCamax clAmdBlasiCamax_ +#define clAmdBlasiZamax clAmdBlasiZamax_ +#define clAmdBlasSasum clAmdBlasSasum_ +#define clAmdBlasDasum clAmdBlasDasum_ +#define clAmdBlasScasum clAmdBlasScasum_ +#define clAmdBlasDzasum clAmdBlasDzasum_ +#define clAmdBlasSgemv clAmdBlasSgemv_ +#define clAmdBlasDgemv clAmdBlasDgemv_ +#define clAmdBlasCgemv clAmdBlasCgemv_ +#define clAmdBlasZgemv clAmdBlasZgemv_ +#define clAmdBlasSgemvEx clAmdBlasSgemvEx_ +#define 
clAmdBlasDgemvEx clAmdBlasDgemvEx_ +#define clAmdBlasCgemvEx clAmdBlasCgemvEx_ +#define clAmdBlasZgemvEx clAmdBlasZgemvEx_ +#define clAmdBlasSsymv clAmdBlasSsymv_ +#define clAmdBlasDsymv clAmdBlasDsymv_ +#define clAmdBlasSsymvEx clAmdBlasSsymvEx_ +#define clAmdBlasDsymvEx clAmdBlasDsymvEx_ +#define clAmdBlasChemv clAmdBlasChemv_ +#define clAmdBlasZhemv clAmdBlasZhemv_ +#define clAmdBlasStrmv clAmdBlasStrmv_ +#define clAmdBlasDtrmv clAmdBlasDtrmv_ +#define clAmdBlasCtrmv clAmdBlasCtrmv_ +#define clAmdBlasZtrmv clAmdBlasZtrmv_ +#define clAmdBlasStrsv clAmdBlasStrsv_ +#define clAmdBlasDtrsv clAmdBlasDtrsv_ +#define clAmdBlasCtrsv clAmdBlasCtrsv_ +#define clAmdBlasZtrsv clAmdBlasZtrsv_ +#define clAmdBlasSger clAmdBlasSger_ +#define clAmdBlasDger clAmdBlasDger_ +#define clAmdBlasCgeru clAmdBlasCgeru_ +#define clAmdBlasZgeru clAmdBlasZgeru_ +#define clAmdBlasCgerc clAmdBlasCgerc_ +#define clAmdBlasZgerc clAmdBlasZgerc_ +#define clAmdBlasSsyr clAmdBlasSsyr_ +#define clAmdBlasDsyr clAmdBlasDsyr_ +#define clAmdBlasCher clAmdBlasCher_ +#define clAmdBlasZher clAmdBlasZher_ +#define clAmdBlasSsyr2 clAmdBlasSsyr2_ +#define clAmdBlasDsyr2 clAmdBlasDsyr2_ +#define clAmdBlasCher2 clAmdBlasCher2_ +#define clAmdBlasZher2 clAmdBlasZher2_ +#define clAmdBlasStpmv clAmdBlasStpmv_ +#define clAmdBlasDtpmv clAmdBlasDtpmv_ +#define clAmdBlasCtpmv clAmdBlasCtpmv_ +#define clAmdBlasZtpmv clAmdBlasZtpmv_ +#define clAmdBlasStpsv clAmdBlasStpsv_ +#define clAmdBlasDtpsv clAmdBlasDtpsv_ +#define clAmdBlasCtpsv clAmdBlasCtpsv_ +#define clAmdBlasZtpsv clAmdBlasZtpsv_ +#define clAmdBlasSspmv clAmdBlasSspmv_ +#define clAmdBlasDspmv clAmdBlasDspmv_ +#define clAmdBlasChpmv clAmdBlasChpmv_ +#define clAmdBlasZhpmv clAmdBlasZhpmv_ +#define clAmdBlasSspr clAmdBlasSspr_ +#define clAmdBlasDspr clAmdBlasDspr_ +#define clAmdBlasChpr clAmdBlasChpr_ +#define clAmdBlasZhpr clAmdBlasZhpr_ +#define clAmdBlasSspr2 clAmdBlasSspr2_ +#define clAmdBlasDspr2 clAmdBlasDspr2_ +#define clAmdBlasChpr2 clAmdBlasChpr2_ +#define 
clAmdBlasZhpr2 clAmdBlasZhpr2_ +#define clAmdBlasSgbmv clAmdBlasSgbmv_ +#define clAmdBlasDgbmv clAmdBlasDgbmv_ +#define clAmdBlasCgbmv clAmdBlasCgbmv_ +#define clAmdBlasZgbmv clAmdBlasZgbmv_ +#define clAmdBlasStbmv clAmdBlasStbmv_ +#define clAmdBlasDtbmv clAmdBlasDtbmv_ +#define clAmdBlasCtbmv clAmdBlasCtbmv_ +#define clAmdBlasZtbmv clAmdBlasZtbmv_ +#define clAmdBlasSsbmv clAmdBlasSsbmv_ +#define clAmdBlasDsbmv clAmdBlasDsbmv_ +#define clAmdBlasChbmv clAmdBlasChbmv_ +#define clAmdBlasZhbmv clAmdBlasZhbmv_ +#define clAmdBlasStbsv clAmdBlasStbsv_ +#define clAmdBlasDtbsv clAmdBlasDtbsv_ +#define clAmdBlasCtbsv clAmdBlasCtbsv_ +#define clAmdBlasZtbsv clAmdBlasZtbsv_ +#define clAmdBlasSgemm clAmdBlasSgemm_ +#define clAmdBlasDgemm clAmdBlasDgemm_ +#define clAmdBlasCgemm clAmdBlasCgemm_ +#define clAmdBlasZgemm clAmdBlasZgemm_ +#define clAmdBlasSgemmEx clAmdBlasSgemmEx_ +#define clAmdBlasDgemmEx clAmdBlasDgemmEx_ +#define clAmdBlasCgemmEx clAmdBlasCgemmEx_ +#define clAmdBlasZgemmEx clAmdBlasZgemmEx_ +#define clAmdBlasStrmm clAmdBlasStrmm_ +#define clAmdBlasDtrmm clAmdBlasDtrmm_ +#define clAmdBlasCtrmm clAmdBlasCtrmm_ +#define clAmdBlasZtrmm clAmdBlasZtrmm_ +#define clAmdBlasStrmmEx clAmdBlasStrmmEx_ +#define clAmdBlasDtrmmEx clAmdBlasDtrmmEx_ +#define clAmdBlasCtrmmEx clAmdBlasCtrmmEx_ +#define clAmdBlasZtrmmEx clAmdBlasZtrmmEx_ +#define clAmdBlasStrsm clAmdBlasStrsm_ +#define clAmdBlasDtrsm clAmdBlasDtrsm_ +#define clAmdBlasCtrsm clAmdBlasCtrsm_ +#define clAmdBlasZtrsm clAmdBlasZtrsm_ +#define clAmdBlasStrsmEx clAmdBlasStrsmEx_ +#define clAmdBlasDtrsmEx clAmdBlasDtrsmEx_ +#define clAmdBlasCtrsmEx clAmdBlasCtrsmEx_ +#define clAmdBlasZtrsmEx clAmdBlasZtrsmEx_ +#define clAmdBlasSsyrk clAmdBlasSsyrk_ +#define clAmdBlasDsyrk clAmdBlasDsyrk_ +#define clAmdBlasCsyrk clAmdBlasCsyrk_ +#define clAmdBlasZsyrk clAmdBlasZsyrk_ +#define clAmdBlasSsyrkEx clAmdBlasSsyrkEx_ +#define clAmdBlasDsyrkEx clAmdBlasDsyrkEx_ +#define clAmdBlasCsyrkEx clAmdBlasCsyrkEx_ +#define clAmdBlasZsyrkEx 
clAmdBlasZsyrkEx_ +#define clAmdBlasSsyr2k clAmdBlasSsyr2k_ +#define clAmdBlasDsyr2k clAmdBlasDsyr2k_ +#define clAmdBlasCsyr2k clAmdBlasCsyr2k_ +#define clAmdBlasZsyr2k clAmdBlasZsyr2k_ +#define clAmdBlasSsyr2kEx clAmdBlasSsyr2kEx_ +#define clAmdBlasDsyr2kEx clAmdBlasDsyr2kEx_ +#define clAmdBlasCsyr2kEx clAmdBlasCsyr2kEx_ +#define clAmdBlasZsyr2kEx clAmdBlasZsyr2kEx_ +#define clAmdBlasSsymm clAmdBlasSsymm_ +#define clAmdBlasDsymm clAmdBlasDsymm_ +#define clAmdBlasCsymm clAmdBlasCsymm_ +#define clAmdBlasZsymm clAmdBlasZsymm_ +#define clAmdBlasChemm clAmdBlasChemm_ +#define clAmdBlasZhemm clAmdBlasZhemm_ +#define clAmdBlasCherk clAmdBlasCherk_ +#define clAmdBlasZherk clAmdBlasZherk_ +#define clAmdBlasCher2k clAmdBlasCher2k_ +#define clAmdBlasZher2k clAmdBlasZher2k_ + +#include + +// generated by parser_clamdblas.py +#undef clAmdBlasGetVersion +#define clAmdBlasGetVersion clAmdBlasGetVersion_pfn +#undef clAmdBlasSetup +#define clAmdBlasSetup clAmdBlasSetup_pfn +#undef clAmdBlasTeardown +#define clAmdBlasTeardown clAmdBlasTeardown_pfn +#undef clAmdBlasAddScratchImage +#define clAmdBlasAddScratchImage clAmdBlasAddScratchImage_pfn +#undef clAmdBlasRemoveScratchImage +#define clAmdBlasRemoveScratchImage clAmdBlasRemoveScratchImage_pfn +#undef clAmdBlasSswap +#define clAmdBlasSswap clAmdBlasSswap_pfn +#undef clAmdBlasDswap +#define clAmdBlasDswap clAmdBlasDswap_pfn +#undef clAmdBlasCswap +#define clAmdBlasCswap clAmdBlasCswap_pfn +#undef clAmdBlasZswap +#define clAmdBlasZswap clAmdBlasZswap_pfn +#undef clAmdBlasSscal +#define clAmdBlasSscal clAmdBlasSscal_pfn +#undef clAmdBlasDscal +#define clAmdBlasDscal clAmdBlasDscal_pfn +#undef clAmdBlasCscal +#define clAmdBlasCscal clAmdBlasCscal_pfn +#undef clAmdBlasZscal +#define clAmdBlasZscal clAmdBlasZscal_pfn +#undef clAmdBlasCsscal +#define clAmdBlasCsscal clAmdBlasCsscal_pfn +#undef clAmdBlasZdscal +#define clAmdBlasZdscal clAmdBlasZdscal_pfn +#undef clAmdBlasScopy +#define clAmdBlasScopy clAmdBlasScopy_pfn +#undef 
clAmdBlasDcopy +#define clAmdBlasDcopy clAmdBlasDcopy_pfn +#undef clAmdBlasCcopy +#define clAmdBlasCcopy clAmdBlasCcopy_pfn +#undef clAmdBlasZcopy +#define clAmdBlasZcopy clAmdBlasZcopy_pfn +#undef clAmdBlasSaxpy +#define clAmdBlasSaxpy clAmdBlasSaxpy_pfn +#undef clAmdBlasDaxpy +#define clAmdBlasDaxpy clAmdBlasDaxpy_pfn +#undef clAmdBlasCaxpy +#define clAmdBlasCaxpy clAmdBlasCaxpy_pfn +#undef clAmdBlasZaxpy +#define clAmdBlasZaxpy clAmdBlasZaxpy_pfn +#undef clAmdBlasSdot +#define clAmdBlasSdot clAmdBlasSdot_pfn +#undef clAmdBlasDdot +#define clAmdBlasDdot clAmdBlasDdot_pfn +#undef clAmdBlasCdotu +#define clAmdBlasCdotu clAmdBlasCdotu_pfn +#undef clAmdBlasZdotu +#define clAmdBlasZdotu clAmdBlasZdotu_pfn +#undef clAmdBlasCdotc +#define clAmdBlasCdotc clAmdBlasCdotc_pfn +#undef clAmdBlasZdotc +#define clAmdBlasZdotc clAmdBlasZdotc_pfn +#undef clAmdBlasSrotg +#define clAmdBlasSrotg clAmdBlasSrotg_pfn +#undef clAmdBlasDrotg +#define clAmdBlasDrotg clAmdBlasDrotg_pfn +#undef clAmdBlasCrotg +#define clAmdBlasCrotg clAmdBlasCrotg_pfn +#undef clAmdBlasZrotg +#define clAmdBlasZrotg clAmdBlasZrotg_pfn +#undef clAmdBlasSrotmg +#define clAmdBlasSrotmg clAmdBlasSrotmg_pfn +#undef clAmdBlasDrotmg +#define clAmdBlasDrotmg clAmdBlasDrotmg_pfn +#undef clAmdBlasSrot +#define clAmdBlasSrot clAmdBlasSrot_pfn +#undef clAmdBlasDrot +#define clAmdBlasDrot clAmdBlasDrot_pfn +#undef clAmdBlasCsrot +#define clAmdBlasCsrot clAmdBlasCsrot_pfn +#undef clAmdBlasZdrot +#define clAmdBlasZdrot clAmdBlasZdrot_pfn +#undef clAmdBlasSrotm +#define clAmdBlasSrotm clAmdBlasSrotm_pfn +#undef clAmdBlasDrotm +#define clAmdBlasDrotm clAmdBlasDrotm_pfn +#undef clAmdBlasSnrm2 +#define clAmdBlasSnrm2 clAmdBlasSnrm2_pfn +#undef clAmdBlasDnrm2 +#define clAmdBlasDnrm2 clAmdBlasDnrm2_pfn +#undef clAmdBlasScnrm2 +#define clAmdBlasScnrm2 clAmdBlasScnrm2_pfn +#undef clAmdBlasDznrm2 +#define clAmdBlasDznrm2 clAmdBlasDznrm2_pfn +#undef clAmdBlasiSamax +#define clAmdBlasiSamax clAmdBlasiSamax_pfn +#undef clAmdBlasiDamax 
+#define clAmdBlasiDamax clAmdBlasiDamax_pfn +#undef clAmdBlasiCamax +#define clAmdBlasiCamax clAmdBlasiCamax_pfn +#undef clAmdBlasiZamax +#define clAmdBlasiZamax clAmdBlasiZamax_pfn +#undef clAmdBlasSasum +#define clAmdBlasSasum clAmdBlasSasum_pfn +#undef clAmdBlasDasum +#define clAmdBlasDasum clAmdBlasDasum_pfn +#undef clAmdBlasScasum +#define clAmdBlasScasum clAmdBlasScasum_pfn +#undef clAmdBlasDzasum +#define clAmdBlasDzasum clAmdBlasDzasum_pfn +#undef clAmdBlasSgemv +#define clAmdBlasSgemv clAmdBlasSgemv_pfn +#undef clAmdBlasDgemv +#define clAmdBlasDgemv clAmdBlasDgemv_pfn +#undef clAmdBlasCgemv +#define clAmdBlasCgemv clAmdBlasCgemv_pfn +#undef clAmdBlasZgemv +#define clAmdBlasZgemv clAmdBlasZgemv_pfn +#undef clAmdBlasSgemvEx +#define clAmdBlasSgemvEx clAmdBlasSgemvEx_pfn +#undef clAmdBlasDgemvEx +#define clAmdBlasDgemvEx clAmdBlasDgemvEx_pfn +#undef clAmdBlasCgemvEx +#define clAmdBlasCgemvEx clAmdBlasCgemvEx_pfn +#undef clAmdBlasZgemvEx +#define clAmdBlasZgemvEx clAmdBlasZgemvEx_pfn +#undef clAmdBlasSsymv +#define clAmdBlasSsymv clAmdBlasSsymv_pfn +#undef clAmdBlasDsymv +#define clAmdBlasDsymv clAmdBlasDsymv_pfn +#undef clAmdBlasSsymvEx +#define clAmdBlasSsymvEx clAmdBlasSsymvEx_pfn +#undef clAmdBlasDsymvEx +#define clAmdBlasDsymvEx clAmdBlasDsymvEx_pfn +#undef clAmdBlasChemv +#define clAmdBlasChemv clAmdBlasChemv_pfn +#undef clAmdBlasZhemv +#define clAmdBlasZhemv clAmdBlasZhemv_pfn +#undef clAmdBlasStrmv +#define clAmdBlasStrmv clAmdBlasStrmv_pfn +#undef clAmdBlasDtrmv +#define clAmdBlasDtrmv clAmdBlasDtrmv_pfn +#undef clAmdBlasCtrmv +#define clAmdBlasCtrmv clAmdBlasCtrmv_pfn +#undef clAmdBlasZtrmv +#define clAmdBlasZtrmv clAmdBlasZtrmv_pfn +#undef clAmdBlasStrsv +#define clAmdBlasStrsv clAmdBlasStrsv_pfn +#undef clAmdBlasDtrsv +#define clAmdBlasDtrsv clAmdBlasDtrsv_pfn +#undef clAmdBlasCtrsv +#define clAmdBlasCtrsv clAmdBlasCtrsv_pfn +#undef clAmdBlasZtrsv +#define clAmdBlasZtrsv clAmdBlasZtrsv_pfn +#undef clAmdBlasSger +#define clAmdBlasSger 
clAmdBlasSger_pfn +#undef clAmdBlasDger +#define clAmdBlasDger clAmdBlasDger_pfn +#undef clAmdBlasCgeru +#define clAmdBlasCgeru clAmdBlasCgeru_pfn +#undef clAmdBlasZgeru +#define clAmdBlasZgeru clAmdBlasZgeru_pfn +#undef clAmdBlasCgerc +#define clAmdBlasCgerc clAmdBlasCgerc_pfn +#undef clAmdBlasZgerc +#define clAmdBlasZgerc clAmdBlasZgerc_pfn +#undef clAmdBlasSsyr +#define clAmdBlasSsyr clAmdBlasSsyr_pfn +#undef clAmdBlasDsyr +#define clAmdBlasDsyr clAmdBlasDsyr_pfn +#undef clAmdBlasCher +#define clAmdBlasCher clAmdBlasCher_pfn +#undef clAmdBlasZher +#define clAmdBlasZher clAmdBlasZher_pfn +#undef clAmdBlasSsyr2 +#define clAmdBlasSsyr2 clAmdBlasSsyr2_pfn +#undef clAmdBlasDsyr2 +#define clAmdBlasDsyr2 clAmdBlasDsyr2_pfn +#undef clAmdBlasCher2 +#define clAmdBlasCher2 clAmdBlasCher2_pfn +#undef clAmdBlasZher2 +#define clAmdBlasZher2 clAmdBlasZher2_pfn +#undef clAmdBlasStpmv +#define clAmdBlasStpmv clAmdBlasStpmv_pfn +#undef clAmdBlasDtpmv +#define clAmdBlasDtpmv clAmdBlasDtpmv_pfn +#undef clAmdBlasCtpmv +#define clAmdBlasCtpmv clAmdBlasCtpmv_pfn +#undef clAmdBlasZtpmv +#define clAmdBlasZtpmv clAmdBlasZtpmv_pfn +#undef clAmdBlasStpsv +#define clAmdBlasStpsv clAmdBlasStpsv_pfn +#undef clAmdBlasDtpsv +#define clAmdBlasDtpsv clAmdBlasDtpsv_pfn +#undef clAmdBlasCtpsv +#define clAmdBlasCtpsv clAmdBlasCtpsv_pfn +#undef clAmdBlasZtpsv +#define clAmdBlasZtpsv clAmdBlasZtpsv_pfn +#undef clAmdBlasSspmv +#define clAmdBlasSspmv clAmdBlasSspmv_pfn +#undef clAmdBlasDspmv +#define clAmdBlasDspmv clAmdBlasDspmv_pfn +#undef clAmdBlasChpmv +#define clAmdBlasChpmv clAmdBlasChpmv_pfn +#undef clAmdBlasZhpmv +#define clAmdBlasZhpmv clAmdBlasZhpmv_pfn +#undef clAmdBlasSspr +#define clAmdBlasSspr clAmdBlasSspr_pfn +#undef clAmdBlasDspr +#define clAmdBlasDspr clAmdBlasDspr_pfn +#undef clAmdBlasChpr +#define clAmdBlasChpr clAmdBlasChpr_pfn +#undef clAmdBlasZhpr +#define clAmdBlasZhpr clAmdBlasZhpr_pfn +#undef clAmdBlasSspr2 +#define clAmdBlasSspr2 clAmdBlasSspr2_pfn +#undef clAmdBlasDspr2 
+#define clAmdBlasDspr2 clAmdBlasDspr2_pfn +#undef clAmdBlasChpr2 +#define clAmdBlasChpr2 clAmdBlasChpr2_pfn +#undef clAmdBlasZhpr2 +#define clAmdBlasZhpr2 clAmdBlasZhpr2_pfn +#undef clAmdBlasSgbmv +#define clAmdBlasSgbmv clAmdBlasSgbmv_pfn +#undef clAmdBlasDgbmv +#define clAmdBlasDgbmv clAmdBlasDgbmv_pfn +#undef clAmdBlasCgbmv +#define clAmdBlasCgbmv clAmdBlasCgbmv_pfn +#undef clAmdBlasZgbmv +#define clAmdBlasZgbmv clAmdBlasZgbmv_pfn +#undef clAmdBlasStbmv +#define clAmdBlasStbmv clAmdBlasStbmv_pfn +#undef clAmdBlasDtbmv +#define clAmdBlasDtbmv clAmdBlasDtbmv_pfn +#undef clAmdBlasCtbmv +#define clAmdBlasCtbmv clAmdBlasCtbmv_pfn +#undef clAmdBlasZtbmv +#define clAmdBlasZtbmv clAmdBlasZtbmv_pfn +#undef clAmdBlasSsbmv +#define clAmdBlasSsbmv clAmdBlasSsbmv_pfn +#undef clAmdBlasDsbmv +#define clAmdBlasDsbmv clAmdBlasDsbmv_pfn +#undef clAmdBlasChbmv +#define clAmdBlasChbmv clAmdBlasChbmv_pfn +#undef clAmdBlasZhbmv +#define clAmdBlasZhbmv clAmdBlasZhbmv_pfn +#undef clAmdBlasStbsv +#define clAmdBlasStbsv clAmdBlasStbsv_pfn +#undef clAmdBlasDtbsv +#define clAmdBlasDtbsv clAmdBlasDtbsv_pfn +#undef clAmdBlasCtbsv +#define clAmdBlasCtbsv clAmdBlasCtbsv_pfn +#undef clAmdBlasZtbsv +#define clAmdBlasZtbsv clAmdBlasZtbsv_pfn +#undef clAmdBlasSgemm +#define clAmdBlasSgemm clAmdBlasSgemm_pfn +#undef clAmdBlasDgemm +#define clAmdBlasDgemm clAmdBlasDgemm_pfn +#undef clAmdBlasCgemm +#define clAmdBlasCgemm clAmdBlasCgemm_pfn +#undef clAmdBlasZgemm +#define clAmdBlasZgemm clAmdBlasZgemm_pfn +#undef clAmdBlasSgemmEx +#define clAmdBlasSgemmEx clAmdBlasSgemmEx_pfn +#undef clAmdBlasDgemmEx +#define clAmdBlasDgemmEx clAmdBlasDgemmEx_pfn +#undef clAmdBlasCgemmEx +#define clAmdBlasCgemmEx clAmdBlasCgemmEx_pfn +#undef clAmdBlasZgemmEx +#define clAmdBlasZgemmEx clAmdBlasZgemmEx_pfn +#undef clAmdBlasStrmm +#define clAmdBlasStrmm clAmdBlasStrmm_pfn +#undef clAmdBlasDtrmm +#define clAmdBlasDtrmm clAmdBlasDtrmm_pfn +#undef clAmdBlasCtrmm +#define clAmdBlasCtrmm clAmdBlasCtrmm_pfn +#undef 
clAmdBlasZtrmm +#define clAmdBlasZtrmm clAmdBlasZtrmm_pfn +#undef clAmdBlasStrmmEx +#define clAmdBlasStrmmEx clAmdBlasStrmmEx_pfn +#undef clAmdBlasDtrmmEx +#define clAmdBlasDtrmmEx clAmdBlasDtrmmEx_pfn +#undef clAmdBlasCtrmmEx +#define clAmdBlasCtrmmEx clAmdBlasCtrmmEx_pfn +#undef clAmdBlasZtrmmEx +#define clAmdBlasZtrmmEx clAmdBlasZtrmmEx_pfn +#undef clAmdBlasStrsm +#define clAmdBlasStrsm clAmdBlasStrsm_pfn +#undef clAmdBlasDtrsm +#define clAmdBlasDtrsm clAmdBlasDtrsm_pfn +#undef clAmdBlasCtrsm +#define clAmdBlasCtrsm clAmdBlasCtrsm_pfn +#undef clAmdBlasZtrsm +#define clAmdBlasZtrsm clAmdBlasZtrsm_pfn +#undef clAmdBlasStrsmEx +#define clAmdBlasStrsmEx clAmdBlasStrsmEx_pfn +#undef clAmdBlasDtrsmEx +#define clAmdBlasDtrsmEx clAmdBlasDtrsmEx_pfn +#undef clAmdBlasCtrsmEx +#define clAmdBlasCtrsmEx clAmdBlasCtrsmEx_pfn +#undef clAmdBlasZtrsmEx +#define clAmdBlasZtrsmEx clAmdBlasZtrsmEx_pfn +#undef clAmdBlasSsyrk +#define clAmdBlasSsyrk clAmdBlasSsyrk_pfn +#undef clAmdBlasDsyrk +#define clAmdBlasDsyrk clAmdBlasDsyrk_pfn +#undef clAmdBlasCsyrk +#define clAmdBlasCsyrk clAmdBlasCsyrk_pfn +#undef clAmdBlasZsyrk +#define clAmdBlasZsyrk clAmdBlasZsyrk_pfn +#undef clAmdBlasSsyrkEx +#define clAmdBlasSsyrkEx clAmdBlasSsyrkEx_pfn +#undef clAmdBlasDsyrkEx +#define clAmdBlasDsyrkEx clAmdBlasDsyrkEx_pfn +#undef clAmdBlasCsyrkEx +#define clAmdBlasCsyrkEx clAmdBlasCsyrkEx_pfn +#undef clAmdBlasZsyrkEx +#define clAmdBlasZsyrkEx clAmdBlasZsyrkEx_pfn +#undef clAmdBlasSsyr2k +#define clAmdBlasSsyr2k clAmdBlasSsyr2k_pfn +#undef clAmdBlasDsyr2k +#define clAmdBlasDsyr2k clAmdBlasDsyr2k_pfn +#undef clAmdBlasCsyr2k +#define clAmdBlasCsyr2k clAmdBlasCsyr2k_pfn +#undef clAmdBlasZsyr2k +#define clAmdBlasZsyr2k clAmdBlasZsyr2k_pfn +#undef clAmdBlasSsyr2kEx +#define clAmdBlasSsyr2kEx clAmdBlasSsyr2kEx_pfn +#undef clAmdBlasDsyr2kEx +#define clAmdBlasDsyr2kEx clAmdBlasDsyr2kEx_pfn +#undef clAmdBlasCsyr2kEx +#define clAmdBlasCsyr2kEx clAmdBlasCsyr2kEx_pfn +#undef clAmdBlasZsyr2kEx +#define 
clAmdBlasZsyr2kEx clAmdBlasZsyr2kEx_pfn +#undef clAmdBlasSsymm +#define clAmdBlasSsymm clAmdBlasSsymm_pfn +#undef clAmdBlasDsymm +#define clAmdBlasDsymm clAmdBlasDsymm_pfn +#undef clAmdBlasCsymm +#define clAmdBlasCsymm clAmdBlasCsymm_pfn +#undef clAmdBlasZsymm +#define clAmdBlasZsymm clAmdBlasZsymm_pfn +#undef clAmdBlasChemm +#define clAmdBlasChemm clAmdBlasChemm_pfn +#undef clAmdBlasZhemm +#define clAmdBlasZhemm clAmdBlasZhemm_pfn +#undef clAmdBlasCherk +#define clAmdBlasCherk clAmdBlasCherk_pfn +#undef clAmdBlasZherk +#define clAmdBlasZherk clAmdBlasZherk_pfn +#undef clAmdBlasCher2k +#define clAmdBlasCher2k clAmdBlasCher2k_pfn +#undef clAmdBlasZher2k +#define clAmdBlasZher2k clAmdBlasZher2k_pfn + +#ifndef CL_RUNTIME_EXPORT +#if (defined(BUILD_SHARED_LIBS) || defined(OPENCV_OCL_SHARED)) && (defined WIN32 || defined _WIN32 || defined WINCE) +#define CL_RUNTIME_EXPORT __declspec(dllimport) +#else +#define CL_RUNTIME_EXPORT +#endif +#endif + + +// generated by parser_clamdblas.py +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSetup)(); +extern CL_RUNTIME_EXPORT void (*clAmdBlasTeardown)(); +extern CL_RUNTIME_EXPORT cl_ulong (*clAmdBlasAddScratchImage)(cl_context context, size_t width, size_t height, clAmdBlasStatus* status); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasRemoveScratchImage)(cl_ulong imageID); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* 
events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSscal)(size_t N, cl_float alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDscal)(size_t N, cl_double alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCscal)(size_t N, cl_float2 alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZscal)(size_t N, cl_double2 alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsscal)(size_t N, cl_float alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdscal)(size_t N, cl_double alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, 
cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasScopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSaxpy)(size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDaxpy)(size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCaxpy)(size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint 
numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZaxpy)(size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSdot)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDdot)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCdotu)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdotu)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCdotc)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint 
numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdotc)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrotg)(cl_mem SA, size_t offSA, cl_mem SB, size_t offSB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrotg)(cl_mem DA, size_t offDA, cl_mem DB, size_t offDB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCrotg)(cl_mem CA, size_t offCA, cl_mem CB, size_t offCB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZrotg)(cl_mem CA, size_t offCA, cl_mem CB, size_t offCB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrotmg)(cl_mem SD1, size_t offSD1, cl_mem SD2, size_t offSD2, cl_mem SX1, size_t offSX1, const cl_mem SY1, size_t offSY1, cl_mem SPARAM, size_t offSparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern 
CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrotmg)(cl_mem DD1, size_t offDD1, cl_mem DD2, size_t offDD2, cl_mem DX1, size_t offDX1, const cl_mem DY1, size_t offDY1, cl_mem DPARAM, size_t offDparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_float C, cl_float S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_double C, cl_double S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_float C, cl_float S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_double C, cl_double S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrotm)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, const cl_mem SPARAM, size_t offSparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrotm)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, const cl_mem 
DPARAM, size_t offDparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasScnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDznrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiSamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiDamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus 
(*clAmdBlasiCamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiZamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasScasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDzasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, 
cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, FloatComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, DoubleComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, 
cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, FloatComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, DoubleComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsymv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsymv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsymvEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int 
incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsymvEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChemv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, FloatComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, FloatComplex beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhemv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, DoubleComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, DoubleComplex beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, 
size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint 
numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSger)(clAmdBlasOrder order, size_t M, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDger)(clAmdBlasOrder order, size_t M, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgeru)(clAmdBlasOrder order, size_t M, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgeru)(clAmdBlasOrder order, size_t M, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgerc)(clAmdBlasOrder 
order, size_t M, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgerc)(clAmdBlasOrder order, size_t M, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCher)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZher)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* 
eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCher2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZher2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus 
(*clAmdBlasDtpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, 
cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSspmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDspmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float2 alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double2 alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint 
numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSspr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDspr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChpr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhpr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSspr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDspr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint 
numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChpr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhpr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float2 beta, 
cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t 
offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t 
K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_float beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t lda, const cl_mem B, 
size_t ldb, cl_double beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, FloatComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, DoubleComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus 
(*clAmdBlasCgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint 
numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, 
size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_mem 
B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t lda, cl_float beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t 
lda, cl_double beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t lda, FloatComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t lda, DoubleComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, 
cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_float beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_double beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, FloatComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, DoubleComplex beta, 
cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus 
(*clAmdBlasSsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChemm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, 
cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhemm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCherk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, float alpha, const cl_mem A, size_t offa, size_t lda, float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZherk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, double alpha, const cl_mem A, size_t offa, size_t lda, double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCher2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZher2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t 
offb, size_t ldb, cl_double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); + +#endif + +#endif // __OPENCV_OCL_CLAMDBLAS_RUNTIME_HPP__ diff --git a/modules/ocl/include/opencv2/ocl/cl_runtime/clamdfft_runtime.hpp b/modules/ocl/include/opencv2/ocl/cl_runtime/clamdfft_runtime.hpp new file mode 100644 index 0000000000..7145f86046 --- /dev/null +++ b/modules/ocl/include/opencv2/ocl/cl_runtime/clamdfft_runtime.hpp @@ -0,0 +1,156 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef __OPENCV_OCL_CLAMDFFT_RUNTIME_HPP__ +#define __OPENCV_OCL_CLAMDFFT_RUNTIME_HPP__ + +#ifdef HAVE_CLAMDFFT + +// generated by parser_clamdfft.py +#define clAmdFftSetup clAmdFftSetup_ +#define clAmdFftTeardown clAmdFftTeardown_ +#define clAmdFftGetVersion clAmdFftGetVersion_ +#define clAmdFftCreateDefaultPlan clAmdFftCreateDefaultPlan_ +#define clAmdFftCopyPlan clAmdFftCopyPlan_ +#define clAmdFftBakePlan clAmdFftBakePlan_ +#define clAmdFftDestroyPlan clAmdFftDestroyPlan_ +#define clAmdFftGetPlanContext clAmdFftGetPlanContext_ +#define clAmdFftGetPlanPrecision clAmdFftGetPlanPrecision_ +#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_ +#define clAmdFftGetPlanScale clAmdFftGetPlanScale_ +#define clAmdFftSetPlanScale clAmdFftSetPlanScale_ +#define clAmdFftGetPlanBatchSize clAmdFftGetPlanBatchSize_ +#define clAmdFftSetPlanBatchSize clAmdFftSetPlanBatchSize_ +#define clAmdFftGetPlanDim clAmdFftGetPlanDim_ +#define clAmdFftSetPlanDim clAmdFftSetPlanDim_ +#define clAmdFftGetPlanLength clAmdFftGetPlanLength_ +#define clAmdFftSetPlanLength clAmdFftSetPlanLength_ +#define clAmdFftGetPlanInStride clAmdFftGetPlanInStride_ +#define clAmdFftSetPlanInStride clAmdFftSetPlanInStride_ +#define clAmdFftGetPlanOutStride clAmdFftGetPlanOutStride_ +#define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_ +#define clAmdFftGetPlanDistance clAmdFftGetPlanDistance_ +#define 
clAmdFftSetPlanDistance clAmdFftSetPlanDistance_ +#define clAmdFftGetLayout clAmdFftGetLayout_ +#define clAmdFftSetLayout clAmdFftSetLayout_ +#define clAmdFftGetResultLocation clAmdFftGetResultLocation_ +#define clAmdFftSetResultLocation clAmdFftSetResultLocation_ +#define clAmdFftGetPlanTransposeResult clAmdFftGetPlanTransposeResult_ +#define clAmdFftSetPlanTransposeResult clAmdFftSetPlanTransposeResult_ +#define clAmdFftGetTmpBufSize clAmdFftGetTmpBufSize_ +#define clAmdFftEnqueueTransform clAmdFftEnqueueTransform_ + +#include + +// generated by parser_clamdfft.py +#undef clAmdFftSetup +#define clAmdFftSetup clAmdFftSetup_pfn +#undef clAmdFftTeardown +#define clAmdFftTeardown clAmdFftTeardown_pfn +#undef clAmdFftGetVersion +#define clAmdFftGetVersion clAmdFftGetVersion_pfn +#undef clAmdFftCreateDefaultPlan +#define clAmdFftCreateDefaultPlan clAmdFftCreateDefaultPlan_pfn +#undef clAmdFftCopyPlan +#define clAmdFftCopyPlan clAmdFftCopyPlan_pfn +#undef clAmdFftBakePlan +#define clAmdFftBakePlan clAmdFftBakePlan_pfn +#undef clAmdFftDestroyPlan +#define clAmdFftDestroyPlan clAmdFftDestroyPlan_pfn +#undef clAmdFftGetPlanContext +#define clAmdFftGetPlanContext clAmdFftGetPlanContext_pfn +#undef clAmdFftGetPlanPrecision +#define clAmdFftGetPlanPrecision clAmdFftGetPlanPrecision_pfn +#undef clAmdFftSetPlanPrecision +#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_pfn +#undef clAmdFftGetPlanScale +#define clAmdFftGetPlanScale clAmdFftGetPlanScale_pfn +#undef clAmdFftSetPlanScale +#define clAmdFftSetPlanScale clAmdFftSetPlanScale_pfn +#undef clAmdFftGetPlanBatchSize +#define clAmdFftGetPlanBatchSize clAmdFftGetPlanBatchSize_pfn +#undef clAmdFftSetPlanBatchSize +#define clAmdFftSetPlanBatchSize clAmdFftSetPlanBatchSize_pfn +#undef clAmdFftGetPlanDim +#define clAmdFftGetPlanDim clAmdFftGetPlanDim_pfn +#undef clAmdFftSetPlanDim +#define clAmdFftSetPlanDim clAmdFftSetPlanDim_pfn +#undef clAmdFftGetPlanLength +#define clAmdFftGetPlanLength clAmdFftGetPlanLength_pfn 
+#undef clAmdFftSetPlanLength +#define clAmdFftSetPlanLength clAmdFftSetPlanLength_pfn +#undef clAmdFftGetPlanInStride +#define clAmdFftGetPlanInStride clAmdFftGetPlanInStride_pfn +#undef clAmdFftSetPlanInStride +#define clAmdFftSetPlanInStride clAmdFftSetPlanInStride_pfn +#undef clAmdFftGetPlanOutStride +#define clAmdFftGetPlanOutStride clAmdFftGetPlanOutStride_pfn +#undef clAmdFftSetPlanOutStride +#define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_pfn +#undef clAmdFftGetPlanDistance +#define clAmdFftGetPlanDistance clAmdFftGetPlanDistance_pfn +#undef clAmdFftSetPlanDistance +#define clAmdFftSetPlanDistance clAmdFftSetPlanDistance_pfn +#undef clAmdFftGetLayout +#define clAmdFftGetLayout clAmdFftGetLayout_pfn +#undef clAmdFftSetLayout +#define clAmdFftSetLayout clAmdFftSetLayout_pfn +#undef clAmdFftGetResultLocation +#define clAmdFftGetResultLocation clAmdFftGetResultLocation_pfn +#undef clAmdFftSetResultLocation +#define clAmdFftSetResultLocation clAmdFftSetResultLocation_pfn +#undef clAmdFftGetPlanTransposeResult +#define clAmdFftGetPlanTransposeResult clAmdFftGetPlanTransposeResult_pfn +#undef clAmdFftSetPlanTransposeResult +#define clAmdFftSetPlanTransposeResult clAmdFftSetPlanTransposeResult_pfn +#undef clAmdFftGetTmpBufSize +#define clAmdFftGetTmpBufSize clAmdFftGetTmpBufSize_pfn +#undef clAmdFftEnqueueTransform +#define clAmdFftEnqueueTransform clAmdFftEnqueueTransform_pfn + +#ifndef CL_RUNTIME_EXPORT +#if (defined(BUILD_SHARED_LIBS) || defined(OPENCV_OCL_SHARED)) && (defined WIN32 || defined _WIN32 || defined WINCE) +#define CL_RUNTIME_EXPORT __declspec(dllimport) +#else +#define CL_RUNTIME_EXPORT +#endif +#endif + + +// generated by parser_clamdfft.py +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetup)(const clAmdFftSetupData* setupData); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftTeardown)(); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch); +extern CL_RUNTIME_EXPORT 
clAmdFftStatus (*clAmdFftCreateDefaultPlan)(clAmdFftPlanHandle* plHandle, cl_context context, const clAmdFftDim dim, const size_t* clLengths); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftCopyPlan)(clAmdFftPlanHandle* out_plHandle, cl_context new_context, clAmdFftPlanHandle in_plHandle); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftBakePlan)(clAmdFftPlanHandle plHandle, cl_uint numQueues, cl_command_queue* commQueueFFT, void (CL_CALLBACK* pfn_notify) (clAmdFftPlanHandle plHandle, void* user_data), void* user_data); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftDestroyPlan)(clAmdFftPlanHandle* plHandle); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanContext)(const clAmdFftPlanHandle plHandle, cl_context* context); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanPrecision)(const clAmdFftPlanHandle plHandle, clAmdFftPrecision* precision); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle plHandle, clAmdFftPrecision precision); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanScale)(const clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float* scale); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanScale)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float scale); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanBatchSize)(const clAmdFftPlanHandle plHandle, size_t* batchSize); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanBatchSize)(clAmdFftPlanHandle plHandle, size_t batchSize); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanDim)(const clAmdFftPlanHandle plHandle, clAmdFftDim* dim, cl_uint* size); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDim)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanLength)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clLengths); +extern CL_RUNTIME_EXPORT clAmdFftStatus 
(*clAmdFftSetPlanLength)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, const size_t* clLengths); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanInStride)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanInStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanOutStride)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanOutStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanDistance)(const clAmdFftPlanHandle plHandle, size_t* iDist, size_t* oDist); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDistance)(clAmdFftPlanHandle plHandle, size_t iDist, size_t oDist); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetLayout)(const clAmdFftPlanHandle plHandle, clAmdFftLayout* iLayout, clAmdFftLayout* oLayout); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetLayout)(clAmdFftPlanHandle plHandle, clAmdFftLayout iLayout, clAmdFftLayout oLayout); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetResultLocation)(const clAmdFftPlanHandle plHandle, clAmdFftResultLocation* placeness); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetResultLocation)(clAmdFftPlanHandle plHandle, clAmdFftResultLocation placeness); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanTransposeResult)(const clAmdFftPlanHandle plHandle, clAmdFftResultTransposed* transposed); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanTransposeResult)(clAmdFftPlanHandle plHandle, clAmdFftResultTransposed transposed); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetTmpBufSize)(const clAmdFftPlanHandle plHandle, size_t* buffersize); +extern CL_RUNTIME_EXPORT clAmdFftStatus 
(*clAmdFftEnqueueTransform)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_uint numQueuesAndEvents, cl_command_queue* commQueues, cl_uint numWaitEvents, const cl_event* waitEvents, cl_event* outEvents, cl_mem* inputBuffers, cl_mem* outputBuffers, cl_mem tmpBuffer); + +#endif + +#endif // __OPENCV_OCL_CLAMDFFT_RUNTIME_HPP__ diff --git a/modules/ocl/src/cl_runtime/cl_runtime_opencl11_impl.hpp b/modules/ocl/src/cl_runtime/cl_runtime_opencl11_impl.hpp new file mode 100644 index 0000000000..ef14696996 --- /dev/null +++ b/modules/ocl/src/cl_runtime/cl_runtime_opencl11_impl.hpp @@ -0,0 +1,435 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +// generated by parser_cl.py +enum OPENCL_FN_ID { + OPENCL_FN_clGetPlatformIDs = 0, + OPENCL_FN_clGetPlatformInfo, + OPENCL_FN_clGetDeviceIDs, + OPENCL_FN_clGetDeviceInfo, + OPENCL_FN_clCreateContext, + OPENCL_FN_clCreateContextFromType, + OPENCL_FN_clRetainContext, + OPENCL_FN_clReleaseContext, + OPENCL_FN_clGetContextInfo, + OPENCL_FN_clCreateCommandQueue, + OPENCL_FN_clRetainCommandQueue, + OPENCL_FN_clReleaseCommandQueue, + OPENCL_FN_clGetCommandQueueInfo, + OPENCL_FN_clSetCommandQueueProperty, + OPENCL_FN_clCreateBuffer, + OPENCL_FN_clCreateSubBuffer, + OPENCL_FN_clCreateImage2D, + OPENCL_FN_clCreateImage3D, + OPENCL_FN_clRetainMemObject, + OPENCL_FN_clReleaseMemObject, + OPENCL_FN_clGetSupportedImageFormats, + OPENCL_FN_clGetMemObjectInfo, + OPENCL_FN_clGetImageInfo, + OPENCL_FN_clSetMemObjectDestructorCallback, + OPENCL_FN_clCreateSampler, + OPENCL_FN_clRetainSampler, + OPENCL_FN_clReleaseSampler, + OPENCL_FN_clGetSamplerInfo, + OPENCL_FN_clCreateProgramWithSource, + OPENCL_FN_clCreateProgramWithBinary, + OPENCL_FN_clRetainProgram, + OPENCL_FN_clReleaseProgram, + OPENCL_FN_clBuildProgram, + OPENCL_FN_clUnloadCompiler, + OPENCL_FN_clGetProgramInfo, + OPENCL_FN_clGetProgramBuildInfo, + OPENCL_FN_clCreateKernel, + OPENCL_FN_clCreateKernelsInProgram, + OPENCL_FN_clRetainKernel, + OPENCL_FN_clReleaseKernel, + 
OPENCL_FN_clSetKernelArg, + OPENCL_FN_clGetKernelInfo, + OPENCL_FN_clGetKernelWorkGroupInfo, + OPENCL_FN_clWaitForEvents, + OPENCL_FN_clGetEventInfo, + OPENCL_FN_clCreateUserEvent, + OPENCL_FN_clRetainEvent, + OPENCL_FN_clReleaseEvent, + OPENCL_FN_clSetUserEventStatus, + OPENCL_FN_clSetEventCallback, + OPENCL_FN_clGetEventProfilingInfo, + OPENCL_FN_clFlush, + OPENCL_FN_clFinish, + OPENCL_FN_clEnqueueReadBuffer, + OPENCL_FN_clEnqueueReadBufferRect, + OPENCL_FN_clEnqueueWriteBuffer, + OPENCL_FN_clEnqueueWriteBufferRect, + OPENCL_FN_clEnqueueCopyBuffer, + OPENCL_FN_clEnqueueCopyBufferRect, + OPENCL_FN_clEnqueueReadImage, + OPENCL_FN_clEnqueueWriteImage, + OPENCL_FN_clEnqueueCopyImage, + OPENCL_FN_clEnqueueCopyImageToBuffer, + OPENCL_FN_clEnqueueCopyBufferToImage, + OPENCL_FN_clEnqueueMapBuffer, + OPENCL_FN_clEnqueueMapImage, + OPENCL_FN_clEnqueueUnmapMemObject, + OPENCL_FN_clEnqueueNDRangeKernel, + OPENCL_FN_clEnqueueTask, + OPENCL_FN_clEnqueueNativeKernel, + OPENCL_FN_clEnqueueMarker, + OPENCL_FN_clEnqueueWaitForEvents, + OPENCL_FN_clEnqueueBarrier, + OPENCL_FN_clGetExtensionFunctionAddress, +}; +// generated by parser_cl.py +const char* opencl_fn_names[] = { + "clGetPlatformIDs", + "clGetPlatformInfo", + "clGetDeviceIDs", + "clGetDeviceInfo", + "clCreateContext", + "clCreateContextFromType", + "clRetainContext", + "clReleaseContext", + "clGetContextInfo", + "clCreateCommandQueue", + "clRetainCommandQueue", + "clReleaseCommandQueue", + "clGetCommandQueueInfo", + "clSetCommandQueueProperty", + "clCreateBuffer", + "clCreateSubBuffer", + "clCreateImage2D", + "clCreateImage3D", + "clRetainMemObject", + "clReleaseMemObject", + "clGetSupportedImageFormats", + "clGetMemObjectInfo", + "clGetImageInfo", + "clSetMemObjectDestructorCallback", + "clCreateSampler", + "clRetainSampler", + "clReleaseSampler", + "clGetSamplerInfo", + "clCreateProgramWithSource", + "clCreateProgramWithBinary", + "clRetainProgram", + "clReleaseProgram", + "clBuildProgram", + "clUnloadCompiler", + 
"clGetProgramInfo", + "clGetProgramBuildInfo", + "clCreateKernel", + "clCreateKernelsInProgram", + "clRetainKernel", + "clReleaseKernel", + "clSetKernelArg", + "clGetKernelInfo", + "clGetKernelWorkGroupInfo", + "clWaitForEvents", + "clGetEventInfo", + "clCreateUserEvent", + "clRetainEvent", + "clReleaseEvent", + "clSetUserEventStatus", + "clSetEventCallback", + "clGetEventProfilingInfo", + "clFlush", + "clFinish", + "clEnqueueReadBuffer", + "clEnqueueReadBufferRect", + "clEnqueueWriteBuffer", + "clEnqueueWriteBufferRect", + "clEnqueueCopyBuffer", + "clEnqueueCopyBufferRect", + "clEnqueueReadImage", + "clEnqueueWriteImage", + "clEnqueueCopyImage", + "clEnqueueCopyImageToBuffer", + "clEnqueueCopyBufferToImage", + "clEnqueueMapBuffer", + "clEnqueueMapImage", + "clEnqueueUnmapMemObject", + "clEnqueueNDRangeKernel", + "clEnqueueTask", + "clEnqueueNativeKernel", + "clEnqueueMarker", + "clEnqueueWaitForEvents", + "clEnqueueBarrier", + "clGetExtensionFunctionAddress", +}; + +namespace { +// generated by parser_cl.py +template +struct opencl_fn0 +{ + typedef _R (*FN)(); + static _R switch_fn() + { return ((FN)opencl_check_fn(ID))(); } +}; + +template +struct opencl_fn1 +{ + typedef _R (*FN)(_T1); + static _R switch_fn(_T1 p1) + { return ((FN)opencl_check_fn(ID))(p1); } +}; + +template +struct opencl_fn2 +{ + typedef _R (*FN)(_T1, _T2); + static _R switch_fn(_T1 p1, _T2 p2) + { return ((FN)opencl_check_fn(ID))(p1, p2); } +}; + +template +struct opencl_fn3 +{ + typedef _R (*FN)(_T1, _T2, _T3); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3); } +}; + +template +struct opencl_fn4 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4); } +}; + +template +struct opencl_fn5 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5); } 
+}; + +template +struct opencl_fn6 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6); } +}; + +template +struct opencl_fn7 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7); } +}; + +template +struct opencl_fn8 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8); } +}; + +template +struct opencl_fn9 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9); } +}; + +template +struct opencl_fn10 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } +}; + +template +struct opencl_fn11 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11); } +}; + +template +struct opencl_fn12 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12); } +}; + +template +struct opencl_fn13 +{ + typedef _R 
(*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } +}; + +template +struct opencl_fn14 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14); } +}; + +} + +// generated by parser_cl.py +cl_int (*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*) = opencl_fn3::switch_fn; +cl_int (*clGetPlatformInfo)(cl_platform_id, cl_platform_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (*clGetDeviceIDs)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*) = opencl_fn5::switch_fn; +cl_int (*clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_context (*clCreateContext)(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*) = opencl_fn6::switch_fn; +cl_context (*clCreateContextFromType)(const cl_context_properties*, cl_device_type, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*) = opencl_fn5::switch_fn; +cl_int (*clRetainContext)(cl_context) = opencl_fn1::switch_fn; +cl_int (*clReleaseContext)(cl_context) = opencl_fn1::switch_fn; +cl_int (*clGetContextInfo)(cl_context, cl_context_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_command_queue (*clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*) = opencl_fn4::switch_fn; +cl_int (*clRetainCommandQueue)(cl_command_queue) = opencl_fn1::switch_fn; +cl_int 
(*clReleaseCommandQueue)(cl_command_queue) = opencl_fn1::switch_fn; +cl_int (*clGetCommandQueueInfo)(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (*clSetCommandQueueProperty)(cl_command_queue, cl_command_queue_properties, cl_bool, cl_command_queue_properties*) = opencl_fn4::switch_fn; +cl_mem (*clCreateBuffer)(cl_context, cl_mem_flags, size_t, void*, cl_int*) = opencl_fn5::switch_fn; +cl_mem (*clCreateSubBuffer)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void*, cl_int*) = opencl_fn5::switch_fn; +cl_mem (*clCreateImage2D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, void*, cl_int*) = opencl_fn8::switch_fn; +cl_mem (*clCreateImage3D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, size_t, size_t, void*, cl_int*) = opencl_fn10::switch_fn; +cl_int (*clRetainMemObject)(cl_mem) = opencl_fn1::switch_fn; +cl_int (*clReleaseMemObject)(cl_mem) = opencl_fn1::switch_fn; +cl_int (*clGetSupportedImageFormats)(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format*, cl_uint*) = opencl_fn6::switch_fn; +cl_int (*clGetMemObjectInfo)(cl_mem, cl_mem_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (*clGetImageInfo)(cl_mem, cl_image_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (*clSetMemObjectDestructorCallback)(cl_mem, void (CL_CALLBACK*) (cl_mem, void*), void*) = opencl_fn3::switch_fn; +cl_sampler (*clCreateSampler)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int*) = opencl_fn5::switch_fn; +cl_int (*clRetainSampler)(cl_sampler) = opencl_fn1::switch_fn; +cl_int (*clReleaseSampler)(cl_sampler) = opencl_fn1::switch_fn; +cl_int (*clGetSamplerInfo)(cl_sampler, cl_sampler_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_program (*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*) = opencl_fn5::switch_fn; +cl_program (*clCreateProgramWithBinary)(cl_context, cl_uint, 
const cl_device_id*, const size_t*, const unsigned char**, cl_int*, cl_int*) = opencl_fn7::switch_fn; +cl_int (*clRetainProgram)(cl_program) = opencl_fn1::switch_fn; +cl_int (*clReleaseProgram)(cl_program) = opencl_fn1::switch_fn; +cl_int (*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void (CL_CALLBACK*) (cl_program, void*), void*) = opencl_fn6::switch_fn; +cl_int (*clUnloadCompiler)() = opencl_fn0::switch_fn; +cl_int (*clGetProgramInfo)(cl_program, cl_program_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*) = opencl_fn6::switch_fn; +cl_kernel (*clCreateKernel)(cl_program, const char*, cl_int*) = opencl_fn3::switch_fn; +cl_int (*clCreateKernelsInProgram)(cl_program, cl_uint, cl_kernel*, cl_uint*) = opencl_fn4::switch_fn; +cl_int (*clRetainKernel)(cl_kernel) = opencl_fn1::switch_fn; +cl_int (*clReleaseKernel)(cl_kernel) = opencl_fn1::switch_fn; +cl_int (*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*) = opencl_fn4::switch_fn; +cl_int (*clGetKernelInfo)(cl_kernel, cl_kernel_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (*clGetKernelWorkGroupInfo)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*) = opencl_fn6::switch_fn; +cl_int (*clWaitForEvents)(cl_uint, const cl_event*) = opencl_fn2::switch_fn; +cl_int (*clGetEventInfo)(cl_event, cl_event_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_event (*clCreateUserEvent)(cl_context, cl_int*) = opencl_fn2::switch_fn; +cl_int (*clRetainEvent)(cl_event) = opencl_fn1::switch_fn; +cl_int (*clReleaseEvent)(cl_event) = opencl_fn1::switch_fn; +cl_int (*clSetUserEventStatus)(cl_event, cl_int) = opencl_fn2::switch_fn; +cl_int (*clSetEventCallback)(cl_event, cl_int, void (CL_CALLBACK*) (cl_event, cl_int, void*), void*) = opencl_fn4::switch_fn; +cl_int (*clGetEventProfilingInfo)(cl_event, cl_profiling_info, size_t, void*, size_t*) = 
opencl_fn5::switch_fn; +cl_int (*clFlush)(cl_command_queue) = opencl_fn1::switch_fn; +cl_int (*clFinish)(cl_command_queue) = opencl_fn1::switch_fn; +cl_int (*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (*clEnqueueReadBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn14::switch_fn; +cl_int (*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (*clEnqueueWriteBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*) = opencl_fn14::switch_fn; +cl_int (*clEnqueueCopyBuffer)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (*clEnqueueCopyBufferRect)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn13::switch_fn; +cl_int (*clEnqueueReadImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn11::switch_fn; +cl_int (*clEnqueueWriteImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*) = opencl_fn11::switch_fn; +cl_int (*clEnqueueCopyImage)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (*clEnqueueCopyImageToBuffer)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int 
(*clEnqueueCopyBufferToImage)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +void* (*clEnqueueMapBuffer)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event*, cl_event*, cl_int*) = opencl_fn10::switch_fn; +void* (*clEnqueueMapImage)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t*, const size_t*, size_t*, size_t*, cl_uint, const cl_event*, cl_event*, cl_int*) = opencl_fn12::switch_fn; +cl_int (*clEnqueueUnmapMemObject)(cl_command_queue, cl_mem, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn6::switch_fn; +cl_int (*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (*clEnqueueTask)(cl_command_queue, cl_kernel, cl_uint, const cl_event*, cl_event*) = opencl_fn5::switch_fn; +cl_int (*clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK*) (void*), void*, size_t, cl_uint, const cl_mem*, const void**, cl_uint, const cl_event*, cl_event*) = opencl_fn10::switch_fn; +cl_int (*clEnqueueMarker)(cl_command_queue, cl_event*) = opencl_fn2::switch_fn; +cl_int (*clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event*) = opencl_fn3::switch_fn; +cl_int (*clEnqueueBarrier)(cl_command_queue) = opencl_fn1::switch_fn; +void* (*clGetExtensionFunctionAddress)(const char*) = opencl_fn1::switch_fn; + +// generated by parser_cl.py +void* opencl_fn_ptrs[] = { + &clGetPlatformIDs, + &clGetPlatformInfo, + &clGetDeviceIDs, + &clGetDeviceInfo, + &clCreateContext, + &clCreateContextFromType, + &clRetainContext, + &clReleaseContext, + &clGetContextInfo, + &clCreateCommandQueue, + &clRetainCommandQueue, + &clReleaseCommandQueue, + &clGetCommandQueueInfo, + &clSetCommandQueueProperty, + &clCreateBuffer, + &clCreateSubBuffer, + &clCreateImage2D, + &clCreateImage3D, + &clRetainMemObject, + &clReleaseMemObject, + 
&clGetSupportedImageFormats, + &clGetMemObjectInfo, + &clGetImageInfo, + &clSetMemObjectDestructorCallback, + &clCreateSampler, + &clRetainSampler, + &clReleaseSampler, + &clGetSamplerInfo, + &clCreateProgramWithSource, + &clCreateProgramWithBinary, + &clRetainProgram, + &clReleaseProgram, + &clBuildProgram, + &clUnloadCompiler, + &clGetProgramInfo, + &clGetProgramBuildInfo, + &clCreateKernel, + &clCreateKernelsInProgram, + &clRetainKernel, + &clReleaseKernel, + &clSetKernelArg, + &clGetKernelInfo, + &clGetKernelWorkGroupInfo, + &clWaitForEvents, + &clGetEventInfo, + &clCreateUserEvent, + &clRetainEvent, + &clReleaseEvent, + &clSetUserEventStatus, + &clSetEventCallback, + &clGetEventProfilingInfo, + &clFlush, + &clFinish, + &clEnqueueReadBuffer, + &clEnqueueReadBufferRect, + &clEnqueueWriteBuffer, + &clEnqueueWriteBufferRect, + &clEnqueueCopyBuffer, + &clEnqueueCopyBufferRect, + &clEnqueueReadImage, + &clEnqueueWriteImage, + &clEnqueueCopyImage, + &clEnqueueCopyImageToBuffer, + &clEnqueueCopyBufferToImage, + &clEnqueueMapBuffer, + &clEnqueueMapImage, + &clEnqueueUnmapMemObject, + &clEnqueueNDRangeKernel, + &clEnqueueTask, + &clEnqueueNativeKernel, + &clEnqueueMarker, + &clEnqueueWaitForEvents, + &clEnqueueBarrier, + &clGetExtensionFunctionAddress, +}; diff --git a/modules/ocl/src/cl_runtime/cl_runtime_opencl12_impl.hpp b/modules/ocl/src/cl_runtime/cl_runtime_opencl12_impl.hpp new file mode 100644 index 0000000000..f6f3e957a1 --- /dev/null +++ b/modules/ocl/src/cl_runtime/cl_runtime_opencl12_impl.hpp @@ -0,0 +1,491 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +// generated by parser_cl.py +enum OPENCL_FN_ID { + OPENCL_FN_clGetPlatformIDs = 0, + OPENCL_FN_clGetPlatformInfo, + OPENCL_FN_clGetDeviceIDs, + OPENCL_FN_clGetDeviceInfo, + OPENCL_FN_clCreateSubDevices, + OPENCL_FN_clRetainDevice, + OPENCL_FN_clReleaseDevice, + OPENCL_FN_clCreateContext, + OPENCL_FN_clCreateContextFromType, + OPENCL_FN_clRetainContext, + OPENCL_FN_clReleaseContext, + OPENCL_FN_clGetContextInfo, 
+ OPENCL_FN_clCreateCommandQueue, + OPENCL_FN_clRetainCommandQueue, + OPENCL_FN_clReleaseCommandQueue, + OPENCL_FN_clGetCommandQueueInfo, + OPENCL_FN_clCreateBuffer, + OPENCL_FN_clCreateSubBuffer, + OPENCL_FN_clCreateImage, + OPENCL_FN_clRetainMemObject, + OPENCL_FN_clReleaseMemObject, + OPENCL_FN_clGetSupportedImageFormats, + OPENCL_FN_clGetMemObjectInfo, + OPENCL_FN_clGetImageInfo, + OPENCL_FN_clSetMemObjectDestructorCallback, + OPENCL_FN_clCreateSampler, + OPENCL_FN_clRetainSampler, + OPENCL_FN_clReleaseSampler, + OPENCL_FN_clGetSamplerInfo, + OPENCL_FN_clCreateProgramWithSource, + OPENCL_FN_clCreateProgramWithBinary, + OPENCL_FN_clCreateProgramWithBuiltInKernels, + OPENCL_FN_clRetainProgram, + OPENCL_FN_clReleaseProgram, + OPENCL_FN_clBuildProgram, + OPENCL_FN_clCompileProgram, + OPENCL_FN_clLinkProgram, + OPENCL_FN_clUnloadPlatformCompiler, + OPENCL_FN_clGetProgramInfo, + OPENCL_FN_clGetProgramBuildInfo, + OPENCL_FN_clCreateKernel, + OPENCL_FN_clCreateKernelsInProgram, + OPENCL_FN_clRetainKernel, + OPENCL_FN_clReleaseKernel, + OPENCL_FN_clSetKernelArg, + OPENCL_FN_clGetKernelInfo, + OPENCL_FN_clGetKernelArgInfo, + OPENCL_FN_clGetKernelWorkGroupInfo, + OPENCL_FN_clWaitForEvents, + OPENCL_FN_clGetEventInfo, + OPENCL_FN_clCreateUserEvent, + OPENCL_FN_clRetainEvent, + OPENCL_FN_clReleaseEvent, + OPENCL_FN_clSetUserEventStatus, + OPENCL_FN_clSetEventCallback, + OPENCL_FN_clGetEventProfilingInfo, + OPENCL_FN_clFlush, + OPENCL_FN_clFinish, + OPENCL_FN_clEnqueueReadBuffer, + OPENCL_FN_clEnqueueReadBufferRect, + OPENCL_FN_clEnqueueWriteBuffer, + OPENCL_FN_clEnqueueWriteBufferRect, + OPENCL_FN_clEnqueueFillBuffer, + OPENCL_FN_clEnqueueCopyBuffer, + OPENCL_FN_clEnqueueCopyBufferRect, + OPENCL_FN_clEnqueueReadImage, + OPENCL_FN_clEnqueueWriteImage, + OPENCL_FN_clEnqueueFillImage, + OPENCL_FN_clEnqueueCopyImage, + OPENCL_FN_clEnqueueCopyImageToBuffer, + OPENCL_FN_clEnqueueCopyBufferToImage, + OPENCL_FN_clEnqueueMapBuffer, + OPENCL_FN_clEnqueueMapImage, + 
OPENCL_FN_clEnqueueUnmapMemObject, + OPENCL_FN_clEnqueueMigrateMemObjects, + OPENCL_FN_clEnqueueNDRangeKernel, + OPENCL_FN_clEnqueueTask, + OPENCL_FN_clEnqueueNativeKernel, + OPENCL_FN_clEnqueueMarkerWithWaitList, + OPENCL_FN_clEnqueueBarrierWithWaitList, + OPENCL_FN_clGetExtensionFunctionAddressForPlatform, + OPENCL_FN_clCreateImage2D, + OPENCL_FN_clCreateImage3D, + OPENCL_FN_clEnqueueMarker, + OPENCL_FN_clEnqueueWaitForEvents, + OPENCL_FN_clEnqueueBarrier, + OPENCL_FN_clUnloadCompiler, + OPENCL_FN_clGetExtensionFunctionAddress, +}; +// generated by parser_cl.py +const char* opencl_fn_names[] = { + "clGetPlatformIDs", + "clGetPlatformInfo", + "clGetDeviceIDs", + "clGetDeviceInfo", + "clCreateSubDevices", + "clRetainDevice", + "clReleaseDevice", + "clCreateContext", + "clCreateContextFromType", + "clRetainContext", + "clReleaseContext", + "clGetContextInfo", + "clCreateCommandQueue", + "clRetainCommandQueue", + "clReleaseCommandQueue", + "clGetCommandQueueInfo", + "clCreateBuffer", + "clCreateSubBuffer", + "clCreateImage", + "clRetainMemObject", + "clReleaseMemObject", + "clGetSupportedImageFormats", + "clGetMemObjectInfo", + "clGetImageInfo", + "clSetMemObjectDestructorCallback", + "clCreateSampler", + "clRetainSampler", + "clReleaseSampler", + "clGetSamplerInfo", + "clCreateProgramWithSource", + "clCreateProgramWithBinary", + "clCreateProgramWithBuiltInKernels", + "clRetainProgram", + "clReleaseProgram", + "clBuildProgram", + "clCompileProgram", + "clLinkProgram", + "clUnloadPlatformCompiler", + "clGetProgramInfo", + "clGetProgramBuildInfo", + "clCreateKernel", + "clCreateKernelsInProgram", + "clRetainKernel", + "clReleaseKernel", + "clSetKernelArg", + "clGetKernelInfo", + "clGetKernelArgInfo", + "clGetKernelWorkGroupInfo", + "clWaitForEvents", + "clGetEventInfo", + "clCreateUserEvent", + "clRetainEvent", + "clReleaseEvent", + "clSetUserEventStatus", + "clSetEventCallback", + "clGetEventProfilingInfo", + "clFlush", + "clFinish", + "clEnqueueReadBuffer", + 
"clEnqueueReadBufferRect", + "clEnqueueWriteBuffer", + "clEnqueueWriteBufferRect", + "clEnqueueFillBuffer", + "clEnqueueCopyBuffer", + "clEnqueueCopyBufferRect", + "clEnqueueReadImage", + "clEnqueueWriteImage", + "clEnqueueFillImage", + "clEnqueueCopyImage", + "clEnqueueCopyImageToBuffer", + "clEnqueueCopyBufferToImage", + "clEnqueueMapBuffer", + "clEnqueueMapImage", + "clEnqueueUnmapMemObject", + "clEnqueueMigrateMemObjects", + "clEnqueueNDRangeKernel", + "clEnqueueTask", + "clEnqueueNativeKernel", + "clEnqueueMarkerWithWaitList", + "clEnqueueBarrierWithWaitList", + "clGetExtensionFunctionAddressForPlatform", + "clCreateImage2D", + "clCreateImage3D", + "clEnqueueMarker", + "clEnqueueWaitForEvents", + "clEnqueueBarrier", + "clUnloadCompiler", + "clGetExtensionFunctionAddress", +}; + +namespace { +// generated by parser_cl.py +template +struct opencl_fn0 +{ + typedef _R (*FN)(); + static _R switch_fn() + { return ((FN)opencl_check_fn(ID))(); } +}; + +template +struct opencl_fn1 +{ + typedef _R (*FN)(_T1); + static _R switch_fn(_T1 p1) + { return ((FN)opencl_check_fn(ID))(p1); } +}; + +template +struct opencl_fn2 +{ + typedef _R (*FN)(_T1, _T2); + static _R switch_fn(_T1 p1, _T2 p2) + { return ((FN)opencl_check_fn(ID))(p1, p2); } +}; + +template +struct opencl_fn3 +{ + typedef _R (*FN)(_T1, _T2, _T3); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3); } +}; + +template +struct opencl_fn4 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4); } +}; + +template +struct opencl_fn5 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5); } +}; + +template +struct opencl_fn6 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6) + { return 
((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6); } +}; + +template +struct opencl_fn7 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7); } +}; + +template +struct opencl_fn8 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8); } +}; + +template +struct opencl_fn9 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9); } +}; + +template +struct opencl_fn10 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } +}; + +template +struct opencl_fn11 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11); } +}; + +template +struct opencl_fn12 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12); } +}; + +template +struct opencl_fn13 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 
p10, _T11 p11, _T12 p12, _T13 p13) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } +}; + +template +struct opencl_fn14 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14) + { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14); } +}; + +} + +// generated by parser_cl.py +cl_int (*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*) = opencl_fn3::switch_fn; +cl_int (*clGetPlatformInfo)(cl_platform_id, cl_platform_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (*clGetDeviceIDs)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*) = opencl_fn5::switch_fn; +cl_int (*clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (*clCreateSubDevices)(cl_device_id, const cl_device_partition_property*, cl_uint, cl_device_id*, cl_uint*) = opencl_fn5::switch_fn; +cl_int (*clRetainDevice)(cl_device_id) = opencl_fn1::switch_fn; +cl_int (*clReleaseDevice)(cl_device_id) = opencl_fn1::switch_fn; +cl_context (*clCreateContext)(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*) = opencl_fn6::switch_fn; +cl_context (*clCreateContextFromType)(const cl_context_properties*, cl_device_type, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*) = opencl_fn5::switch_fn; +cl_int (*clRetainContext)(cl_context) = opencl_fn1::switch_fn; +cl_int (*clReleaseContext)(cl_context) = opencl_fn1::switch_fn; +cl_int (*clGetContextInfo)(cl_context, cl_context_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_command_queue (*clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*) = opencl_fn4::switch_fn; 
+cl_int (*clRetainCommandQueue)(cl_command_queue) = opencl_fn1::switch_fn; +cl_int (*clReleaseCommandQueue)(cl_command_queue) = opencl_fn1::switch_fn; +cl_int (*clGetCommandQueueInfo)(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_mem (*clCreateBuffer)(cl_context, cl_mem_flags, size_t, void*, cl_int*) = opencl_fn5::switch_fn; +cl_mem (*clCreateSubBuffer)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void*, cl_int*) = opencl_fn5::switch_fn; +cl_mem (*clCreateImage)(cl_context, cl_mem_flags, const cl_image_format*, const cl_image_desc*, void*, cl_int*) = opencl_fn6::switch_fn; +cl_int (*clRetainMemObject)(cl_mem) = opencl_fn1::switch_fn; +cl_int (*clReleaseMemObject)(cl_mem) = opencl_fn1::switch_fn; +cl_int (*clGetSupportedImageFormats)(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format*, cl_uint*) = opencl_fn6::switch_fn; +cl_int (*clGetMemObjectInfo)(cl_mem, cl_mem_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (*clGetImageInfo)(cl_mem, cl_image_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (*clSetMemObjectDestructorCallback)(cl_mem, void (CL_CALLBACK*) (cl_mem, void*), void*) = opencl_fn3::switch_fn; +cl_sampler (*clCreateSampler)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int*) = opencl_fn5::switch_fn; +cl_int (*clRetainSampler)(cl_sampler) = opencl_fn1::switch_fn; +cl_int (*clReleaseSampler)(cl_sampler) = opencl_fn1::switch_fn; +cl_int (*clGetSamplerInfo)(cl_sampler, cl_sampler_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_program (*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*) = opencl_fn5::switch_fn; +cl_program (*clCreateProgramWithBinary)(cl_context, cl_uint, const cl_device_id*, const size_t*, const unsigned char**, cl_int*, cl_int*) = opencl_fn7::switch_fn; +cl_program (*clCreateProgramWithBuiltInKernels)(cl_context, cl_uint, const cl_device_id*, const char*, cl_int*) = 
opencl_fn5::switch_fn; +cl_int (*clRetainProgram)(cl_program) = opencl_fn1::switch_fn; +cl_int (*clReleaseProgram)(cl_program) = opencl_fn1::switch_fn; +cl_int (*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void (CL_CALLBACK*) (cl_program, void*), void*) = opencl_fn6::switch_fn; +cl_int (*clCompileProgram)(cl_program, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, const char**, void (CL_CALLBACK*) (cl_program, void*), void*) = opencl_fn9::switch_fn; +cl_program (*clLinkProgram)(cl_context, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, void (CL_CALLBACK*) (cl_program, void*), void*, cl_int*) = opencl_fn9::switch_fn; +cl_int (*clUnloadPlatformCompiler)(cl_platform_id) = opencl_fn1::switch_fn; +cl_int (*clGetProgramInfo)(cl_program, cl_program_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*) = opencl_fn6::switch_fn; +cl_kernel (*clCreateKernel)(cl_program, const char*, cl_int*) = opencl_fn3::switch_fn; +cl_int (*clCreateKernelsInProgram)(cl_program, cl_uint, cl_kernel*, cl_uint*) = opencl_fn4::switch_fn; +cl_int (*clRetainKernel)(cl_kernel) = opencl_fn1::switch_fn; +cl_int (*clReleaseKernel)(cl_kernel) = opencl_fn1::switch_fn; +cl_int (*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*) = opencl_fn4::switch_fn; +cl_int (*clGetKernelInfo)(cl_kernel, cl_kernel_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (*clGetKernelArgInfo)(cl_kernel, cl_uint, cl_kernel_arg_info, size_t, void*, size_t*) = opencl_fn6::switch_fn; +cl_int (*clGetKernelWorkGroupInfo)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*) = opencl_fn6::switch_fn; +cl_int (*clWaitForEvents)(cl_uint, const cl_event*) = opencl_fn2::switch_fn; +cl_int (*clGetEventInfo)(cl_event, cl_event_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_event 
(*clCreateUserEvent)(cl_context, cl_int*) = opencl_fn2::switch_fn; +cl_int (*clRetainEvent)(cl_event) = opencl_fn1::switch_fn; +cl_int (*clReleaseEvent)(cl_event) = opencl_fn1::switch_fn; +cl_int (*clSetUserEventStatus)(cl_event, cl_int) = opencl_fn2::switch_fn; +cl_int (*clSetEventCallback)(cl_event, cl_int, void (CL_CALLBACK*) (cl_event, cl_int, void*), void*) = opencl_fn4::switch_fn; +cl_int (*clGetEventProfilingInfo)(cl_event, cl_profiling_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (*clFlush)(cl_command_queue) = opencl_fn1::switch_fn; +cl_int (*clFinish)(cl_command_queue) = opencl_fn1::switch_fn; +cl_int (*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (*clEnqueueReadBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn14::switch_fn; +cl_int (*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (*clEnqueueWriteBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*) = opencl_fn14::switch_fn; +cl_int (*clEnqueueFillBuffer)(cl_command_queue, cl_mem, const void*, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (*clEnqueueCopyBuffer)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (*clEnqueueCopyBufferRect)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn13::switch_fn; +cl_int (*clEnqueueReadImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, 
size_t, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn11::switch_fn; +cl_int (*clEnqueueWriteImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*) = opencl_fn11::switch_fn; +cl_int (*clEnqueueFillImage)(cl_command_queue, cl_mem, const void*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn8::switch_fn; +cl_int (*clEnqueueCopyImage)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (*clEnqueueCopyImageToBuffer)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (*clEnqueueCopyBufferToImage)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +void* (*clEnqueueMapBuffer)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event*, cl_event*, cl_int*) = opencl_fn10::switch_fn; +void* (*clEnqueueMapImage)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t*, const size_t*, size_t*, size_t*, cl_uint, const cl_event*, cl_event*, cl_int*) = opencl_fn12::switch_fn; +cl_int (*clEnqueueUnmapMemObject)(cl_command_queue, cl_mem, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn6::switch_fn; +cl_int (*clEnqueueMigrateMemObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_mem_migration_flags, cl_uint, const cl_event*, cl_event*) = opencl_fn7::switch_fn; +cl_int (*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (*clEnqueueTask)(cl_command_queue, cl_kernel, cl_uint, const cl_event*, cl_event*) = opencl_fn5::switch_fn; +cl_int (*clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK*) (void*), void*, size_t, cl_uint, 
const cl_mem*, const void**, cl_uint, const cl_event*, cl_event*) = opencl_fn10::switch_fn; +cl_int (*clEnqueueMarkerWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*) = opencl_fn4::switch_fn; +cl_int (*clEnqueueBarrierWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*) = opencl_fn4::switch_fn; +void* (*clGetExtensionFunctionAddressForPlatform)(cl_platform_id, const char*) = opencl_fn2::switch_fn; +cl_mem (*clCreateImage2D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, void*, cl_int*) = opencl_fn8::switch_fn; +cl_mem (*clCreateImage3D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, size_t, size_t, void*, cl_int*) = opencl_fn10::switch_fn; +cl_int (*clEnqueueMarker)(cl_command_queue, cl_event*) = opencl_fn2::switch_fn; +cl_int (*clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event*) = opencl_fn3::switch_fn; +cl_int (*clEnqueueBarrier)(cl_command_queue) = opencl_fn1::switch_fn; +cl_int (*clUnloadCompiler)() = opencl_fn0::switch_fn; +void* (*clGetExtensionFunctionAddress)(const char*) = opencl_fn1::switch_fn; + +// generated by parser_cl.py +void* opencl_fn_ptrs[] = { + &clGetPlatformIDs, + &clGetPlatformInfo, + &clGetDeviceIDs, + &clGetDeviceInfo, + &clCreateSubDevices, + &clRetainDevice, + &clReleaseDevice, + &clCreateContext, + &clCreateContextFromType, + &clRetainContext, + &clReleaseContext, + &clGetContextInfo, + &clCreateCommandQueue, + &clRetainCommandQueue, + &clReleaseCommandQueue, + &clGetCommandQueueInfo, + &clCreateBuffer, + &clCreateSubBuffer, + &clCreateImage, + &clRetainMemObject, + &clReleaseMemObject, + &clGetSupportedImageFormats, + &clGetMemObjectInfo, + &clGetImageInfo, + &clSetMemObjectDestructorCallback, + &clCreateSampler, + &clRetainSampler, + &clReleaseSampler, + &clGetSamplerInfo, + &clCreateProgramWithSource, + &clCreateProgramWithBinary, + &clCreateProgramWithBuiltInKernels, + &clRetainProgram, + &clReleaseProgram, + &clBuildProgram, + 
&clCompileProgram, + &clLinkProgram, + &clUnloadPlatformCompiler, + &clGetProgramInfo, + &clGetProgramBuildInfo, + &clCreateKernel, + &clCreateKernelsInProgram, + &clRetainKernel, + &clReleaseKernel, + &clSetKernelArg, + &clGetKernelInfo, + &clGetKernelArgInfo, + &clGetKernelWorkGroupInfo, + &clWaitForEvents, + &clGetEventInfo, + &clCreateUserEvent, + &clRetainEvent, + &clReleaseEvent, + &clSetUserEventStatus, + &clSetEventCallback, + &clGetEventProfilingInfo, + &clFlush, + &clFinish, + &clEnqueueReadBuffer, + &clEnqueueReadBufferRect, + &clEnqueueWriteBuffer, + &clEnqueueWriteBufferRect, + &clEnqueueFillBuffer, + &clEnqueueCopyBuffer, + &clEnqueueCopyBufferRect, + &clEnqueueReadImage, + &clEnqueueWriteImage, + &clEnqueueFillImage, + &clEnqueueCopyImage, + &clEnqueueCopyImageToBuffer, + &clEnqueueCopyBufferToImage, + &clEnqueueMapBuffer, + &clEnqueueMapImage, + &clEnqueueUnmapMemObject, + &clEnqueueMigrateMemObjects, + &clEnqueueNDRangeKernel, + &clEnqueueTask, + &clEnqueueNativeKernel, + &clEnqueueMarkerWithWaitList, + &clEnqueueBarrierWithWaitList, + &clGetExtensionFunctionAddressForPlatform, + &clCreateImage2D, + &clCreateImage3D, + &clEnqueueMarker, + &clEnqueueWaitForEvents, + &clEnqueueBarrier, + &clUnloadCompiler, + &clGetExtensionFunctionAddress, +}; diff --git a/modules/ocl/src/cl_runtime/clamdblas_runtime.cpp b/modules/ocl/src/cl_runtime/clamdblas_runtime.cpp new file mode 100644 index 0000000000..0a077db691 --- /dev/null +++ b/modules/ocl/src/cl_runtime/clamdblas_runtime.cpp @@ -0,0 +1,968 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#include "precomp.hpp" + +#ifdef HAVE_CLAMDBLAS + +#include "opencv2/ocl/cl_runtime/cl_runtime.hpp" +#include "opencv2/ocl/cl_runtime/clamdblas_runtime.hpp" + +#if defined(_WIN32) + static void* WinGetProcAddress(const char* name) + { + static HMODULE opencl_module = NULL; + if (!opencl_module) + { + opencl_module = GetModuleHandleA("clAmdBlas.dll"); + if (!opencl_module) + { + opencl_module = LoadLibraryA("clAmdBlas.dll"); + 
if (!opencl_module) + return NULL; + } + } + return (void*)GetProcAddress(opencl_module, name); + } + #define CV_CL_GET_PROC_ADDRESS(name) WinGetProcAddress(name) +#endif // _WIN32 + +#if defined(linux) + #include + #include + + static void* GetProcAddress (const char* name) + { + static void* h = NULL; + if (!h) + { + h = dlopen("libclAmdBlas.so", RTLD_LAZY | RTLD_GLOBAL); + if (!h) + return NULL; + } + + return dlsym(h, name); + } + #define CV_CL_GET_PROC_ADDRESS(name) GetProcAddress(name) +#endif + +#ifndef CV_CL_GET_PROC_ADDRESS +#define CV_CL_GET_PROC_ADDRESS(name) NULL +#endif + +// generated by parser_clamdblas.py +enum OPENCLAMDBLAS_FN_ID { + OPENCLAMDBLAS_FN_clAmdBlasGetVersion = 0, + OPENCLAMDBLAS_FN_clAmdBlasSetup, + OPENCLAMDBLAS_FN_clAmdBlasTeardown, + OPENCLAMDBLAS_FN_clAmdBlasAddScratchImage, + OPENCLAMDBLAS_FN_clAmdBlasRemoveScratchImage, + OPENCLAMDBLAS_FN_clAmdBlasSswap, + OPENCLAMDBLAS_FN_clAmdBlasDswap, + OPENCLAMDBLAS_FN_clAmdBlasCswap, + OPENCLAMDBLAS_FN_clAmdBlasZswap, + OPENCLAMDBLAS_FN_clAmdBlasSscal, + OPENCLAMDBLAS_FN_clAmdBlasDscal, + OPENCLAMDBLAS_FN_clAmdBlasCscal, + OPENCLAMDBLAS_FN_clAmdBlasZscal, + OPENCLAMDBLAS_FN_clAmdBlasCsscal, + OPENCLAMDBLAS_FN_clAmdBlasZdscal, + OPENCLAMDBLAS_FN_clAmdBlasScopy, + OPENCLAMDBLAS_FN_clAmdBlasDcopy, + OPENCLAMDBLAS_FN_clAmdBlasCcopy, + OPENCLAMDBLAS_FN_clAmdBlasZcopy, + OPENCLAMDBLAS_FN_clAmdBlasSaxpy, + OPENCLAMDBLAS_FN_clAmdBlasDaxpy, + OPENCLAMDBLAS_FN_clAmdBlasCaxpy, + OPENCLAMDBLAS_FN_clAmdBlasZaxpy, + OPENCLAMDBLAS_FN_clAmdBlasSdot, + OPENCLAMDBLAS_FN_clAmdBlasDdot, + OPENCLAMDBLAS_FN_clAmdBlasCdotu, + OPENCLAMDBLAS_FN_clAmdBlasZdotu, + OPENCLAMDBLAS_FN_clAmdBlasCdotc, + OPENCLAMDBLAS_FN_clAmdBlasZdotc, + OPENCLAMDBLAS_FN_clAmdBlasSrotg, + OPENCLAMDBLAS_FN_clAmdBlasDrotg, + OPENCLAMDBLAS_FN_clAmdBlasCrotg, + OPENCLAMDBLAS_FN_clAmdBlasZrotg, + OPENCLAMDBLAS_FN_clAmdBlasSrotmg, + OPENCLAMDBLAS_FN_clAmdBlasDrotmg, + OPENCLAMDBLAS_FN_clAmdBlasSrot, + OPENCLAMDBLAS_FN_clAmdBlasDrot, + 
OPENCLAMDBLAS_FN_clAmdBlasCsrot, + OPENCLAMDBLAS_FN_clAmdBlasZdrot, + OPENCLAMDBLAS_FN_clAmdBlasSrotm, + OPENCLAMDBLAS_FN_clAmdBlasDrotm, + OPENCLAMDBLAS_FN_clAmdBlasSnrm2, + OPENCLAMDBLAS_FN_clAmdBlasDnrm2, + OPENCLAMDBLAS_FN_clAmdBlasScnrm2, + OPENCLAMDBLAS_FN_clAmdBlasDznrm2, + OPENCLAMDBLAS_FN_clAmdBlasiSamax, + OPENCLAMDBLAS_FN_clAmdBlasiDamax, + OPENCLAMDBLAS_FN_clAmdBlasiCamax, + OPENCLAMDBLAS_FN_clAmdBlasiZamax, + OPENCLAMDBLAS_FN_clAmdBlasSasum, + OPENCLAMDBLAS_FN_clAmdBlasDasum, + OPENCLAMDBLAS_FN_clAmdBlasScasum, + OPENCLAMDBLAS_FN_clAmdBlasDzasum, + OPENCLAMDBLAS_FN_clAmdBlasSgemv, + OPENCLAMDBLAS_FN_clAmdBlasDgemv, + OPENCLAMDBLAS_FN_clAmdBlasCgemv, + OPENCLAMDBLAS_FN_clAmdBlasZgemv, + OPENCLAMDBLAS_FN_clAmdBlasSgemvEx, + OPENCLAMDBLAS_FN_clAmdBlasDgemvEx, + OPENCLAMDBLAS_FN_clAmdBlasCgemvEx, + OPENCLAMDBLAS_FN_clAmdBlasZgemvEx, + OPENCLAMDBLAS_FN_clAmdBlasSsymv, + OPENCLAMDBLAS_FN_clAmdBlasDsymv, + OPENCLAMDBLAS_FN_clAmdBlasSsymvEx, + OPENCLAMDBLAS_FN_clAmdBlasDsymvEx, + OPENCLAMDBLAS_FN_clAmdBlasChemv, + OPENCLAMDBLAS_FN_clAmdBlasZhemv, + OPENCLAMDBLAS_FN_clAmdBlasStrmv, + OPENCLAMDBLAS_FN_clAmdBlasDtrmv, + OPENCLAMDBLAS_FN_clAmdBlasCtrmv, + OPENCLAMDBLAS_FN_clAmdBlasZtrmv, + OPENCLAMDBLAS_FN_clAmdBlasStrsv, + OPENCLAMDBLAS_FN_clAmdBlasDtrsv, + OPENCLAMDBLAS_FN_clAmdBlasCtrsv, + OPENCLAMDBLAS_FN_clAmdBlasZtrsv, + OPENCLAMDBLAS_FN_clAmdBlasSger, + OPENCLAMDBLAS_FN_clAmdBlasDger, + OPENCLAMDBLAS_FN_clAmdBlasCgeru, + OPENCLAMDBLAS_FN_clAmdBlasZgeru, + OPENCLAMDBLAS_FN_clAmdBlasCgerc, + OPENCLAMDBLAS_FN_clAmdBlasZgerc, + OPENCLAMDBLAS_FN_clAmdBlasSsyr, + OPENCLAMDBLAS_FN_clAmdBlasDsyr, + OPENCLAMDBLAS_FN_clAmdBlasCher, + OPENCLAMDBLAS_FN_clAmdBlasZher, + OPENCLAMDBLAS_FN_clAmdBlasSsyr2, + OPENCLAMDBLAS_FN_clAmdBlasDsyr2, + OPENCLAMDBLAS_FN_clAmdBlasCher2, + OPENCLAMDBLAS_FN_clAmdBlasZher2, + OPENCLAMDBLAS_FN_clAmdBlasStpmv, + OPENCLAMDBLAS_FN_clAmdBlasDtpmv, + OPENCLAMDBLAS_FN_clAmdBlasCtpmv, + OPENCLAMDBLAS_FN_clAmdBlasZtpmv, + 
OPENCLAMDBLAS_FN_clAmdBlasStpsv, + OPENCLAMDBLAS_FN_clAmdBlasDtpsv, + OPENCLAMDBLAS_FN_clAmdBlasCtpsv, + OPENCLAMDBLAS_FN_clAmdBlasZtpsv, + OPENCLAMDBLAS_FN_clAmdBlasSspmv, + OPENCLAMDBLAS_FN_clAmdBlasDspmv, + OPENCLAMDBLAS_FN_clAmdBlasChpmv, + OPENCLAMDBLAS_FN_clAmdBlasZhpmv, + OPENCLAMDBLAS_FN_clAmdBlasSspr, + OPENCLAMDBLAS_FN_clAmdBlasDspr, + OPENCLAMDBLAS_FN_clAmdBlasChpr, + OPENCLAMDBLAS_FN_clAmdBlasZhpr, + OPENCLAMDBLAS_FN_clAmdBlasSspr2, + OPENCLAMDBLAS_FN_clAmdBlasDspr2, + OPENCLAMDBLAS_FN_clAmdBlasChpr2, + OPENCLAMDBLAS_FN_clAmdBlasZhpr2, + OPENCLAMDBLAS_FN_clAmdBlasSgbmv, + OPENCLAMDBLAS_FN_clAmdBlasDgbmv, + OPENCLAMDBLAS_FN_clAmdBlasCgbmv, + OPENCLAMDBLAS_FN_clAmdBlasZgbmv, + OPENCLAMDBLAS_FN_clAmdBlasStbmv, + OPENCLAMDBLAS_FN_clAmdBlasDtbmv, + OPENCLAMDBLAS_FN_clAmdBlasCtbmv, + OPENCLAMDBLAS_FN_clAmdBlasZtbmv, + OPENCLAMDBLAS_FN_clAmdBlasSsbmv, + OPENCLAMDBLAS_FN_clAmdBlasDsbmv, + OPENCLAMDBLAS_FN_clAmdBlasChbmv, + OPENCLAMDBLAS_FN_clAmdBlasZhbmv, + OPENCLAMDBLAS_FN_clAmdBlasStbsv, + OPENCLAMDBLAS_FN_clAmdBlasDtbsv, + OPENCLAMDBLAS_FN_clAmdBlasCtbsv, + OPENCLAMDBLAS_FN_clAmdBlasZtbsv, + OPENCLAMDBLAS_FN_clAmdBlasSgemm, + OPENCLAMDBLAS_FN_clAmdBlasDgemm, + OPENCLAMDBLAS_FN_clAmdBlasCgemm, + OPENCLAMDBLAS_FN_clAmdBlasZgemm, + OPENCLAMDBLAS_FN_clAmdBlasSgemmEx, + OPENCLAMDBLAS_FN_clAmdBlasDgemmEx, + OPENCLAMDBLAS_FN_clAmdBlasCgemmEx, + OPENCLAMDBLAS_FN_clAmdBlasZgemmEx, + OPENCLAMDBLAS_FN_clAmdBlasStrmm, + OPENCLAMDBLAS_FN_clAmdBlasDtrmm, + OPENCLAMDBLAS_FN_clAmdBlasCtrmm, + OPENCLAMDBLAS_FN_clAmdBlasZtrmm, + OPENCLAMDBLAS_FN_clAmdBlasStrmmEx, + OPENCLAMDBLAS_FN_clAmdBlasDtrmmEx, + OPENCLAMDBLAS_FN_clAmdBlasCtrmmEx, + OPENCLAMDBLAS_FN_clAmdBlasZtrmmEx, + OPENCLAMDBLAS_FN_clAmdBlasStrsm, + OPENCLAMDBLAS_FN_clAmdBlasDtrsm, + OPENCLAMDBLAS_FN_clAmdBlasCtrsm, + OPENCLAMDBLAS_FN_clAmdBlasZtrsm, + OPENCLAMDBLAS_FN_clAmdBlasStrsmEx, + OPENCLAMDBLAS_FN_clAmdBlasDtrsmEx, + OPENCLAMDBLAS_FN_clAmdBlasCtrsmEx, + OPENCLAMDBLAS_FN_clAmdBlasZtrsmEx, + 
OPENCLAMDBLAS_FN_clAmdBlasSsyrk, + OPENCLAMDBLAS_FN_clAmdBlasDsyrk, + OPENCLAMDBLAS_FN_clAmdBlasCsyrk, + OPENCLAMDBLAS_FN_clAmdBlasZsyrk, + OPENCLAMDBLAS_FN_clAmdBlasSsyrkEx, + OPENCLAMDBLAS_FN_clAmdBlasDsyrkEx, + OPENCLAMDBLAS_FN_clAmdBlasCsyrkEx, + OPENCLAMDBLAS_FN_clAmdBlasZsyrkEx, + OPENCLAMDBLAS_FN_clAmdBlasSsyr2k, + OPENCLAMDBLAS_FN_clAmdBlasDsyr2k, + OPENCLAMDBLAS_FN_clAmdBlasCsyr2k, + OPENCLAMDBLAS_FN_clAmdBlasZsyr2k, + OPENCLAMDBLAS_FN_clAmdBlasSsyr2kEx, + OPENCLAMDBLAS_FN_clAmdBlasDsyr2kEx, + OPENCLAMDBLAS_FN_clAmdBlasCsyr2kEx, + OPENCLAMDBLAS_FN_clAmdBlasZsyr2kEx, + OPENCLAMDBLAS_FN_clAmdBlasSsymm, + OPENCLAMDBLAS_FN_clAmdBlasDsymm, + OPENCLAMDBLAS_FN_clAmdBlasCsymm, + OPENCLAMDBLAS_FN_clAmdBlasZsymm, + OPENCLAMDBLAS_FN_clAmdBlasChemm, + OPENCLAMDBLAS_FN_clAmdBlasZhemm, + OPENCLAMDBLAS_FN_clAmdBlasCherk, + OPENCLAMDBLAS_FN_clAmdBlasZherk, + OPENCLAMDBLAS_FN_clAmdBlasCher2k, + OPENCLAMDBLAS_FN_clAmdBlasZher2k, +}; +// generated by parser_clamdblas.py +const char* openclamdblas_fn_names[] = { + "clAmdBlasGetVersion", + "clAmdBlasSetup", + "clAmdBlasTeardown", + "clAmdBlasAddScratchImage", + "clAmdBlasRemoveScratchImage", + "clAmdBlasSswap", + "clAmdBlasDswap", + "clAmdBlasCswap", + "clAmdBlasZswap", + "clAmdBlasSscal", + "clAmdBlasDscal", + "clAmdBlasCscal", + "clAmdBlasZscal", + "clAmdBlasCsscal", + "clAmdBlasZdscal", + "clAmdBlasScopy", + "clAmdBlasDcopy", + "clAmdBlasCcopy", + "clAmdBlasZcopy", + "clAmdBlasSaxpy", + "clAmdBlasDaxpy", + "clAmdBlasCaxpy", + "clAmdBlasZaxpy", + "clAmdBlasSdot", + "clAmdBlasDdot", + "clAmdBlasCdotu", + "clAmdBlasZdotu", + "clAmdBlasCdotc", + "clAmdBlasZdotc", + "clAmdBlasSrotg", + "clAmdBlasDrotg", + "clAmdBlasCrotg", + "clAmdBlasZrotg", + "clAmdBlasSrotmg", + "clAmdBlasDrotmg", + "clAmdBlasSrot", + "clAmdBlasDrot", + "clAmdBlasCsrot", + "clAmdBlasZdrot", + "clAmdBlasSrotm", + "clAmdBlasDrotm", + "clAmdBlasSnrm2", + "clAmdBlasDnrm2", + "clAmdBlasScnrm2", + "clAmdBlasDznrm2", + "clAmdBlasiSamax", + "clAmdBlasiDamax", + 
"clAmdBlasiCamax", + "clAmdBlasiZamax", + "clAmdBlasSasum", + "clAmdBlasDasum", + "clAmdBlasScasum", + "clAmdBlasDzasum", + "clAmdBlasSgemv", + "clAmdBlasDgemv", + "clAmdBlasCgemv", + "clAmdBlasZgemv", + "clAmdBlasSgemvEx", + "clAmdBlasDgemvEx", + "clAmdBlasCgemvEx", + "clAmdBlasZgemvEx", + "clAmdBlasSsymv", + "clAmdBlasDsymv", + "clAmdBlasSsymvEx", + "clAmdBlasDsymvEx", + "clAmdBlasChemv", + "clAmdBlasZhemv", + "clAmdBlasStrmv", + "clAmdBlasDtrmv", + "clAmdBlasCtrmv", + "clAmdBlasZtrmv", + "clAmdBlasStrsv", + "clAmdBlasDtrsv", + "clAmdBlasCtrsv", + "clAmdBlasZtrsv", + "clAmdBlasSger", + "clAmdBlasDger", + "clAmdBlasCgeru", + "clAmdBlasZgeru", + "clAmdBlasCgerc", + "clAmdBlasZgerc", + "clAmdBlasSsyr", + "clAmdBlasDsyr", + "clAmdBlasCher", + "clAmdBlasZher", + "clAmdBlasSsyr2", + "clAmdBlasDsyr2", + "clAmdBlasCher2", + "clAmdBlasZher2", + "clAmdBlasStpmv", + "clAmdBlasDtpmv", + "clAmdBlasCtpmv", + "clAmdBlasZtpmv", + "clAmdBlasStpsv", + "clAmdBlasDtpsv", + "clAmdBlasCtpsv", + "clAmdBlasZtpsv", + "clAmdBlasSspmv", + "clAmdBlasDspmv", + "clAmdBlasChpmv", + "clAmdBlasZhpmv", + "clAmdBlasSspr", + "clAmdBlasDspr", + "clAmdBlasChpr", + "clAmdBlasZhpr", + "clAmdBlasSspr2", + "clAmdBlasDspr2", + "clAmdBlasChpr2", + "clAmdBlasZhpr2", + "clAmdBlasSgbmv", + "clAmdBlasDgbmv", + "clAmdBlasCgbmv", + "clAmdBlasZgbmv", + "clAmdBlasStbmv", + "clAmdBlasDtbmv", + "clAmdBlasCtbmv", + "clAmdBlasZtbmv", + "clAmdBlasSsbmv", + "clAmdBlasDsbmv", + "clAmdBlasChbmv", + "clAmdBlasZhbmv", + "clAmdBlasStbsv", + "clAmdBlasDtbsv", + "clAmdBlasCtbsv", + "clAmdBlasZtbsv", + "clAmdBlasSgemm", + "clAmdBlasDgemm", + "clAmdBlasCgemm", + "clAmdBlasZgemm", + "clAmdBlasSgemmEx", + "clAmdBlasDgemmEx", + "clAmdBlasCgemmEx", + "clAmdBlasZgemmEx", + "clAmdBlasStrmm", + "clAmdBlasDtrmm", + "clAmdBlasCtrmm", + "clAmdBlasZtrmm", + "clAmdBlasStrmmEx", + "clAmdBlasDtrmmEx", + "clAmdBlasCtrmmEx", + "clAmdBlasZtrmmEx", + "clAmdBlasStrsm", + "clAmdBlasDtrsm", + "clAmdBlasCtrsm", + "clAmdBlasZtrsm", + 
"clAmdBlasStrsmEx", + "clAmdBlasDtrsmEx", + "clAmdBlasCtrsmEx", + "clAmdBlasZtrsmEx", + "clAmdBlasSsyrk", + "clAmdBlasDsyrk", + "clAmdBlasCsyrk", + "clAmdBlasZsyrk", + "clAmdBlasSsyrkEx", + "clAmdBlasDsyrkEx", + "clAmdBlasCsyrkEx", + "clAmdBlasZsyrkEx", + "clAmdBlasSsyr2k", + "clAmdBlasDsyr2k", + "clAmdBlasCsyr2k", + "clAmdBlasZsyr2k", + "clAmdBlasSsyr2kEx", + "clAmdBlasDsyr2kEx", + "clAmdBlasCsyr2kEx", + "clAmdBlasZsyr2kEx", + "clAmdBlasSsymm", + "clAmdBlasDsymm", + "clAmdBlasCsymm", + "clAmdBlasZsymm", + "clAmdBlasChemm", + "clAmdBlasZhemm", + "clAmdBlasCherk", + "clAmdBlasZherk", + "clAmdBlasCher2k", + "clAmdBlasZher2k", +}; + +static void* openclamdblas_check_fn(int ID) +{ + void* func = CV_CL_GET_PROC_ADDRESS(openclamdblas_fn_names[ID]); + if (!func) + { + std::ostringstream msg; + msg << "OpenCL AMD BLAS function is not available: [" << openclamdblas_fn_names[ID] << "]"; + CV_Error(CV_StsBadFunc, msg.str()); + } + extern void* openclamdblas_fn_ptrs[]; + *(void**)(openclamdblas_fn_ptrs[ID]) = func; + return func; +} + +namespace { +// generated by parser_clamdblas.py +template +struct openclamdblas_fn0 +{ + typedef _R (*FN)(); + static _R switch_fn() + { return ((FN)openclamdblas_check_fn(ID))(); } +}; + +template +struct openclamdblas_fn1 +{ + typedef _R (*FN)(_T1); + static _R switch_fn(_T1 p1) + { return ((FN)openclamdblas_check_fn(ID))(p1); } +}; + +template +struct openclamdblas_fn2 +{ + typedef _R (*FN)(_T1, _T2); + static _R switch_fn(_T1 p1, _T2 p2) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2); } +}; + +template +struct openclamdblas_fn3 +{ + typedef _R (*FN)(_T1, _T2, _T3); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3); } +}; + +template +struct openclamdblas_fn4 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4); } +}; + +template +struct openclamdblas_fn5 +{ + typedef _R (*FN)(_T1, 
_T2, _T3, _T4, _T5); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4, p5); } +}; + +template +struct openclamdblas_fn6 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4, p5, p6); } +}; + +template +struct openclamdblas_fn7 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7); } +}; + +template +struct openclamdblas_fn8 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8); } +}; + +template +struct openclamdblas_fn9 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9); } +}; + +template +struct openclamdblas_fn10 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } +}; + +template +struct openclamdblas_fn11 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11); } +}; + +template +struct openclamdblas_fn12 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12); + static _R switch_fn(_T1 p1, _T2 
p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12); } +}; + +template +struct openclamdblas_fn13 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } +}; + +template +struct openclamdblas_fn14 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14); } +}; + +template +struct openclamdblas_fn15 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14, _T15); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14, _T15 p15) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15); } +}; + +template +struct openclamdblas_fn16 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14, _T15, _T16); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14, _T15 p15, _T16 p16) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16); } +}; + +template +struct openclamdblas_fn17 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14, _T15, _T16, _T17); + static _R switch_fn(_T1 p1, _T2 p2, 
_T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14, _T15 p15, _T16 p16, _T17 p17) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17); } +}; + +template +struct openclamdblas_fn18 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14, _T15, _T16, _T17, _T18); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14, _T15 p15, _T16 p16, _T17 p17, _T18 p18) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, p18); } +}; + +template +struct openclamdblas_fn19 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14, _T15, _T16, _T17, _T18, _T19); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14, _T15 p15, _T16 p16, _T17 p17, _T18 p18, _T19 p19) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19); } +}; + +template +struct openclamdblas_fn20 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14, _T15, _T16, _T17, _T18, _T19, _T20); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14, _T15 p15, _T16 p16, _T17 p17, _T18 p18, _T19 p19, _T20 p20) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20); } +}; + +template +struct openclamdblas_fn21 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14, _T15, _T16, _T17, _T18, _T19, _T20, _T21); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, 
_T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14, _T15 p15, _T16 p16, _T17 p17, _T18 p18, _T19 p19, _T20 p20, _T21 p21) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21); } +}; + +template +struct openclamdblas_fn22 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14, _T15, _T16, _T17, _T18, _T19, _T20, _T21, _T22); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14, _T15 p15, _T16 p16, _T17 p17, _T18 p18, _T19 p19, _T20 p20, _T21 p21, _T22 p22) + { return ((FN)openclamdblas_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, p22); } +}; + +} + +// generated by parser_clamdblas.py +clAmdBlasStatus (*clAmdBlasGetVersion)(cl_uint*, cl_uint*, cl_uint*) = openclamdblas_fn3::switch_fn; +clAmdBlasStatus (*clAmdBlasSetup)() = openclamdblas_fn0::switch_fn; +void (*clAmdBlasTeardown)() = openclamdblas_fn0::switch_fn; +cl_ulong (*clAmdBlasAddScratchImage)(cl_context, size_t, size_t, clAmdBlasStatus*) = openclamdblas_fn4::switch_fn; +clAmdBlasStatus (*clAmdBlasRemoveScratchImage)(cl_ulong) = openclamdblas_fn1::switch_fn; +clAmdBlasStatus (*clAmdBlasSswap)(size_t, cl_mem, size_t, int, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasDswap)(size_t, cl_mem, size_t, int, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasCswap)(size_t, cl_mem, size_t, int, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasZswap)(size_t, cl_mem, size_t, int, cl_mem, size_t, int, cl_uint, 
cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasSscal)(size_t, cl_float, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn10::switch_fn; +clAmdBlasStatus (*clAmdBlasDscal)(size_t, cl_double, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn10::switch_fn; +clAmdBlasStatus (*clAmdBlasCscal)(size_t, cl_float2, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn10::switch_fn; +clAmdBlasStatus (*clAmdBlasZscal)(size_t, cl_double2, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn10::switch_fn; +clAmdBlasStatus (*clAmdBlasCsscal)(size_t, cl_float, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn10::switch_fn; +clAmdBlasStatus (*clAmdBlasZdscal)(size_t, cl_double, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn10::switch_fn; +clAmdBlasStatus (*clAmdBlasScopy)(size_t, const cl_mem, size_t, int, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasDcopy)(size_t, const cl_mem, size_t, int, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasCcopy)(size_t, const cl_mem, size_t, int, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasZcopy)(size_t, const cl_mem, size_t, int, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasSaxpy)(size_t, cl_float, const cl_mem, size_t, int, cl_mem, size_t, int, cl_uint, 
cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn13::switch_fn; +clAmdBlasStatus (*clAmdBlasDaxpy)(size_t, cl_double, const cl_mem, size_t, int, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn13::switch_fn; +clAmdBlasStatus (*clAmdBlasCaxpy)(size_t, cl_float2, const cl_mem, size_t, int, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn13::switch_fn; +clAmdBlasStatus (*clAmdBlasZaxpy)(size_t, cl_double2, const cl_mem, size_t, int, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn13::switch_fn; +clAmdBlasStatus (*clAmdBlasSdot)(size_t, cl_mem, size_t, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn15::switch_fn; +clAmdBlasStatus (*clAmdBlasDdot)(size_t, cl_mem, size_t, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn15::switch_fn; +clAmdBlasStatus (*clAmdBlasCdotu)(size_t, cl_mem, size_t, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn15::switch_fn; +clAmdBlasStatus (*clAmdBlasZdotu)(size_t, cl_mem, size_t, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn15::switch_fn; +clAmdBlasStatus (*clAmdBlasCdotc)(size_t, cl_mem, size_t, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn15::switch_fn; +clAmdBlasStatus (*clAmdBlasZdotc)(size_t, cl_mem, size_t, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = 
openclamdblas_fn15::switch_fn; +clAmdBlasStatus (*clAmdBlasSrotg)(cl_mem, size_t, cl_mem, size_t, cl_mem, size_t, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn13::switch_fn; +clAmdBlasStatus (*clAmdBlasDrotg)(cl_mem, size_t, cl_mem, size_t, cl_mem, size_t, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn13::switch_fn; +clAmdBlasStatus (*clAmdBlasCrotg)(cl_mem, size_t, cl_mem, size_t, cl_mem, size_t, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn13::switch_fn; +clAmdBlasStatus (*clAmdBlasZrotg)(cl_mem, size_t, cl_mem, size_t, cl_mem, size_t, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn13::switch_fn; +clAmdBlasStatus (*clAmdBlasSrotmg)(cl_mem, size_t, cl_mem, size_t, cl_mem, size_t, const cl_mem, size_t, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn15::switch_fn; +clAmdBlasStatus (*clAmdBlasDrotmg)(cl_mem, size_t, cl_mem, size_t, cl_mem, size_t, const cl_mem, size_t, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn15::switch_fn; +clAmdBlasStatus (*clAmdBlasSrot)(size_t, cl_mem, size_t, int, cl_mem, size_t, int, cl_float, cl_float, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn14::switch_fn; +clAmdBlasStatus (*clAmdBlasDrot)(size_t, cl_mem, size_t, int, cl_mem, size_t, int, cl_double, cl_double, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn14::switch_fn; +clAmdBlasStatus (*clAmdBlasCsrot)(size_t, cl_mem, size_t, int, cl_mem, size_t, int, cl_float, cl_float, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn14::switch_fn; +clAmdBlasStatus (*clAmdBlasZdrot)(size_t, cl_mem, size_t, int, cl_mem, size_t, int, cl_double, cl_double, cl_uint, 
cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn14::switch_fn; +clAmdBlasStatus (*clAmdBlasSrotm)(size_t, cl_mem, size_t, int, cl_mem, size_t, int, const cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn14::switch_fn; +clAmdBlasStatus (*clAmdBlasDrotm)(size_t, cl_mem, size_t, int, cl_mem, size_t, int, const cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn14::switch_fn; +clAmdBlasStatus (*clAmdBlasSnrm2)(size_t, cl_mem, size_t, const cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasDnrm2)(size_t, cl_mem, size_t, const cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasScnrm2)(size_t, cl_mem, size_t, const cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasDznrm2)(size_t, cl_mem, size_t, const cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasiSamax)(size_t, cl_mem, size_t, const cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasiDamax)(size_t, cl_mem, size_t, const cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasiCamax)(size_t, cl_mem, size_t, const cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasiZamax)(size_t, cl_mem, size_t, const cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, 
cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasSasum)(size_t, cl_mem, size_t, const cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasDasum)(size_t, cl_mem, size_t, const cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasScasum)(size_t, cl_mem, size_t, const cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasDzasum)(size_t, cl_mem, size_t, const cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn12::switch_fn; +clAmdBlasStatus (*clAmdBlasSgemv)(clAmdBlasOrder, clAmdBlasTranspose, size_t, size_t, cl_float, const cl_mem, size_t, const cl_mem, size_t, int, cl_float, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasDgemv)(clAmdBlasOrder, clAmdBlasTranspose, size_t, size_t, cl_double, const cl_mem, size_t, const cl_mem, size_t, int, cl_double, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasCgemv)(clAmdBlasOrder, clAmdBlasTranspose, size_t, size_t, FloatComplex, const cl_mem, size_t, const cl_mem, size_t, int, FloatComplex, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasZgemv)(clAmdBlasOrder, clAmdBlasTranspose, size_t, size_t, DoubleComplex, const cl_mem, size_t, const cl_mem, size_t, int, DoubleComplex, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasSgemvEx)(clAmdBlasOrder, 
clAmdBlasTranspose, size_t, size_t, cl_float, const cl_mem, size_t, size_t, const cl_mem, size_t, int, cl_float, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn20::switch_fn; +clAmdBlasStatus (*clAmdBlasDgemvEx)(clAmdBlasOrder, clAmdBlasTranspose, size_t, size_t, cl_double, const cl_mem, size_t, size_t, const cl_mem, size_t, int, cl_double, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn20::switch_fn; +clAmdBlasStatus (*clAmdBlasCgemvEx)(clAmdBlasOrder, clAmdBlasTranspose, size_t, size_t, FloatComplex, const cl_mem, size_t, size_t, const cl_mem, size_t, int, FloatComplex, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn20::switch_fn; +clAmdBlasStatus (*clAmdBlasZgemvEx)(clAmdBlasOrder, clAmdBlasTranspose, size_t, size_t, DoubleComplex, const cl_mem, size_t, size_t, const cl_mem, size_t, int, DoubleComplex, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn20::switch_fn; +clAmdBlasStatus (*clAmdBlasSsymv)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_float, const cl_mem, size_t, const cl_mem, size_t, int, cl_float, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasDsymv)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_double, const cl_mem, size_t, const cl_mem, size_t, int, cl_double, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasSsymvEx)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_float, const cl_mem, size_t, size_t, const cl_mem, size_t, int, cl_float, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasDsymvEx)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_double, const 
cl_mem, size_t, size_t, const cl_mem, size_t, int, cl_double, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasChemv)(clAmdBlasOrder, clAmdBlasUplo, size_t, FloatComplex, const cl_mem, size_t, size_t, const cl_mem, size_t, int, FloatComplex, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasZhemv)(clAmdBlasOrder, clAmdBlasUplo, size_t, DoubleComplex, const cl_mem, size_t, size_t, const cl_mem, size_t, int, DoubleComplex, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasStrmv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, const cl_mem, size_t, size_t, cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasDtrmv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, const cl_mem, size_t, size_t, cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasCtrmv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, const cl_mem, size_t, size_t, cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasZtrmv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, const cl_mem, size_t, size_t, cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasStrsv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, const cl_mem, size_t, size_t, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, 
const cl_event*, cl_event*) = openclamdblas_fn16::switch_fn; +clAmdBlasStatus (*clAmdBlasDtrsv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, const cl_mem, size_t, size_t, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn16::switch_fn; +clAmdBlasStatus (*clAmdBlasCtrsv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, const cl_mem, size_t, size_t, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn16::switch_fn; +clAmdBlasStatus (*clAmdBlasZtrsv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, const cl_mem, size_t, size_t, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn16::switch_fn; +clAmdBlasStatus (*clAmdBlasSger)(clAmdBlasOrder, size_t, size_t, cl_float, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasDger)(clAmdBlasOrder, size_t, size_t, cl_double, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasCgeru)(clAmdBlasOrder, size_t, size_t, cl_float2, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasZgeru)(clAmdBlasOrder, size_t, size_t, cl_double2, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasCgerc)(clAmdBlasOrder, size_t, size_t, cl_float2, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, size_t, 
size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasZgerc)(clAmdBlasOrder, size_t, size_t, cl_double2, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasSsyr)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_float, const cl_mem, size_t, int, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn15::switch_fn; +clAmdBlasStatus (*clAmdBlasDsyr)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_double, const cl_mem, size_t, int, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn15::switch_fn; +clAmdBlasStatus (*clAmdBlasCher)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_float, const cl_mem, size_t, int, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn15::switch_fn; +clAmdBlasStatus (*clAmdBlasZher)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_double, const cl_mem, size_t, int, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn15::switch_fn; +clAmdBlasStatus (*clAmdBlasSsyr2)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_float, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasDsyr2)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_double, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasCher2)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_float2, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, size_t, size_t, cl_uint, 
cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasZher2)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_double2, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasStpmv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, const cl_mem, size_t, cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn16::switch_fn; +clAmdBlasStatus (*clAmdBlasDtpmv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, const cl_mem, size_t, cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn16::switch_fn; +clAmdBlasStatus (*clAmdBlasCtpmv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, const cl_mem, size_t, cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn16::switch_fn; +clAmdBlasStatus (*clAmdBlasZtpmv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, const cl_mem, size_t, cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn16::switch_fn; +clAmdBlasStatus (*clAmdBlasStpsv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, const cl_mem, size_t, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn15::switch_fn; +clAmdBlasStatus (*clAmdBlasDtpsv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, const cl_mem, size_t, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn15::switch_fn; +clAmdBlasStatus (*clAmdBlasCtpsv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, const 
cl_mem, size_t, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn15::switch_fn; +clAmdBlasStatus (*clAmdBlasZtpsv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, const cl_mem, size_t, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn15::switch_fn; +clAmdBlasStatus (*clAmdBlasSspmv)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_float, const cl_mem, size_t, const cl_mem, size_t, int, cl_float, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasDspmv)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_double, const cl_mem, size_t, const cl_mem, size_t, int, cl_double, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasChpmv)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_float2, const cl_mem, size_t, const cl_mem, size_t, int, cl_float2, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasZhpmv)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_double2, const cl_mem, size_t, const cl_mem, size_t, int, cl_double2, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasSspr)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_float, const cl_mem, size_t, int, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn14::switch_fn; +clAmdBlasStatus (*clAmdBlasDspr)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_double, const cl_mem, size_t, int, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn14::switch_fn; +clAmdBlasStatus (*clAmdBlasChpr)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_float, const cl_mem, size_t, int, 
cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn14::switch_fn; +clAmdBlasStatus (*clAmdBlasZhpr)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_double, const cl_mem, size_t, int, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn14::switch_fn; +clAmdBlasStatus (*clAmdBlasSspr2)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_float, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasDspr2)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_double, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasChpr2)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_float2, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasZhpr2)(clAmdBlasOrder, clAmdBlasUplo, size_t, cl_double2, const cl_mem, size_t, int, const cl_mem, size_t, int, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasSgbmv)(clAmdBlasOrder, clAmdBlasTranspose, size_t, size_t, size_t, size_t, cl_float, const cl_mem, size_t, size_t, const cl_mem, size_t, int, cl_float, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn22::switch_fn; +clAmdBlasStatus (*clAmdBlasDgbmv)(clAmdBlasOrder, clAmdBlasTranspose, size_t, size_t, size_t, size_t, cl_double, const cl_mem, size_t, size_t, const cl_mem, size_t, int, cl_double, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn22::switch_fn; +clAmdBlasStatus 
(*clAmdBlasCgbmv)(clAmdBlasOrder, clAmdBlasTranspose, size_t, size_t, size_t, size_t, cl_float2, const cl_mem, size_t, size_t, const cl_mem, size_t, int, cl_float2, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn22::switch_fn; +clAmdBlasStatus (*clAmdBlasZgbmv)(clAmdBlasOrder, clAmdBlasTranspose, size_t, size_t, size_t, size_t, cl_double2, const cl_mem, size_t, size_t, const cl_mem, size_t, int, cl_double2, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn22::switch_fn; +clAmdBlasStatus (*clAmdBlasStbmv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, const cl_mem, size_t, size_t, cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasDtbmv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, const cl_mem, size_t, size_t, cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasCtbmv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, const cl_mem, size_t, size_t, cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasZtbmv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, const cl_mem, size_t, size_t, cl_mem, size_t, int, cl_mem, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasSsbmv)(clAmdBlasOrder, clAmdBlasUplo, size_t, size_t, cl_float, const cl_mem, size_t, size_t, const cl_mem, size_t, int, cl_float, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn20::switch_fn; +clAmdBlasStatus 
(*clAmdBlasDsbmv)(clAmdBlasOrder, clAmdBlasUplo, size_t, size_t, cl_double, const cl_mem, size_t, size_t, const cl_mem, size_t, int, cl_double, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn20::switch_fn; +clAmdBlasStatus (*clAmdBlasChbmv)(clAmdBlasOrder, clAmdBlasUplo, size_t, size_t, cl_float2, const cl_mem, size_t, size_t, const cl_mem, size_t, int, cl_float2, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn20::switch_fn; +clAmdBlasStatus (*clAmdBlasZhbmv)(clAmdBlasOrder, clAmdBlasUplo, size_t, size_t, cl_double2, const cl_mem, size_t, size_t, const cl_mem, size_t, int, cl_double2, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn20::switch_fn; +clAmdBlasStatus (*clAmdBlasStbsv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, const cl_mem, size_t, size_t, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasDtbsv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, const cl_mem, size_t, size_t, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasCtbsv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, const cl_mem, size_t, size_t, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasZtbsv)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, const cl_mem, size_t, size_t, cl_mem, size_t, int, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasSgemm)(clAmdBlasOrder, clAmdBlasTranspose, clAmdBlasTranspose, size_t, size_t, 
size_t, cl_float, const cl_mem, size_t, const cl_mem, size_t, cl_float, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasDgemm)(clAmdBlasOrder, clAmdBlasTranspose, clAmdBlasTranspose, size_t, size_t, size_t, cl_double, const cl_mem, size_t, const cl_mem, size_t, cl_double, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasCgemm)(clAmdBlasOrder, clAmdBlasTranspose, clAmdBlasTranspose, size_t, size_t, size_t, FloatComplex, const cl_mem, size_t, const cl_mem, size_t, FloatComplex, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasZgemm)(clAmdBlasOrder, clAmdBlasTranspose, clAmdBlasTranspose, size_t, size_t, size_t, DoubleComplex, const cl_mem, size_t, const cl_mem, size_t, DoubleComplex, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasSgemmEx)(clAmdBlasOrder, clAmdBlasTranspose, clAmdBlasTranspose, size_t, size_t, size_t, cl_float, const cl_mem, size_t, size_t, const cl_mem, size_t, size_t, cl_float, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn22::switch_fn; +clAmdBlasStatus (*clAmdBlasDgemmEx)(clAmdBlasOrder, clAmdBlasTranspose, clAmdBlasTranspose, size_t, size_t, size_t, cl_double, const cl_mem, size_t, size_t, const cl_mem, size_t, size_t, cl_double, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn22::switch_fn; +clAmdBlasStatus (*clAmdBlasCgemmEx)(clAmdBlasOrder, clAmdBlasTranspose, clAmdBlasTranspose, size_t, size_t, size_t, FloatComplex, const cl_mem, size_t, size_t, const cl_mem, size_t, size_t, FloatComplex, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const 
cl_event*, cl_event*) = openclamdblas_fn22::switch_fn; +clAmdBlasStatus (*clAmdBlasZgemmEx)(clAmdBlasOrder, clAmdBlasTranspose, clAmdBlasTranspose, size_t, size_t, size_t, DoubleComplex, const cl_mem, size_t, size_t, const cl_mem, size_t, size_t, DoubleComplex, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn22::switch_fn; +clAmdBlasStatus (*clAmdBlasStrmm)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, cl_float, const cl_mem, size_t, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasDtrmm)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, cl_double, const cl_mem, size_t, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasCtrmm)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, FloatComplex, const cl_mem, size_t, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasZtrmm)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, DoubleComplex, const cl_mem, size_t, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasStrmmEx)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, cl_float, const cl_mem, size_t, size_t, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasDtrmmEx)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, cl_double, const cl_mem, size_t, size_t, cl_mem, size_t, 
size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasCtrmmEx)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, FloatComplex, const cl_mem, size_t, size_t, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasZtrmmEx)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, DoubleComplex, const cl_mem, size_t, size_t, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasStrsm)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, cl_float, const cl_mem, size_t, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasDtrsm)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, cl_double, const cl_mem, size_t, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasCtrsm)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, FloatComplex, const cl_mem, size_t, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasZtrsm)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, DoubleComplex, const cl_mem, size_t, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn17::switch_fn; +clAmdBlasStatus (*clAmdBlasStrsmEx)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, cl_float, const cl_mem, size_t, 
size_t, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasDtrsmEx)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, cl_double, const cl_mem, size_t, size_t, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasCtrsmEx)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, FloatComplex, const cl_mem, size_t, size_t, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasZtrsmEx)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, clAmdBlasTranspose, clAmdBlasDiag, size_t, size_t, DoubleComplex, const cl_mem, size_t, size_t, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn19::switch_fn; +clAmdBlasStatus (*clAmdBlasSsyrk)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, cl_float, const cl_mem, size_t, cl_float, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn16::switch_fn; +clAmdBlasStatus (*clAmdBlasDsyrk)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, cl_double, const cl_mem, size_t, cl_double, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn16::switch_fn; +clAmdBlasStatus (*clAmdBlasCsyrk)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, FloatComplex, const cl_mem, size_t, FloatComplex, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn16::switch_fn; +clAmdBlasStatus (*clAmdBlasZsyrk)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, DoubleComplex, const cl_mem, size_t, DoubleComplex, cl_mem, size_t, cl_uint, 
cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn16::switch_fn; +clAmdBlasStatus (*clAmdBlasSsyrkEx)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, cl_float, const cl_mem, size_t, size_t, cl_float, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasDsyrkEx)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, cl_double, const cl_mem, size_t, size_t, cl_double, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasCsyrkEx)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, FloatComplex, const cl_mem, size_t, size_t, FloatComplex, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasZsyrkEx)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, DoubleComplex, const cl_mem, size_t, size_t, DoubleComplex, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasSsyr2k)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, cl_float, const cl_mem, size_t, const cl_mem, size_t, cl_float, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasDsyr2k)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, cl_double, const cl_mem, size_t, const cl_mem, size_t, cl_double, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasCsyr2k)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, FloatComplex, const cl_mem, size_t, const cl_mem, size_t, FloatComplex, cl_mem, size_t, cl_uint, cl_command_queue*, 
cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasZsyr2k)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, DoubleComplex, const cl_mem, size_t, const cl_mem, size_t, DoubleComplex, cl_mem, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasSsyr2kEx)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, cl_float, const cl_mem, size_t, size_t, const cl_mem, size_t, size_t, cl_float, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn21::switch_fn; +clAmdBlasStatus (*clAmdBlasDsyr2kEx)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, cl_double, const cl_mem, size_t, size_t, const cl_mem, size_t, size_t, cl_double, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn21::switch_fn; +clAmdBlasStatus (*clAmdBlasCsyr2kEx)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, FloatComplex, const cl_mem, size_t, size_t, const cl_mem, size_t, size_t, FloatComplex, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn21::switch_fn; +clAmdBlasStatus (*clAmdBlasZsyr2kEx)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, DoubleComplex, const cl_mem, size_t, size_t, const cl_mem, size_t, size_t, DoubleComplex, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn21::switch_fn; +clAmdBlasStatus (*clAmdBlasSsymm)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, size_t, size_t, cl_float, const cl_mem, size_t, size_t, const cl_mem, size_t, size_t, cl_float, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn21::switch_fn; +clAmdBlasStatus (*clAmdBlasDsymm)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, size_t, 
size_t, cl_double, const cl_mem, size_t, size_t, const cl_mem, size_t, size_t, cl_double, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn21::switch_fn; +clAmdBlasStatus (*clAmdBlasCsymm)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, size_t, size_t, cl_float2, const cl_mem, size_t, size_t, const cl_mem, size_t, size_t, cl_float2, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn21::switch_fn; +clAmdBlasStatus (*clAmdBlasZsymm)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, size_t, size_t, cl_double2, const cl_mem, size_t, size_t, const cl_mem, size_t, size_t, cl_double2, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn21::switch_fn; +clAmdBlasStatus (*clAmdBlasChemm)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, size_t, size_t, cl_float2, const cl_mem, size_t, size_t, const cl_mem, size_t, size_t, cl_float2, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn21::switch_fn; +clAmdBlasStatus (*clAmdBlasZhemm)(clAmdBlasOrder, clAmdBlasSide, clAmdBlasUplo, size_t, size_t, cl_double2, const cl_mem, size_t, size_t, const cl_mem, size_t, size_t, cl_double2, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn21::switch_fn; +clAmdBlasStatus (*clAmdBlasCherk)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, float, const cl_mem, size_t, size_t, float, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus (*clAmdBlasZherk)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, double, const cl_mem, size_t, size_t, double, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn18::switch_fn; +clAmdBlasStatus 
(*clAmdBlasCher2k)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, FloatComplex, const cl_mem, size_t, size_t, const cl_mem, size_t, size_t, cl_float, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn21::switch_fn; +clAmdBlasStatus (*clAmdBlasZher2k)(clAmdBlasOrder, clAmdBlasUplo, clAmdBlasTranspose, size_t, size_t, DoubleComplex, const cl_mem, size_t, size_t, const cl_mem, size_t, size_t, cl_double, cl_mem, size_t, size_t, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*) = openclamdblas_fn21::switch_fn; + +// generated by parser_clamdblas.py +void* openclamdblas_fn_ptrs[] = { + &clAmdBlasGetVersion, + &clAmdBlasSetup, + &clAmdBlasTeardown, + &clAmdBlasAddScratchImage, + &clAmdBlasRemoveScratchImage, + &clAmdBlasSswap, + &clAmdBlasDswap, + &clAmdBlasCswap, + &clAmdBlasZswap, + &clAmdBlasSscal, + &clAmdBlasDscal, + &clAmdBlasCscal, + &clAmdBlasZscal, + &clAmdBlasCsscal, + &clAmdBlasZdscal, + &clAmdBlasScopy, + &clAmdBlasDcopy, + &clAmdBlasCcopy, + &clAmdBlasZcopy, + &clAmdBlasSaxpy, + &clAmdBlasDaxpy, + &clAmdBlasCaxpy, + &clAmdBlasZaxpy, + &clAmdBlasSdot, + &clAmdBlasDdot, + &clAmdBlasCdotu, + &clAmdBlasZdotu, + &clAmdBlasCdotc, + &clAmdBlasZdotc, + &clAmdBlasSrotg, + &clAmdBlasDrotg, + &clAmdBlasCrotg, + &clAmdBlasZrotg, + &clAmdBlasSrotmg, + &clAmdBlasDrotmg, + &clAmdBlasSrot, + &clAmdBlasDrot, + &clAmdBlasCsrot, + &clAmdBlasZdrot, + &clAmdBlasSrotm, + &clAmdBlasDrotm, + &clAmdBlasSnrm2, + &clAmdBlasDnrm2, + &clAmdBlasScnrm2, + &clAmdBlasDznrm2, + &clAmdBlasiSamax, + &clAmdBlasiDamax, + &clAmdBlasiCamax, + &clAmdBlasiZamax, + &clAmdBlasSasum, + &clAmdBlasDasum, + &clAmdBlasScasum, + &clAmdBlasDzasum, + &clAmdBlasSgemv, + &clAmdBlasDgemv, + &clAmdBlasCgemv, + &clAmdBlasZgemv, + &clAmdBlasSgemvEx, + &clAmdBlasDgemvEx, + &clAmdBlasCgemvEx, + &clAmdBlasZgemvEx, + &clAmdBlasSsymv, + &clAmdBlasDsymv, + &clAmdBlasSsymvEx, + &clAmdBlasDsymvEx, + &clAmdBlasChemv, + 
&clAmdBlasZhemv, + &clAmdBlasStrmv, + &clAmdBlasDtrmv, + &clAmdBlasCtrmv, + &clAmdBlasZtrmv, + &clAmdBlasStrsv, + &clAmdBlasDtrsv, + &clAmdBlasCtrsv, + &clAmdBlasZtrsv, + &clAmdBlasSger, + &clAmdBlasDger, + &clAmdBlasCgeru, + &clAmdBlasZgeru, + &clAmdBlasCgerc, + &clAmdBlasZgerc, + &clAmdBlasSsyr, + &clAmdBlasDsyr, + &clAmdBlasCher, + &clAmdBlasZher, + &clAmdBlasSsyr2, + &clAmdBlasDsyr2, + &clAmdBlasCher2, + &clAmdBlasZher2, + &clAmdBlasStpmv, + &clAmdBlasDtpmv, + &clAmdBlasCtpmv, + &clAmdBlasZtpmv, + &clAmdBlasStpsv, + &clAmdBlasDtpsv, + &clAmdBlasCtpsv, + &clAmdBlasZtpsv, + &clAmdBlasSspmv, + &clAmdBlasDspmv, + &clAmdBlasChpmv, + &clAmdBlasZhpmv, + &clAmdBlasSspr, + &clAmdBlasDspr, + &clAmdBlasChpr, + &clAmdBlasZhpr, + &clAmdBlasSspr2, + &clAmdBlasDspr2, + &clAmdBlasChpr2, + &clAmdBlasZhpr2, + &clAmdBlasSgbmv, + &clAmdBlasDgbmv, + &clAmdBlasCgbmv, + &clAmdBlasZgbmv, + &clAmdBlasStbmv, + &clAmdBlasDtbmv, + &clAmdBlasCtbmv, + &clAmdBlasZtbmv, + &clAmdBlasSsbmv, + &clAmdBlasDsbmv, + &clAmdBlasChbmv, + &clAmdBlasZhbmv, + &clAmdBlasStbsv, + &clAmdBlasDtbsv, + &clAmdBlasCtbsv, + &clAmdBlasZtbsv, + &clAmdBlasSgemm, + &clAmdBlasDgemm, + &clAmdBlasCgemm, + &clAmdBlasZgemm, + &clAmdBlasSgemmEx, + &clAmdBlasDgemmEx, + &clAmdBlasCgemmEx, + &clAmdBlasZgemmEx, + &clAmdBlasStrmm, + &clAmdBlasDtrmm, + &clAmdBlasCtrmm, + &clAmdBlasZtrmm, + &clAmdBlasStrmmEx, + &clAmdBlasDtrmmEx, + &clAmdBlasCtrmmEx, + &clAmdBlasZtrmmEx, + &clAmdBlasStrsm, + &clAmdBlasDtrsm, + &clAmdBlasCtrsm, + &clAmdBlasZtrsm, + &clAmdBlasStrsmEx, + &clAmdBlasDtrsmEx, + &clAmdBlasCtrsmEx, + &clAmdBlasZtrsmEx, + &clAmdBlasSsyrk, + &clAmdBlasDsyrk, + &clAmdBlasCsyrk, + &clAmdBlasZsyrk, + &clAmdBlasSsyrkEx, + &clAmdBlasDsyrkEx, + &clAmdBlasCsyrkEx, + &clAmdBlasZsyrkEx, + &clAmdBlasSsyr2k, + &clAmdBlasDsyr2k, + &clAmdBlasCsyr2k, + &clAmdBlasZsyr2k, + &clAmdBlasSsyr2kEx, + &clAmdBlasDsyr2kEx, + &clAmdBlasCsyr2kEx, + &clAmdBlasZsyr2kEx, + &clAmdBlasSsymm, + &clAmdBlasDsymm, + &clAmdBlasCsymm, + &clAmdBlasZsymm, + 
&clAmdBlasChemm, + &clAmdBlasZhemm, + &clAmdBlasCherk, + &clAmdBlasZherk, + &clAmdBlasCher2k, + &clAmdBlasZher2k, +}; + +#endif diff --git a/modules/ocl/src/cl_runtime/clamdfft_runtime.cpp b/modules/ocl/src/cl_runtime/clamdfft_runtime.cpp new file mode 100644 index 0000000000..60cbecef2a --- /dev/null +++ b/modules/ocl/src/cl_runtime/clamdfft_runtime.cpp @@ -0,0 +1,396 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#include "precomp.hpp" + +#ifdef HAVE_CLAMDFFT + +#include "opencv2/ocl/cl_runtime/cl_runtime.hpp" +#include "opencv2/ocl/cl_runtime/clamdfft_runtime.hpp" + +#if defined(_WIN32) + static void* WinGetProcAddress(const char* name) + { + static HMODULE opencl_module = NULL; + if (!opencl_module) + { + opencl_module = GetModuleHandleA("clAmdFft.Runtime.dll"); + if (!opencl_module) + { + opencl_module = LoadLibraryA("clAmdFft.Runtime.dll"); + if (!opencl_module) + return NULL; + } + } + return (void*)GetProcAddress(opencl_module, name); + } + #define CV_CL_GET_PROC_ADDRESS(name) WinGetProcAddress(name) +#endif // _WIN32 + +#if defined(linux) + #include + #include + + static void* GetProcAddress (const char* name) + { + static void* h = NULL; + if (!h) + { + h = dlopen("libclAmdFft.Runtime.so", RTLD_LAZY | RTLD_GLOBAL); + if (!h) + return NULL; + } + + return dlsym(h, name); + } + #define CV_CL_GET_PROC_ADDRESS(name) GetProcAddress(name) +#endif + +#ifndef CV_CL_GET_PROC_ADDRESS +#define CV_CL_GET_PROC_ADDRESS(name) NULL +#endif + +// generated by parser_clamdfft.py +enum OPENCLAMDFFT_FN_ID { + OPENCLAMDFFT_FN_clAmdFftSetup = 0, + OPENCLAMDFFT_FN_clAmdFftTeardown, + OPENCLAMDFFT_FN_clAmdFftGetVersion, + OPENCLAMDFFT_FN_clAmdFftCreateDefaultPlan, + OPENCLAMDFFT_FN_clAmdFftCopyPlan, + OPENCLAMDFFT_FN_clAmdFftBakePlan, + OPENCLAMDFFT_FN_clAmdFftDestroyPlan, + OPENCLAMDFFT_FN_clAmdFftGetPlanContext, + OPENCLAMDFFT_FN_clAmdFftGetPlanPrecision, + OPENCLAMDFFT_FN_clAmdFftSetPlanPrecision, + OPENCLAMDFFT_FN_clAmdFftGetPlanScale, + OPENCLAMDFFT_FN_clAmdFftSetPlanScale, + 
OPENCLAMDFFT_FN_clAmdFftGetPlanBatchSize, + OPENCLAMDFFT_FN_clAmdFftSetPlanBatchSize, + OPENCLAMDFFT_FN_clAmdFftGetPlanDim, + OPENCLAMDFFT_FN_clAmdFftSetPlanDim, + OPENCLAMDFFT_FN_clAmdFftGetPlanLength, + OPENCLAMDFFT_FN_clAmdFftSetPlanLength, + OPENCLAMDFFT_FN_clAmdFftGetPlanInStride, + OPENCLAMDFFT_FN_clAmdFftSetPlanInStride, + OPENCLAMDFFT_FN_clAmdFftGetPlanOutStride, + OPENCLAMDFFT_FN_clAmdFftSetPlanOutStride, + OPENCLAMDFFT_FN_clAmdFftGetPlanDistance, + OPENCLAMDFFT_FN_clAmdFftSetPlanDistance, + OPENCLAMDFFT_FN_clAmdFftGetLayout, + OPENCLAMDFFT_FN_clAmdFftSetLayout, + OPENCLAMDFFT_FN_clAmdFftGetResultLocation, + OPENCLAMDFFT_FN_clAmdFftSetResultLocation, + OPENCLAMDFFT_FN_clAmdFftGetPlanTransposeResult, + OPENCLAMDFFT_FN_clAmdFftSetPlanTransposeResult, + OPENCLAMDFFT_FN_clAmdFftGetTmpBufSize, + OPENCLAMDFFT_FN_clAmdFftEnqueueTransform, +}; +// generated by parser_clamdfft.py +const char* openclamdfft_fn_names[] = { + "clAmdFftSetup", + "clAmdFftTeardown", + "clAmdFftGetVersion", + "clAmdFftCreateDefaultPlan", + "clAmdFftCopyPlan", + "clAmdFftBakePlan", + "clAmdFftDestroyPlan", + "clAmdFftGetPlanContext", + "clAmdFftGetPlanPrecision", + "clAmdFftSetPlanPrecision", + "clAmdFftGetPlanScale", + "clAmdFftSetPlanScale", + "clAmdFftGetPlanBatchSize", + "clAmdFftSetPlanBatchSize", + "clAmdFftGetPlanDim", + "clAmdFftSetPlanDim", + "clAmdFftGetPlanLength", + "clAmdFftSetPlanLength", + "clAmdFftGetPlanInStride", + "clAmdFftSetPlanInStride", + "clAmdFftGetPlanOutStride", + "clAmdFftSetPlanOutStride", + "clAmdFftGetPlanDistance", + "clAmdFftSetPlanDistance", + "clAmdFftGetLayout", + "clAmdFftSetLayout", + "clAmdFftGetResultLocation", + "clAmdFftSetResultLocation", + "clAmdFftGetPlanTransposeResult", + "clAmdFftSetPlanTransposeResult", + "clAmdFftGetTmpBufSize", + "clAmdFftEnqueueTransform", +}; + +static void* openclamdfft_check_fn(int ID) +{ + void* func = CV_CL_GET_PROC_ADDRESS(openclamdfft_fn_names[ID]); + if (!func) + { + std::ostringstream msg; + msg << "OpenCL AMD 
FFT function is not available: [" << openclamdfft_fn_names[ID] << "]"; + CV_Error(CV_StsBadFunc, msg.str()); + } + extern void* openclamdfft_fn_ptrs[]; + *(void**)(openclamdfft_fn_ptrs[ID]) = func; + return func; +} + +namespace { +// generated by parser_clamdfft.py +template +struct openclamdfft_fn0 +{ + typedef _R (*FN)(); + static _R switch_fn() + { return ((FN)openclamdfft_check_fn(ID))(); } +}; + +template +struct openclamdfft_fn1 +{ + typedef _R (*FN)(_T1); + static _R switch_fn(_T1 p1) + { return ((FN)openclamdfft_check_fn(ID))(p1); } +}; + +template +struct openclamdfft_fn2 +{ + typedef _R (*FN)(_T1, _T2); + static _R switch_fn(_T1 p1, _T2 p2) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2); } +}; + +template +struct openclamdfft_fn3 +{ + typedef _R (*FN)(_T1, _T2, _T3); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3); } +}; + +template +struct openclamdfft_fn4 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4); } +}; + +template +struct openclamdfft_fn5 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4, p5); } +}; + +template +struct openclamdfft_fn6 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4, p5, p6); } +}; + +template +struct openclamdfft_fn7 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7); } +}; + +template +struct openclamdfft_fn8 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8) + { return 
((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8); } +}; + +template +struct openclamdfft_fn9 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9); } +}; + +template +struct openclamdfft_fn10 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } +}; + +template +struct openclamdfft_fn11 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11); } +}; + +template +struct openclamdfft_fn12 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12); } +}; + +template +struct openclamdfft_fn13 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } +}; + +template +struct openclamdfft_fn14 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14) + { 
return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14); } +}; + +template +struct openclamdfft_fn15 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14, _T15); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14, _T15 p15) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15); } +}; + +template +struct openclamdfft_fn16 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14, _T15, _T16); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14, _T15 p15, _T16 p16) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16); } +}; + +template +struct openclamdfft_fn17 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14, _T15, _T16, _T17); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14, _T15 p15, _T16 p16, _T17 p17) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17); } +}; + +template +struct openclamdfft_fn18 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14, _T15, _T16, _T17, _T18); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14, _T15 p15, _T16 p16, _T17 p17, _T18 p18) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, p18); } +}; + +template +struct openclamdfft_fn19 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, 
_T12, _T13, _T14, _T15, _T16, _T17, _T18, _T19); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14, _T15 p15, _T16 p16, _T17 p17, _T18 p18, _T19 p19) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19); } +}; + +template +struct openclamdfft_fn20 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14, _T15, _T16, _T17, _T18, _T19, _T20); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14, _T15 p15, _T16 p16, _T17 p17, _T18 p18, _T19 p19, _T20 p20) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20); } +}; + +template +struct openclamdfft_fn21 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14, _T15, _T16, _T17, _T18, _T19, _T20, _T21); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14, _T15 p15, _T16 p16, _T17 p17, _T18 p18, _T19 p19, _T20 p20, _T21 p21) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21); } +}; + +template +struct openclamdfft_fn22 +{ + typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14, _T15, _T16, _T17, _T18, _T19, _T20, _T21, _T22); + static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14, _T15 p15, _T16 p16, _T17 p17, _T18 p18, _T19 p19, _T20 p20, _T21 p21, _T22 p22) + { return ((FN)openclamdfft_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, p22); } +}; + +} + 
+// generated by parser_clamdfft.py +clAmdFftStatus (*clAmdFftSetup)(const clAmdFftSetupData*) = openclamdfft_fn1::switch_fn; +clAmdFftStatus (*clAmdFftTeardown)() = openclamdfft_fn0::switch_fn; +clAmdFftStatus (*clAmdFftGetVersion)(cl_uint*, cl_uint*, cl_uint*) = openclamdfft_fn3::switch_fn; +clAmdFftStatus (*clAmdFftCreateDefaultPlan)(clAmdFftPlanHandle*, cl_context, const clAmdFftDim, const size_t*) = openclamdfft_fn4::switch_fn; +clAmdFftStatus (*clAmdFftCopyPlan)(clAmdFftPlanHandle*, cl_context, clAmdFftPlanHandle) = openclamdfft_fn3::switch_fn; +clAmdFftStatus (*clAmdFftBakePlan)(clAmdFftPlanHandle, cl_uint, cl_command_queue*, void (CL_CALLBACK*) (clAmdFftPlanHandle plHandle, void* user_data), void*) = openclamdfft_fn5::switch_fn; +clAmdFftStatus (*clAmdFftDestroyPlan)(clAmdFftPlanHandle*) = openclamdfft_fn1::switch_fn; +clAmdFftStatus (*clAmdFftGetPlanContext)(const clAmdFftPlanHandle, cl_context*) = openclamdfft_fn2::switch_fn; +clAmdFftStatus (*clAmdFftGetPlanPrecision)(const clAmdFftPlanHandle, clAmdFftPrecision*) = openclamdfft_fn2::switch_fn; +clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle, clAmdFftPrecision) = openclamdfft_fn2::switch_fn; +clAmdFftStatus (*clAmdFftGetPlanScale)(const clAmdFftPlanHandle, clAmdFftDirection, cl_float*) = openclamdfft_fn3::switch_fn; +clAmdFftStatus (*clAmdFftSetPlanScale)(clAmdFftPlanHandle, clAmdFftDirection, cl_float) = openclamdfft_fn3::switch_fn; +clAmdFftStatus (*clAmdFftGetPlanBatchSize)(const clAmdFftPlanHandle, size_t*) = openclamdfft_fn2::switch_fn; +clAmdFftStatus (*clAmdFftSetPlanBatchSize)(clAmdFftPlanHandle, size_t) = openclamdfft_fn2::switch_fn; +clAmdFftStatus (*clAmdFftGetPlanDim)(const clAmdFftPlanHandle, clAmdFftDim*, cl_uint*) = openclamdfft_fn3::switch_fn; +clAmdFftStatus (*clAmdFftSetPlanDim)(clAmdFftPlanHandle, const clAmdFftDim) = openclamdfft_fn2::switch_fn; +clAmdFftStatus (*clAmdFftGetPlanLength)(const clAmdFftPlanHandle, const clAmdFftDim, size_t*) = openclamdfft_fn3::switch_fn; 
+clAmdFftStatus (*clAmdFftSetPlanLength)(clAmdFftPlanHandle, const clAmdFftDim, const size_t*) = openclamdfft_fn3::switch_fn; +clAmdFftStatus (*clAmdFftGetPlanInStride)(const clAmdFftPlanHandle, const clAmdFftDim, size_t*) = openclamdfft_fn3::switch_fn; +clAmdFftStatus (*clAmdFftSetPlanInStride)(clAmdFftPlanHandle, const clAmdFftDim, size_t*) = openclamdfft_fn3::switch_fn; +clAmdFftStatus (*clAmdFftGetPlanOutStride)(const clAmdFftPlanHandle, const clAmdFftDim, size_t*) = openclamdfft_fn3::switch_fn; +clAmdFftStatus (*clAmdFftSetPlanOutStride)(clAmdFftPlanHandle, const clAmdFftDim, size_t*) = openclamdfft_fn3::switch_fn; +clAmdFftStatus (*clAmdFftGetPlanDistance)(const clAmdFftPlanHandle, size_t*, size_t*) = openclamdfft_fn3::switch_fn; +clAmdFftStatus (*clAmdFftSetPlanDistance)(clAmdFftPlanHandle, size_t, size_t) = openclamdfft_fn3::switch_fn; +clAmdFftStatus (*clAmdFftGetLayout)(const clAmdFftPlanHandle, clAmdFftLayout*, clAmdFftLayout*) = openclamdfft_fn3::switch_fn; +clAmdFftStatus (*clAmdFftSetLayout)(clAmdFftPlanHandle, clAmdFftLayout, clAmdFftLayout) = openclamdfft_fn3::switch_fn; +clAmdFftStatus (*clAmdFftGetResultLocation)(const clAmdFftPlanHandle, clAmdFftResultLocation*) = openclamdfft_fn2::switch_fn; +clAmdFftStatus (*clAmdFftSetResultLocation)(clAmdFftPlanHandle, clAmdFftResultLocation) = openclamdfft_fn2::switch_fn; +clAmdFftStatus (*clAmdFftGetPlanTransposeResult)(const clAmdFftPlanHandle, clAmdFftResultTransposed*) = openclamdfft_fn2::switch_fn; +clAmdFftStatus (*clAmdFftSetPlanTransposeResult)(clAmdFftPlanHandle, clAmdFftResultTransposed) = openclamdfft_fn2::switch_fn; +clAmdFftStatus (*clAmdFftGetTmpBufSize)(const clAmdFftPlanHandle, size_t*) = openclamdfft_fn2::switch_fn; +clAmdFftStatus (*clAmdFftEnqueueTransform)(clAmdFftPlanHandle, clAmdFftDirection, cl_uint, cl_command_queue*, cl_uint, const cl_event*, cl_event*, cl_mem*, cl_mem*, cl_mem) = openclamdfft_fn10::switch_fn; + +// generated by parser_clamdfft.py +void* openclamdfft_fn_ptrs[] = { + 
&clAmdFftSetup, + &clAmdFftTeardown, + &clAmdFftGetVersion, + &clAmdFftCreateDefaultPlan, + &clAmdFftCopyPlan, + &clAmdFftBakePlan, + &clAmdFftDestroyPlan, + &clAmdFftGetPlanContext, + &clAmdFftGetPlanPrecision, + &clAmdFftSetPlanPrecision, + &clAmdFftGetPlanScale, + &clAmdFftSetPlanScale, + &clAmdFftGetPlanBatchSize, + &clAmdFftSetPlanBatchSize, + &clAmdFftGetPlanDim, + &clAmdFftSetPlanDim, + &clAmdFftGetPlanLength, + &clAmdFftSetPlanLength, + &clAmdFftGetPlanInStride, + &clAmdFftSetPlanInStride, + &clAmdFftGetPlanOutStride, + &clAmdFftSetPlanOutStride, + &clAmdFftGetPlanDistance, + &clAmdFftSetPlanDistance, + &clAmdFftGetLayout, + &clAmdFftSetLayout, + &clAmdFftGetResultLocation, + &clAmdFftSetResultLocation, + &clAmdFftGetPlanTransposeResult, + &clAmdFftSetPlanTransposeResult, + &clAmdFftGetTmpBufSize, + &clAmdFftEnqueueTransform, +}; + +#endif From 12eb340a4e3898c704cff3b56a12b69cd35204bb Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 2 Oct 2013 19:35:21 +0400 Subject: [PATCH 19/39] removed invalid comment --- modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime.hpp b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime.hpp index 7b31f457e7..3e62edec92 100644 --- a/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime.hpp +++ b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime.hpp @@ -1,6 +1,3 @@ -// -// AUTOGENERATED, DO NOT EDIT -// #ifndef __OPENCV_OCL_CL_RUNTIME_HPP__ #define __OPENCV_OCL_CL_RUNTIME_HPP__ From adca219f18570e08b4c9ad81463b8ca4854e7d8a Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 2 Oct 2013 23:21:28 +0400 Subject: [PATCH 20/39] fixed convertC3C4 and convertC4C3 functions in case cols == 1 --- modules/ocl/src/matrix_operations.cpp | 147 +++++---------------- modules/ocl/src/opencl/convertC3C4.cl | 46 ++++--- modules/ocl/test/test_matrix_operation.cpp | 2 +- 3 files changed, 59 insertions(+), 136 
deletions(-) diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index 78d1cd4afb..3ae14eb48d 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -58,12 +58,13 @@ using namespace std; //////////////////////////////// oclMat //////////////////////////////// //////////////////////////////////////////////////////////////////////// -//helper routines +// helper routines namespace cv { namespace ocl { - ///////////////////////////OpenCL kernel strings/////////////////////////// + /////////////////////////// OpenCL kernel strings /////////////////////////// + extern const char *operator_copyToM; extern const char *operator_convertTo; extern const char *operator_setTo; @@ -74,42 +75,18 @@ namespace cv } } - //////////////////////////////////////////////////////////////////////// // convert_C3C4 + static void convert_C3C4(const cl_mem &src, oclMat &dst) { - int dstStep_in_pixel = dst.step1() / dst.oclchannels(); - int pixel_end = dst.wholecols * dst.wholerows - 1; Context *clCxt = dst.clCxt; - string kernelName = "convertC3C4"; - char compile_option[32]; - switch(dst.depth()) - { - case 0: - sprintf(compile_option, "-D GENTYPE4=uchar4"); - break; - case 1: - sprintf(compile_option, "-D GENTYPE4=char4"); - break; - case 2: - sprintf(compile_option, "-D GENTYPE4=ushort4"); - break; - case 3: - sprintf(compile_option, "-D GENTYPE4=short4"); - break; - case 4: - sprintf(compile_option, "-D GENTYPE4=int4"); - break; - case 5: - sprintf(compile_option, "-D GENTYPE4=float4"); - break; - case 6: - sprintf(compile_option, "-D GENTYPE4=double4"); - break; - default: - CV_Error(CV_StsUnsupportedFormat, "unknown depth"); - } + int pixel_end = dst.wholecols * dst.wholerows - 1; + int dstStep_in_pixel = dst.step1() / dst.oclchannels(); + + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + std::string buildOptions = format("-D GENTYPE4=%s4", 
typeMap[dst.depth()]); + vector< pair > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src)); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); @@ -118,46 +95,24 @@ static void convert_C3C4(const cl_mem &src, oclMat &dst) args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep_in_pixel)); args.push_back( make_pair( sizeof(cl_int), (void *)&pixel_end)); - size_t globalThreads[3] = {((dst.wholecols * dst.wholerows + 3) / 4 + 255) / 256 * 256, 1, 1}; - size_t localThreads[3] = {256, 1, 1}; + size_t globalThreads[3] = { divUp(dst.wholecols * dst.wholerows, 4), 1, 1 }; + size_t localThreads[3] = { 256, 1, 1 }; - openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, -1, compile_option); + openCLExecuteKernel(clCxt, &convertC3C4, "convertC3C4", globalThreads, localThreads, + args, -1, -1, buildOptions.c_str()); } + //////////////////////////////////////////////////////////////////////// // convert_C4C3 + static void convert_C4C3(const oclMat &src, cl_mem &dst) { int srcStep_in_pixel = src.step1() / src.oclchannels(); int pixel_end = src.wholecols * src.wholerows - 1; Context *clCxt = src.clCxt; - string kernelName = "convertC4C3"; - char compile_option[32]; - switch(src.depth()) - { - case 0: - sprintf(compile_option, "-D GENTYPE4=uchar4"); - break; - case 1: - sprintf(compile_option, "-D GENTYPE4=char4"); - break; - case 2: - sprintf(compile_option, "-D GENTYPE4=ushort4"); - break; - case 3: - sprintf(compile_option, "-D GENTYPE4=short4"); - break; - case 4: - sprintf(compile_option, "-D GENTYPE4=int4"); - break; - case 5: - sprintf(compile_option, "-D GENTYPE4=float4"); - break; - case 6: - sprintf(compile_option, "-D GENTYPE4=double4"); - break; - default: - CV_Error(CV_StsUnsupportedFormat, "unknown depth"); - } + + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + std::string buildOptions = format("-D GENTYPE4=%s4", typeMap[src.depth()]); vector< pair > 
args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); @@ -167,10 +122,10 @@ static void convert_C4C3(const oclMat &src, cl_mem &dst) args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep_in_pixel)); args.push_back( make_pair( sizeof(cl_int), (void *)&pixel_end)); - size_t globalThreads[3] = {((src.wholecols * src.wholerows + 3) / 4 + 255) / 256 * 256, 1, 1}; - size_t localThreads[3] = {256, 1, 1}; + size_t globalThreads[3] = { divUp(src.wholecols * src.wholerows, 4), 1, 1}; + size_t localThreads[3] = { 256, 1, 1 }; - openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, -1, compile_option); + openCLExecuteKernel(clCxt, &convertC3C4, "convertC4C3", globalThreads, localThreads, args, -1, -1, buildOptions.c_str()); } void cv::ocl::oclMat::upload(const Mat &m) @@ -179,14 +134,10 @@ void cv::ocl::oclMat::upload(const Mat &m) Size wholeSize; Point ofs; m.locateROI(wholeSize, ofs); - // int type = m.type(); - // if(m.oclchannels() == 3) - //{ - // type = CV_MAKETYPE(m.depth(), 4); - //} + create(wholeSize, m.type()); - if(m.channels() == 3) + if (m.channels() == 3) { int pitch = wholeSize.width * 3 * m.elemSize1(); int tail_padding = m.elemSize1() * 3072; @@ -197,35 +148,15 @@ void cv::ocl::oclMat::upload(const Mat &m) openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3); convert_C3C4(temp, *this); - //int* cputemp=new int[wholeSize.height*wholeSize.width * 3]; - //int* cpudata=new int[this->step*this->wholerows/sizeof(int)]; - //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE, - // 0, wholeSize.height*wholeSize.width * 3* sizeof(int), cputemp, 0, NULL, NULL)); - //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)data, CL_TRUE, - // 0, this->step*this->wholerows, cpudata, 0, NULL, NULL)); - //for(int i=0;istep/sizeof(int); - // for(int j=0;jempty()); - // int t = type(); - // 
if(download_channels == 3) - //{ - // t = CV_MAKETYPE(depth(), 3); - //} m.create(wholerows, wholecols, type()); if(m.channels() == 3) @@ -277,30 +203,14 @@ void cv::ocl::oclMat::download(cv::Mat &m) const convert_C4C3(*this, temp); openCLMemcpy2D(clCxt, m.data, m.step, temp, pitch, wholecols * m.elemSize(), wholerows, clMemcpyDeviceToHost, 3); - //int* cputemp=new int[wholecols*wholerows * 3]; - //int* cpudata=new int[this->step*this->wholerows/sizeof(int)]; - //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE, - // 0, wholecols*wholerows * 3* sizeof(int), cputemp, 0, NULL, NULL)); - //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)data, CL_TRUE, - // 0, this->step*this->wholerows, cpudata, 0, NULL, NULL)); - //for(int i=0;istep/sizeof(int); - // for(int j=0;jsupportsFeature(Context::CL_DOUBLE) && diff --git a/modules/ocl/src/opencl/convertC3C4.cl b/modules/ocl/src/opencl/convertC3C4.cl index 3e61827691..1908f92a2a 100644 --- a/modules/ocl/src/opencl/convertC3C4.cl +++ b/modules/ocl/src/opencl/convertC3C4.cl @@ -32,23 +32,23 @@ // the use of this software, even if advised of the possibility of such damage. 
// // -//#pragma OPENCL EXTENSION cl_amd_printf : enable + #if defined (DOUBLE_SUPPORT) #pragma OPENCL EXTENSION cl_khr_fp64:enable #endif + __kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst, int cols, int rows, int dstStep_in_piexl,int pixel_end) { int id = get_global_id(0); - //int pixel_end = mul24(cols -1 , rows -1); int3 pixelid = (int3)(mul24(id,3),mad24(id,3,1),mad24(id,3,2)); pixelid = clamp(pixelid,0,pixel_end); GENTYPE4 pixel0, pixel1, pixel2, outpix0,outpix1,outpix2,outpix3; + pixel0 = src[pixelid.x]; pixel1 = src[pixelid.y]; pixel2 = src[pixelid.z]; - outpix0 = (GENTYPE4)(pixel0.x,pixel0.y,pixel0.z,0); outpix1 = (GENTYPE4)(pixel0.w,pixel1.x,pixel1.y,0); outpix2 = (GENTYPE4)(pixel1.z,pixel1.w,pixel2.x,0); @@ -56,17 +56,19 @@ __kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTY int4 outy = (id<<2)/cols; int4 outx = (id<<2)%cols; - outx.y++; - outx.z+=2; - outx.w+=3; - outy = select(outy,outy+1,outx>=cols); - outx = select(outx,outx-cols,outx>=cols); - //outpix3 = select(outpix3, outpix0, (uchar4)(outy.w>=rows)); - //outpix2 = select(outpix2, outpix0, (uchar4)(outy.z>=rows)); - //outpix1 = select(outpix1, outpix0, (uchar4)(outy.y>=rows)); - //outx = select(outx,(int4)outx.x,outy>=rows); - //outy = select(outy,(int4)outy.x,outy>=rows); + + outx += (int4)(0, 1, 2, 3); + outy = select(outy, outy+1, outx>=cols); + outx = select(outx, outx-cols, outx>=cols); + + // when cols == 1 + outy = select(outy, outy + 1, outx >= cols); + outx = select(outx, outx-cols, outx >= cols); + outy = select(outy, outy + 1, outx >= cols); + outx = select(outx, outx-cols, outx >= cols); + int4 addr = mad24(outy,(int4)dstStep_in_piexl,outx); + if(outx.w=(int4)cols); - y4=clamp(y4,(int4)0,(int4)(rows-1)); x4 = select(x4,x4-(int4)cols,x4>=(int4)cols); - int4 addr = mad24(y4,(int4)srcStep_in_pixel,x4); + + // when cols == 1 + y4 = select(y4, y4 + 1,x4>=(int4)cols); + x4 = select(x4, x4 - (int4)cols,x4>=(int4)cols); + y4 
= select(y4, y4 + 1,x4>=(int4)cols); + x4 = select(x4, x4-(int4)cols,x4>=(int4)cols); + + y4=clamp(y4,(int4)0,(int4)(rows-1)); + int4 addr = mad24(y4, (int4)srcStep_in_pixel, x4); + GENTYPE4 pixel0,pixel1,pixel2,pixel3, outpixel1, outpixel2; pixel0 = src[addr.x]; pixel1 = src[addr.y]; @@ -120,9 +128,11 @@ __kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTY outpixel2.y = pixel3.x; outpixel2.z = pixel3.y; outpixel2.w = pixel3.z; + int4 outaddr = mul24(id>>2 , 3); outaddr.y++; outaddr.z+=2; + if(outaddr.z <= pixel_end) { dst[outaddr.x] = pixel0; diff --git a/modules/ocl/test/test_matrix_operation.cpp b/modules/ocl/test/test_matrix_operation.cpp index d1d24689b5..46e077a6bb 100644 --- a/modules/ocl/test/test_matrix_operation.cpp +++ b/modules/ocl/test/test_matrix_operation.cpp @@ -402,7 +402,7 @@ PARAM_TEST_CASE(convertC3C4, MatType, bool) int type = CV_MAKE_TYPE(depth, 3); cv::RNG &rng = TS::ptr()->get_rng(); - src = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 0, 40, false); + src = randomMat(rng, randomSize(1, MAX_VALUE), type, 0, 40, false); } void random_roi() From 1aaeb52a4cadd0d66e2d432575a77365ed57158e Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Tue, 1 Oct 2013 12:47:03 +0400 Subject: [PATCH 21/39] Android NDK r9 support. x64 NDK support fix for MacOS X. 
--- modules/imgproc/src/floodfill.cpp | 4 ++++ modules/imgproc/test/test_convhull.cpp | 14 ++++++++++++++ modules/legacy/src/bgfg_gaussmix.cpp | 9 +++++++++ modules/legacy/src/lmeds.cpp | 10 ++++++++++ platforms/android/android.toolchain.cmake | 6 ++++-- 5 files changed, 41 insertions(+), 2 deletions(-) diff --git a/modules/imgproc/src/floodfill.cpp b/modules/imgproc/src/floodfill.cpp index ada6dcce36..74047676e4 100644 --- a/modules/imgproc/src/floodfill.cpp +++ b/modules/imgproc/src/floodfill.cpp @@ -41,6 +41,10 @@ #include "precomp.hpp" +#if (__GNUC__ == 4) && (__GNUC_MINOR__ == 8) +# pragma GCC diagnostic ignored "-Warray-bounds" +#endif + typedef struct CvFFillSegment { ushort y; diff --git a/modules/imgproc/test/test_convhull.cpp b/modules/imgproc/test/test_convhull.cpp index cae75d0cb0..64fe7f6370 100644 --- a/modules/imgproc/test/test_convhull.cpp +++ b/modules/imgproc/test/test_convhull.cpp @@ -1225,6 +1225,10 @@ CV_FitLineTest::CV_FitLineTest() max_noise = 0.05; } +#if (__GNUC__ == 4) && (__GNUC_MINOR__ == 8) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Warray-bounds" +#endif void CV_FitLineTest::generate_point_set( void* pointsSet ) { @@ -1297,6 +1301,9 @@ void CV_FitLineTest::generate_point_set( void* pointsSet ) } } +#if (__GNUC__ == 4) && (__GNUC_MINOR__ == 8) +# pragma GCC diagnostic pop +#endif int CV_FitLineTest::prepare_test_case( int test_case_idx ) { @@ -1322,6 +1329,10 @@ void CV_FitLineTest::run_func() cv::fitLine(cv::cvarrToMat(points), (cv::Vec6f&)line[0], dist_type, 0, reps, aeps); } +#if (__GNUC__ == 4) && (__GNUC_MINOR__ == 8) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Warray-bounds" +#endif int CV_FitLineTest::validate_test_results( int test_case_idx ) { @@ -1401,6 +1412,9 @@ _exit_: return code; } +#if (__GNUC__ == 4) && (__GNUC_MINOR__ == 8) +# pragma GCC diagnostic pop +#endif /****************************************************************************************\ * ContourMoments Test * 
diff --git a/modules/legacy/src/bgfg_gaussmix.cpp b/modules/legacy/src/bgfg_gaussmix.cpp index 6c6839e4a2..3cb7a5af9f 100644 --- a/modules/legacy/src/bgfg_gaussmix.cpp +++ b/modules/legacy/src/bgfg_gaussmix.cpp @@ -415,6 +415,11 @@ CV_INLINE int _icvRemoveShadowGMM(float* data, int nD, //IEEE Trans. on Pattern Analysis and Machine Intelligence, vol.26, no.5, pages 651-656, 2004 //http://www.zoranz.net/Publications/zivkovic2004PAMI.pdf +#if (__GNUC__ == 4) && (__GNUC_MINOR__ == 8) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif + CV_INLINE int _icvUpdateGMM(float* data, int nD, unsigned char* pModesUsed, CvPBGMMGaussian* pGMM, @@ -603,6 +608,10 @@ CV_INLINE int _icvUpdateGMM(float* data, int nD, return bBackground; } +#if (__GNUC__ == 4) && (__GNUC_MINOR__ == 8) +# pragma GCC diagnostic pop +#endif + // a bit more efficient implementation for common case of 3 channel (rgb) images CV_INLINE int _icvUpdateGMM_C3(float r,float g, float b, unsigned char* pModesUsed, diff --git a/modules/legacy/src/lmeds.cpp b/modules/legacy/src/lmeds.cpp index 7794640769..33b57a7597 100644 --- a/modules/legacy/src/lmeds.cpp +++ b/modules/legacy/src/lmeds.cpp @@ -162,6 +162,12 @@ icvLMedS( int *points1, int *points2, int numPoints, CvMatrix3 * fundamentalMatr /*===========================================================================*/ /*===========================================================================*/ + +#if (__GNUC__ == 4) && (__GNUC_MINOR__ == 8) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Warray-bounds" +#endif + void icvChoose7( int *ml, int *mr, int num, int *ml7, int *mr7 ) { @@ -322,6 +328,10 @@ icvCubic( double a2, double a1, double a0, double *squares ) return CV_NO_ERR; } /* icvCubic */ +#if (__GNUC__ == 4) && (__GNUC_MINOR__ == 8) +# pragma GCC diagnostic pop +#endif + /*======================================================================================*/ double icvDet( double *M ) diff --git 
a/platforms/android/android.toolchain.cmake b/platforms/android/android.toolchain.cmake index d7f09c7888..bee73dbea1 100644 --- a/platforms/android/android.toolchain.cmake +++ b/platforms/android/android.toolchain.cmake @@ -318,7 +318,7 @@ set( CMAKE_SYSTEM_VERSION 1 ) # rpath makes low sence for Android set( CMAKE_SKIP_RPATH TRUE CACHE BOOL "If set, runtime paths are not added when using shared libraries." ) -set( ANDROID_SUPPORTED_NDK_VERSIONS ${ANDROID_EXTRA_NDK_VERSIONS} -r8e -r8d -r8c -r8b -r8 -r7c -r7b -r7 -r6b -r6 -r5c -r5b -r5 "" ) +set( ANDROID_SUPPORTED_NDK_VERSIONS ${ANDROID_EXTRA_NDK_VERSIONS} -r9 -r8e -r8d -r8c -r8b -r8 -r7c -r7b -r7 -r6b -r6 -r5c -r5b -r5 "" ) if(NOT DEFINED ANDROID_NDK_SEARCH_PATHS) if( CMAKE_HOST_WIN32 ) file( TO_CMAKE_PATH "$ENV{PROGRAMFILES}" ANDROID_NDK_SEARCH_PATHS ) @@ -484,7 +484,9 @@ else() message( FATAL_ERROR "Cross-compilation on your platform is not supported by this cmake toolchain" ) endif() -if( NOT ANDROID_NDK_HOST_X64 ) +# CMAKE_HOST_SYSTEM_PROCESSOR on MacOS X always says i386 on Intel platform +# So we do not trust ANDROID_NDK_HOST_X64 on Apple hosts +if( NOT ANDROID_NDK_HOST_X64 AND NOT CMAKE_HOST_APPLE) set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) endif() From fa9d092c435c109b8460674f36c0a3fc4b80b7f0 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Thu, 3 Oct 2013 12:04:57 +0400 Subject: [PATCH 22/39] Fix a -Wparentheses-equality Clang warning in cap_libv4l.cpp. 
--- modules/highgui/src/cap_libv4l.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/modules/highgui/src/cap_libv4l.cpp b/modules/highgui/src/cap_libv4l.cpp index 3fd6dd59ad..91047de1f1 100644 --- a/modules/highgui/src/cap_libv4l.cpp +++ b/modules/highgui/src/cap_libv4l.cpp @@ -856,8 +856,7 @@ static int _capture_V4L (CvCaptureCAM_V4L *capture, char *deviceName) detect_v4l = try_init_v4l(capture, deviceName); - if ((detect_v4l == -1) - ) + if (detect_v4l == -1) { fprintf (stderr, "HIGHGUI ERROR: V4L" ": device %s: Unable to open for READ ONLY\n", deviceName); @@ -865,8 +864,7 @@ static int _capture_V4L (CvCaptureCAM_V4L *capture, char *deviceName) return -1; } - if ((detect_v4l <= 0) - ) + if (detect_v4l <= 0) { fprintf (stderr, "HIGHGUI ERROR: V4L" ": device %s: Unable to query number of channels\n", deviceName); From dafd4da03117f8a2d6027ba710d5dc475350f106 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 3 Oct 2013 11:36:49 +0400 Subject: [PATCH 23/39] removed ocl::minMax_buf, updated doc (operation on matrices) --- modules/ocl/doc/matrix_reductions.rst | 38 +- modules/ocl/doc/operations_on_matrices.rst | 415 +++++++++++---------- modules/ocl/include/opencv2/ocl/ocl.hpp | 11 +- modules/ocl/src/arithm.cpp | 23 +- modules/ocl/src/gftt.cpp | 2 +- 5 files changed, 260 insertions(+), 229 deletions(-) diff --git a/modules/ocl/doc/matrix_reductions.rst b/modules/ocl/doc/matrix_reductions.rst index 350f861032..4bedb944f9 100644 --- a/modules/ocl/doc/matrix_reductions.rst +++ b/modules/ocl/doc/matrix_reductions.rst @@ -3,6 +3,16 @@ Matrix Reductions .. highlight:: cpp +ocl::absSum +--------------- +Returns the sum of absolute values for matrix elements. + +.. ocv:function:: Scalar ocl::absSum(const oclMat &m) + + :param m: The Source image of all depth. + +Counts the abs sum of matrix elements for each channel. Supports all data types. 
+ ocl::countNonZero --------------------- Returns the number of non-zero elements in src @@ -11,7 +21,7 @@ Returns the number of non-zero elements in src :param src: Single-channel array -Counts non-zero array elements. +Counts non-zero array elements. Supports all data types. ocl::minMax ------------------ @@ -49,32 +59,22 @@ Returns void The functions minMaxLoc find minimum and maximum element values and their positions. The extremums are searched across the whole array, or, if mask is not an empty array, in the specified array region. The functions do not work with multi-channel arrays. -ocl::Sum +ocl::sqrSum ------------------ -Returns the sum of matrix elements for each channel - -.. ocv:function:: Scalar ocl::sum(const oclMat &m) - - :param m: The Source image of all depth. - -Counts the sum of matrix elements for each channel. - -ocl::absSum ---------------- -Returns the sum of absolute values for matrix elements. +Returns the squared sum of matrix elements for each channel -.. ocv:function:: Scalar ocl::absSum(const oclMat &m) +.. ocv:function:: Scalar ocl::sqrSum(const oclMat &m) :param m: The Source image of all depth. -Counts the abs sum of matrix elements for each channel. +Counts the squared sum of matrix elements for each channel. Supports all data types. -ocl::sqrSum +ocl::sum ------------------ -Returns the squared sum of matrix elements for each channel +Returns the sum of matrix elements for each channel -.. ocv:function:: Scalar ocl::sqrSum(const oclMat &m) +.. ocv:function:: Scalar ocl::sum(const oclMat &m) :param m: The Source image of all depth. -Counts the squared sum of matrix elements for each channel. +Counts the sum of matrix elements for each channel. diff --git a/modules/ocl/doc/operations_on_matrices.rst b/modules/ocl/doc/operations_on_matrices.rst index 7eaaf0d81b..24a4ea1dc5 100644 --- a/modules/ocl/doc/operations_on_matrices.rst +++ b/modules/ocl/doc/operations_on_matrices.rst @@ -3,46 +3,6 @@ Operations on Matrics .. 
highlight:: cpp -ocl::oclMat::convertTo --------------------------- -Returns void - -.. ocv:function:: void ocl::oclMat::convertTo(oclMat &m, int rtype, double alpha = 1, double beta = 0) const - - :param m: the destination matrix. If it does not have a proper size or type before the operation, it will be reallocated. - - :param rtype: the desired destination matrix type, or rather, the depth (since the number of channels will be the same with the source one). If rtype is negative, the destination matrix will have the same type as the source. - - :param alpha: optional scale factor. - - :param beta: optional delta added to the scaled values. - -The method converts source pixel values to the target datatype. Saturate cast is applied in the end to avoid possible overflows. Supports all data types. - -ocl::oclMat::copyTo ------------------------ -Returns void - -.. ocv:function:: void ocl::oclMat::copyTo(oclMat &m, const oclMat &mask = oclMat()) const - - :param m: The destination matrix. If it does not have a proper size or type before the operation, it will be reallocated. - - :param mask: The operation mask. Its non-zero elements indicate, which matrix elements need to be copied. - -Copies the matrix to another one. Supports all data types. - -ocl::oclMat::setTo ----------------------- -Returns oclMat - -.. ocv:function:: oclMat& ocl::oclMat::setTo(const Scalar &s, const oclMat &mask = oclMat()) - - :param s: Assigned scalar, which is converted to the actual array type. - - :param mask: The operation mask of the same size as ``*this`` and type ``CV_8UC1``. - -Sets all or some of the array elements to the specified value. This is the advanced variant of Mat::operator=(const Scalar s) operator. Supports all data types. - ocl::absdiff ------------------ Returns void @@ -109,13 +69,13 @@ where ``I`` is a multi-dimensional index of array elements. In case of multi-cha .. 
seealso:: :ocv:func:`addWeighted` -ocl::subtract +ocl::bitwise_and ------------------ Returns void -.. ocv:function:: void ocl::subtract(const oclMat& src1, const oclMat& src2, oclMat& dst, const oclMat& mask = oclMat()) +.. ocv:function:: void ocl::bitwise_and(const oclMat& src1, const oclMat& src2, oclMat& dst, const oclMat& mask = oclMat()) -.. ocv:function:: void ocl::subtract(const oclMat& src1, const Scalar& s, oclMat& dst, const oclMat& mask = oclMat()) +.. ocv:function:: void ocl::bitwise_and(const oclMat& src1, const Scalar& s, oclMat& dst, const oclMat& mask = oclMat()) :param src1: the first input array. @@ -127,61 +87,19 @@ Returns void :param mask: the optional operation mask, 8-bit single channel array; specifies elements of the destination array to be changed. -Computes per-element subtract between two arrays or between array and a scalar. Supports all data types. - -ocl::multiply ------------------- -Returns void - -.. ocv:function:: void ocl::multiply(const oclMat& src1, const oclMat& src2, oclMat& dst, double scale = 1) - - :param src1: the first input array. - - :param src2: the second input array, must be the same size and same type as ``src1``. - - :param dst: the destination array, it will have the same size and same type as ``src1``. - - :param scale: optional scale factor. - -Computes per-element multiply between two arrays or between array and a scalar. Supports all data types. - -ocl::divide ------------------- -Returns void - -.. ocv:function:: void ocl::divide(const oclMat& src1, const oclMat& src2, oclMat& dst, double scale = 1) - -.. ocv:function:: void ocl::divide(double scale, const oclMat& src1, oclMat& dst) - - :param src1: the first input array. - - :param src2: the second input array, must be the same size and same type as ``src1``. - - :param dst: the destination array, it will have the same size and same type as ``src1``. - - :param scale: scalar factor. 
- -Computes per-element divide between two arrays or between array and a scalar. Supports all data types. +Computes per-element bitwise_and between two arrays or between array and a scalar. Supports all data types. -ocl::bitwise_and +ocl::bitwise_not ------------------ Returns void -.. ocv:function:: void ocl::bitwise_and(const oclMat& src1, const oclMat& src2, oclMat& dst, const oclMat& mask = oclMat()) - -.. ocv:function:: void ocl::bitwise_and(const oclMat& src1, const Scalar& s, oclMat& dst, const oclMat& mask = oclMat()) - - :param src1: the first input array. - - :param src2: the second input array, must be the same size and same type as ``src1``. - - :param s: scalar, the second input parameter. +.. ocv:function:: void ocl::bitwise_not(const oclMat &src, oclMat &dst) - :param dst: the destination array, it will have the same size and same type as ``src1``. + :param src: the input array. - :param mask: the optional operation mask, 8-bit single channel array; specifies elements of the destination array to be changed. + :param dst: the destination array, it will have the same size and same type as ``src``. -Computes per-element bitwise_and between two arrays or between array and a scalar. Supports all data types. +The function ``bitwise_not`` computes the per-element bitwise inversion of the source array. Supports all data types. ocl::bitwise_or ------------------ @@ -223,18 +141,6 @@ Returns void Computes per-element bitwise_xor between two arrays or between array and a scalar. Supports all data types. -ocl::bitwise_not ------------------- -Returns void - -.. ocv:function:: void ocl::bitwise_not(const oclMat &src, oclMat &dst) - - :param src: the input array. - - :param dst: the destination array, it will have the same size and same type as ``src``. - -The functions bitwise not compute per-element bit-wise inversion of the source array. Supports all data types. 
- ocl::cartToPolar ------------------ Returns void @@ -253,39 +159,67 @@ Returns void Calculates the magnitude and angle of 2D vectors. Supports only ``CV_32F`` and ``CV_64F`` data types. -ocl::polarToCart +ocl::compare ------------------ Returns void -.. ocv:function:: void ocl::polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false) +.. ocv:function:: void ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpop) - :param magnitude: the source floating-point array of magnitudes of 2D vectors. It can be an empty matrix (=Mat()) - in this case the function assumes that all the magnitudes are = 1. If it's not empty, it must have the same size and same type as ``angle``. + :param src1: the first source array. - :param angle: the source floating-point array of angles of the 2D vectors. + :param src2: the second source array; must have the same size and same type as ``src1``. - :param x: the destination array of x-coordinates of 2D vectors; will have the same size and the same type as ``angle``. + :param dst: the destination array; will have the same size as ``src1`` and type ``CV_8UC1``. - :param y: the destination array of y-coordinates of 2D vectors; will have the same size and the same type as ``angle``. + :param cmpop: the flag specifying the relation between the elements to be checked. - :param angleInDegrees: the flag indicating whether the angles are measured in radians, which is default mode, or in degrees. +Performs per-element comparison of two arrays or an array and scalar value. Supports all data types. -The function polarToCart computes the cartesian coordinates of each 2D vector represented by the corresponding elements of magnitude and angle. Supports only ``CV_32F`` and ``CV_64F`` data types. +ocl::dft +------------ +Performs a forward or inverse discrete Fourier transform (1D or 2D) of the floating point matrix. -ocl::compare +.. 
ocv:function:: void ocl::dft(const oclMat& src, oclMat& dst, Size dft_size = Size(), int flags = 0) + + :param src: source matrix (real or complex). + + :param dst: destination matrix (real or complex). + + :param dft_size: size of original input, which is used for transformation from complex to real. + + :param flags: optional flags: + + * **DFT_ROWS** transforms each individual row of the source matrix. + + * **DFT_COMPLEX_OUTPUT** performs a forward transformation of 1D or 2D real array. The result, though being a complex array, has complex-conjugate symmetry (*CCS*, see the function description below for details). Such an array can be packed into a real array of the same size as input, which is the fastest option and which is what the function does by default. However, you may wish to get a full complex array (for simpler spectrum analysis, and so on). Pass the flag to enable the function to produce a full-size complex output array. + + * **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real cases are always forward and inverse, respectively). + + * **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of real-complex transform, so the destination matrix must be real. + +Use to handle real matrices (``CV_32FC1``) and complex matrices in the interleaved format (``CV_32FC2``). + +The ``dft_size`` must be powers of ``2``, ``3`` and ``5``. Real to complex dft output is not the same with cpu version. Real to complex and complex to real does not support ``DFT_ROWS``. + +.. seealso:: :ocv:func:`dft` + +ocl::divide ------------------ Returns void -.. ocv:function:: void ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpop) +.. ocv:function:: void ocl::divide(const oclMat& src1, const oclMat& src2, oclMat& dst, double scale = 1) - :param src1: the first source array. +.. 
ocv:function:: void ocl::divide(double scale, const oclMat& src1, oclMat& dst) - :param src2: the second source array; must have the same size and same type as ``src1``. + :param src1: the first input array. - :param dst: the destination array; will have the same size as ``src1`` and type ``CV_8UC1``. + :param src2: the second input array, must be the same size and same type as ``src1``. - :param cmpop: the flag specifying the relation between the elements to be checked. + :param dst: the destination array, it will have the same size and same type as ``src1``. -Performs per-element comparison of two arrays or an array and scalar value. Supports all data types. + :param scale: scalar factor. + +Computes per-element divide between two arrays or between array and a scalar. Supports all data types. ocl::exp ------------------ @@ -299,6 +233,45 @@ Returns void The function exp calculates the exponent of every element of the input array. Supports only ``CV_32FC1`` and ``CV_64F`` data types. +ocl::flip +------------------ +Returns void + +.. ocv:function:: void ocl::flip(const oclMat& src, oclMat& dst, int flipCode) + + :param src: source image. + + :param dst: destination image. + + :param flipCode: specifies how to flip the array: 0 means flipping around the x-axis, positive (e.g., 1) means flipping around y-axis, and negative (e.g., -1) means flipping around both axes. + +The function flip flips the array in one of three different ways (row and column indices are 0-based). Supports all data types. + +ocl::gemm +------------------ +Performs generalized matrix multiplication. + +.. ocv:function:: void ocl::gemm(const oclMat& src1, const oclMat& src2, double alpha, const oclMat& src3, double beta, oclMat& dst, int flags = 0) + + :param src1: first multiplied input matrix that should be ``CV_32FC1`` type. + + :param src2: second multiplied input matrix of the same type as ``src1``. + + :param alpha: weight of the matrix product. 
+ + :param src3: third optional delta matrix added to the matrix product. It should have the same type as ``src1`` and ``src2``. + + :param beta: weight of ``src3``. + + :param dst: destination matrix. It has the proper size and the same type as input matrices. + + :param flags: operation flags: + + * **GEMM_1_T** transpose ``src1``. + * **GEMM_2_T** transpose ``src2``. + +.. seealso:: :ocv:func:`gemm` + ocl::log ------------------ Returns void @@ -339,20 +312,6 @@ Returns void The function magnitude calculates magnitude of 2D vectors formed from the corresponding elements of ``x`` and ``y`` arrays. Supports only ``CV_32F`` and ``CV_64F`` data types. -ocl::flip ------------------- -Returns void - -.. ocv:function:: void ocl::flip(const oclMat& src, oclMat& dst, int flipCode) - - :param src: source image. - - :param dst: destination image. - - :param flipCode: specifies how to flip the array: 0 means flipping around the x-axis, positive (e.g., 1) means flipping around y-axis, and negative (e.g., -1) means flipping around both axes. - -The function flip flips the array in one of three different ways (row and column indices are 0-based). Supports all data types. - ocl::meanStdDev ------------------ Returns void @@ -365,7 +324,7 @@ Returns void :param stddev: the output parameter: computed standard deviation. -The functions meanStdDev compute the mean and the standard deviation M of array elements, independently for each channel, and return it via the output parameters. Supports all data types except ``CV_32F``, ``CV_64F``. +The functions meanStdDev compute the mean and the standard deviation M of array elements, independently for each channel, and return it via the output parameters. Supports all data types. ocl::merge ------------------ @@ -379,17 +338,21 @@ Returns void Composes a multi-channel array from several single-channel arrays. Supports all data types. -ocl::split +ocl::multiply ------------------ Returns void -.. 
ocv:function:: void ocl::split(const oclMat &src, vector<oclMat> &dst) +.. ocv:function:: void ocl::multiply(const oclMat& src1, const oclMat& src2, oclMat& dst, double scale = 1) - :param src: The source multi-channel array + :param src1: the first input array. - :param dst: The destination array or vector of arrays; The number of arrays must match src.channels(). The arrays themselves will be reallocated if needed + :param src2: the second input array, must be the same size and same type as ``src1``. -The functions split split multi-channel array into separate single-channel arrays. Supports all data types. + :param dst: the destination array, it will have the same size and same type as ``src1``. + + :param scale: optional scale factor. + +Computes per-element multiply between two arrays or between array and a scalar. Supports all data types. ocl::norm ------------------ @@ -405,103 +368,133 @@ Returns the calculated norm :param normType: type of the norm. -Calculates absolute array norm, absolute difference norm, or relative difference norm. Supports only ``CV_8UC1`` data type. +The functions ``norm`` calculate an absolute norm of ``src1`` (when there is no ``src2`` ): -ocl::phase ------------------- -Returns void +.. math:: -.. ocv:function:: void ocl::phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false) + norm = \forkthree{\|\texttt{src1}\|_{L_{\infty}} = \max _I | \texttt{src1} (I)|}{if $\texttt{normType} = \texttt{NORM\_INF}$ } + { \| \texttt{src1} \| _{L_1} = \sum _I | \texttt{src1} (I)|}{if $\texttt{normType} = \texttt{NORM\_L1}$ } + { \| \texttt{src1} \| _{L_2} = \sqrt{\sum_I \texttt{src1}(I)^2} }{if $\texttt{normType} = \texttt{NORM\_L2}$ } - :param x: the source floating-point array of x-coordinates of 2D vectors +or an absolute or relative difference norm if ``src2`` is there: - :param y: the source array of y-coordinates of 2D vectors; must have the same size and the same type as ``x``. +.. 
math:: - :param angle: the destination array of vector angles; it will have the same size and same type as ``x``. + norm = \forkthree{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} = \max _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if $\texttt{normType} = \texttt{NORM\_INF}$ } + { \| \texttt{src1} - \texttt{src2} \| _{L_1} = \sum _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if $\texttt{normType} = \texttt{NORM\_L1}$ } + { \| \texttt{src1} - \texttt{src2} \| _{L_2} = \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if $\texttt{normType} = \texttt{NORM\_L2}$ } - :param angleInDegrees: when it is true, the function will compute angle in degrees, otherwise they will be measured in radians. +or -The function phase computes the rotation angle of each 2D vector that is formed from the corresponding elements of ``x`` and ``y``. Supports only ``CV_32FC1`` and ``CV_64FC1`` data type. +.. math:: -ocl::pow ------------------- + norm = \forkthree{\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} }{\|\texttt{src2}\|_{L_{\infty}} }}{if $\texttt{normType} = \texttt{NORM\_RELATIVE\_INF}$ } + { \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if $\texttt{normType} = \texttt{NORM\_RELATIVE\_L1}$ } + { \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if $\texttt{normType} = \texttt{NORM\_RELATIVE\_L2}$ } + +The functions ``norm`` return the calculated norm. + +Multi-channel input arrays are treated as single-channel arrays, that is, the results for all channels are combined. + +ocl::oclMat::convertTo +-------------------------- Returns void -.. ocv:function:: void ocl::pow(const oclMat &x, double p, oclMat &y) +.. ocv:function:: void ocl::oclMat::convertTo(oclMat &m, int rtype, double alpha = 1, double beta = 0) const - :param x: the source array. + :param m: the destination matrix. If it does not have a proper size or type before the operation, it will be reallocated. 
- :param p: the exponent of power; the source floating-point array of angles of the 2D vectors. + :param rtype: the desired destination matrix type, or rather, the depth (since the number of channels will be the same with the source one). If rtype is negative, the destination matrix will have the same type as the source. - :param y: the destination array, should be the same type as the source. + :param alpha: optional scale factor. -The function pow raises every element of the input array to ``p``. Supports only ``CV_32FC1`` and ``CV_64FC1`` data types. + :param beta: optional delta added to the scaled values. -ocl::transpose ------------------- +The method converts source pixel values to the target datatype. Saturate cast is applied in the end to avoid possible overflows. Supports all data types. + +ocl::oclMat::copyTo +----------------------- Returns void -.. ocv:function:: void ocl::transpose(const oclMat &src, oclMat &dst) +.. ocv:function:: void ocl::oclMat::copyTo(oclMat &m, const oclMat &mask = oclMat()) const - :param src: the source array. + :param m: The destination matrix. If it does not have a proper size or type before the operation, it will be reallocated. - :param dst: the destination array of the same type as ``src``. + :param mask: The operation mask. Its non-zero elements indicate, which matrix elements need to be copied. -Transposes a matrix (in case when ``src`` == ``dst`` and matrix is square the operation are performed inplace) +Copies the matrix to another one. Supports all data types. +ocl::oclMat::setTo +---------------------- +Returns oclMat -ocl::dft ------------- -Performs a forward or inverse discrete Fourier transform (1D or 2D) of the floating point matrix. +.. ocv:function:: oclMat& ocl::oclMat::setTo(const Scalar &s, const oclMat &mask = oclMat()) -.. ocv:function:: void ocl::dft(const oclMat& src, oclMat& dst, Size dft_size = Size(), int flags = 0) + :param s: Assigned scalar, which is converted to the actual array type. 
- :param src: source matrix (real or complex). + :param mask: The operation mask of the same size as ``*this`` and type ``CV_8UC1``. - :param dst: destination matrix (real or complex). +Sets all or some of the array elements to the specified value. This is the advanced variant of Mat::operator=(const Scalar s) operator. Supports all data types. - :param dft_size: size of original input, which is used for transformation from complex to real. +ocl::phase +------------------ +Returns void - :param flags: optional flags: +.. ocv:function:: void ocl::phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false) - * **DFT_ROWS** transforms each individual row of the source matrix. + :param x: the source floating-point array of x-coordinates of 2D vectors - * **DFT_COMPLEX_OUTPUT** performs a forward transformation of 1D or 2D real array. The result, though being a complex array, has complex-conjugate symmetry (*CCS*, see the function description below for details). Such an array can be packed into a real array of the same size as input, which is the fastest option and which is what the function does by default. However, you may wish to get a full complex array (for simpler spectrum analysis, and so on). Pass the flag to enable the function to produce a full-size complex output array. + :param y: the source array of y-coordinates of 2D vectors; must have the same size and the same type as ``x``. - * **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real cases are always forward and inverse, respectively). + :param angle: the destination array of vector angles; it will have the same size and same type as ``x``. - * **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of real-complex transform, so the destination matrix must be real. + :param angleInDegrees: when it is true, the function will compute angle in degrees, otherwise they will be measured in radians. 
-Use to handle real matrices (``CV_32FC1``) and complex matrices in the interleaved format (``CV_32FC2``). +The function phase computes the rotation angle of each 2D vector that is formed from the corresponding elements of ``x`` and ``y``. Supports only ``CV_32FC1`` and ``CV_64FC1`` data type. -The ``dft_size`` must be powers of ``2``, ``3`` and ``5``. Real to complex dft output is not the same with cpu version. Real to complex and complex to real does not support ``DFT_ROWS``. +ocl::polarToCart +------------------ +Returns void -.. seealso:: :ocv:func:`dft` +.. ocv:function:: void ocl::polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false) -ocl::gemm + :param magnitude: the source floating-point array of magnitudes of 2D vectors. It can be an empty matrix (=Mat()) - in this case the function assumes that all the magnitudes are = 1. If it's not empty, it must have the same size and same type as ``angle``. + + :param angle: the source floating-point array of angles of the 2D vectors. + + :param x: the destination array of x-coordinates of 2D vectors; will have the same size and the same type as ``angle``. + + :param y: the destination array of y-coordinates of 2D vectors; will have the same size and the same type as ``angle``. + + :param angleInDegrees: the flag indicating whether the angles are measured in radians, which is default mode, or in degrees. + +The function polarToCart computes the cartesian coordinates of each 2D vector represented by the corresponding elements of magnitude and angle. Supports only ``CV_32F`` and ``CV_64F`` data types. + +ocl::pow ------------------ -Performs generalized matrix multiplication. +Returns void -.. ocv:function:: void ocl::gemm(const oclMat& src1, const oclMat& src2, double alpha, const oclMat& src3, double beta, oclMat& dst, int flags = 0) +.. 
ocv:function:: void ocl::pow(const oclMat &x, double p, oclMat &y) - :param src1: first multiplied input matrix that should be ``CV_32FC1`` type. + :param x: the source array. - :param src2: second multiplied input matrix of the same type as ``src1``. + :param p: the exponent of power. - :param alpha: weight of the matrix product. + :param y: the destination array, should be the same type as the source. - :param src3: third optional delta matrix added to the matrix product. It should have the same type as ``src1`` and ``src2``. +The function pow raises every element of the input array to ``p``. Supports only ``CV_32FC1`` and ``CV_64FC1`` data types. - :param beta: weight of ``src3``. +ocl::setIdentity +------------------ +Returns void - :param dst: destination matrix. It has the proper size and the same type as input matrices. +.. ocv:function:: void ocl::setIdentity(oclMat& src, const Scalar & val = Scalar(1)) - :param flags: operation flags: + :param src: matrix to initialize (not necessarily square). - * **GEMM_1_T** transpose ``src1``. - * **GEMM_2_T** transpose ``src2``. + :param val: value to assign to diagonal elements. -.. seealso:: :ocv:func:`gemm` +The function initializes a scaled identity matrix. ocl::sortByKey ------------------ @@ -539,3 +532,47 @@ Example:: output - keys = {1, 2, 3} (CV_8UC1) values = {6,2, 10,5, 4,3} (CV_8UC2) + +ocl::split +------------------ +Returns void + +.. ocv:function:: void ocl::split(const oclMat &src, vector<oclMat> &dst) + + :param src: The source multi-channel array + + :param dst: The destination array or vector of arrays; The number of arrays must match src.channels(). The arrays themselves will be reallocated if needed. + +The function split splits a multi-channel array into separate single-channel arrays. Supports all data types. + +ocl::subtract +------------------ +Returns void + +.. 
ocv:function:: void ocl::subtract(const oclMat& src1, const oclMat& src2, oclMat& dst, const oclMat& mask = oclMat()) + +.. ocv:function:: void ocl::subtract(const oclMat& src1, const Scalar& s, oclMat& dst, const oclMat& mask = oclMat()) + + :param src1: the first input array. + + :param src2: the second input array, must be the same size and same type as ``src1``. + + :param s: scalar, the second input parameter. + + :param dst: the destination array, it will have the same size and same type as ``src1``. + + :param mask: the optional operation mask, 8-bit single channel array; specifies elements of the destination array to be changed. + +Computes the per-element difference between two arrays or between an array and a scalar. Supports all data types. + +ocl::transpose +------------------ +Returns void + +.. ocv:function:: void ocl::transpose(const oclMat &src, oclMat &dst) + + :param src: the source array. + + :param dst: the destination array of the same type as ``src``. + +Transposes a matrix (when ``src`` == ``dst`` and the matrix is square, the operation is performed in place). diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index c296f57a3d..c08148fe34 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -410,6 +410,7 @@ namespace cv ////////////////////////////// Arithmetics /////////////////////////////////// //! adds one matrix to another with scale (dst = src1 * alpha + src2 * beta + gama) + // supports all data types CV_EXPORTS void addWeighted(const oclMat &src1, double alpha, const oclMat &src2, double beta, double gama, oclMat &dst); //! adds one matrix to another (dst = src1 + src2) @@ -456,17 +457,17 @@ namespace cv CV_EXPORTS void absdiff(const oclMat &src1, const Scalar &s, oclMat &dst); //! 
computes mean value and standard deviation of all or selected array elements - // supports except CV_32F,CV_64F + // supports all data types CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev); //! computes norm of array // supports NORM_INF, NORM_L1, NORM_L2 - // supports only CV_8UC1 type + // supports all data types CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2); //! computes norm of the difference between two arrays // supports NORM_INF, NORM_L1, NORM_L2 - // supports only CV_8UC1 type + // supports all data types CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2); //! reverses the order of the rows, columns or both in a matrix @@ -474,7 +475,6 @@ namespace cv CV_EXPORTS void flip(const oclMat &src, oclMat &dst, int flipCode); //! computes sum of array elements - // disabled until fix crash // support all types CV_EXPORTS Scalar sum(const oclMat &m); CV_EXPORTS Scalar absSum(const oclMat &m); @@ -483,7 +483,6 @@ namespace cv //! finds global minimum and maximum array elements and returns their values // support all C1 types CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat()); - CV_EXPORTS void minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat& buf); //! finds global minimum and maximum array elements and returns their values with locations // support all C1 types @@ -582,7 +581,7 @@ namespace cv // support only CV_32FC1 type CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result); - CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0); + CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code, int dcn = 0); //! 
initializes a scaled identity matrix CV_EXPORTS void setIdentity(oclMat& src, const Scalar & val = Scalar(1)); diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index 2a663b990a..00c0bebaf9 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -521,13 +521,13 @@ static void arithmetic_minMax_run(const oclMat &src, const oclMat & mask, cl_mem } template -void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, - const oclMat &mask, oclMat &buf) +void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) { size_t groupnum = src.clCxt->computeUnits(); CV_Assert(groupnum != 0); int dbsize = groupnum * 2 * src.elemSize(); + oclMat buf; ensureSizeIsEnough(1, dbsize, CV_8UC1, buf); cl_mem buf_data = reinterpret_cast(buf.data); @@ -549,15 +549,9 @@ void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, } } -void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) -{ - oclMat buf; - minMax_buf(src, minVal, maxVal, mask, buf); -} - -typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf); +typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask); -void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf) +void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) { CV_Assert(src.channels() == 1); CV_Assert(src.size() == mask.size() || mask.empty()); @@ -571,7 +565,7 @@ void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, cons CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } - static minMaxFunc functab[8] = + static minMaxFunc functab[] = { arithmetic_minMax, arithmetic_minMax, @@ -583,9 +577,10 @@ void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, cons 0 }; - minMaxFunc func; - func 
= functab[src.depth()]; - func(src, minVal, maxVal, mask, buf); + minMaxFunc func = functab[src.depth()]; + CV_Assert(func != 0); + + func(src, minVal, maxVal, mask); } ////////////////////////////////////////////////////////////////////////////// diff --git a/modules/ocl/src/gftt.cpp b/modules/ocl/src/gftt.cpp index 37ebaafa38..a928188316 100644 --- a/modules/ocl/src/gftt.cpp +++ b/modules/ocl/src/gftt.cpp @@ -221,7 +221,7 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3); double maxVal = 0; - minMax_buf(eig_, 0, &maxVal, oclMat(), minMaxbuf_); + minMax(eig_, NULL, &maxVal); ensureSizeIsEnough(1, std::max(1000, static_cast(image.size().area() * 0.05)), CV_32FC2, tmpCorners_); From e8d9ed89559ad33167d23e562b52d18bb0c9f817 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 20 Sep 2013 19:19:52 +0400 Subject: [PATCH 24/39] ocl: split initialization.cpp into 3 files: context, operations, programcache --- modules/nonfree/src/surf.ocl.cpp | 2 +- modules/ocl/include/opencv2/ocl/ocl.hpp | 148 ++- .../ocl/include/opencv2/ocl/private/util.hpp | 242 ++-- modules/ocl/perf/main.cpp | 28 +- modules/ocl/src/arithm.cpp | 122 +- modules/ocl/src/bgfg_mog.cpp | 4 +- modules/ocl/src/binarycaching.hpp | 55 +- modules/ocl/src/brute_force_matcher.cpp | 8 +- modules/ocl/src/canny.cpp | 8 +- modules/ocl/src/cl_context.cpp | 507 ++++++++ modules/ocl/src/cl_operations.cpp | 434 +++++++ modules/ocl/src/cl_programcache.cpp | 311 +++++ modules/ocl/src/error.cpp | 16 +- modules/ocl/src/fft.cpp | 44 +- modules/ocl/src/filtering.cpp | 2 +- modules/ocl/src/gemm.cpp | 2 +- modules/ocl/src/gftt.cpp | 2 +- modules/ocl/src/haar.cpp | 14 +- modules/ocl/src/hog.cpp | 14 +- modules/ocl/src/imgproc.cpp | 38 +- modules/ocl/src/initialization.cpp | 1090 ----------------- modules/ocl/src/knearest.cpp | 14 +- modules/ocl/src/matrix_operations.cpp | 13 +- modules/ocl/src/mcwutil.cpp | 24 +- 
modules/ocl/src/moments.cpp | 4 +- modules/ocl/src/pyrdown.cpp | 4 +- modules/ocl/src/pyrlk.cpp | 4 +- modules/ocl/src/pyrup.cpp | 4 +- modules/ocl/src/split_merge.cpp | 4 +- modules/ocl/src/stereo_csbp.cpp | 36 +- modules/ocl/src/stereobp.cpp | 5 +- modules/ocl/src/tvl1flow.cpp | 2 +- modules/ocl/test/main.cpp | 33 +- modules/superres/perf/perf_superres_ocl.cpp | 3 - modules/superres/src/btv_l1_ocl.cpp | 2 +- modules/superres/test/test_superres.cpp | 2 - 36 files changed, 1705 insertions(+), 1540 deletions(-) create mode 100644 modules/ocl/src/cl_context.cpp create mode 100644 modules/ocl/src/cl_operations.cpp create mode 100644 modules/ocl/src/cl_programcache.cpp delete mode 100644 modules/ocl/src/initialization.cpp diff --git a/modules/nonfree/src/surf.ocl.cpp b/modules/nonfree/src/surf.ocl.cpp index f8c1ad7294..59eab705d6 100644 --- a/modules/nonfree/src/surf.ocl.cpp +++ b/modules/nonfree/src/surf.ocl.cpp @@ -74,7 +74,7 @@ namespace cv } cl_kernel kernel; kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr); - size_t wave_size = queryDeviceInfo(kernel); + size_t wave_size = queryWaveFrontSize(kernel); CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS); sprintf(optBufPtr, "-D WAVE_SIZE=%d", static_cast(wave_size)); openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optBufPtr); diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index c296f57a3d..21bb607471 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -57,8 +57,7 @@ namespace cv { namespace ocl { - using std::auto_ptr; - enum + enum DeviceType { CVCL_DEVICE_TYPE_DEFAULT = (1 << 0), CVCL_DEVICE_TYPE_CPU = (1 << 1), @@ -93,77 +92,113 @@ namespace cv //return -1 if the target type is unsupported, otherwise return 0 CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT); - //this class contains ocl 
runtime information - class CV_EXPORTS Info + // these classes contain OpenCL runtime information + + struct PlatformInfo; + + struct DeviceInfo { - public: - struct Impl; - Impl *impl; + int _id; // reserved, don't use it - Info(); - Info(const Info &m); - ~Info(); - void release(); - Info &operator = (const Info &m); - std::vector DeviceName; + DeviceType deviceType; + std::string deviceProfile; + std::string deviceVersion; + std::string deviceName; + std::string deviceVendor; + int deviceVendorId; + std::string deviceDriverVersion; + std::string deviceExtensions; + + size_t maxWorkGroupSize; + std::vector maxWorkItemSizes; + int maxComputeUnits; + size_t localMemorySize; + + int deviceVersionMajor; + int deviceVersionMinor; + + bool haveDoubleSupport; + bool isUnifiedMemory; // 1 means integrated GPU, otherwise this value is 0 + + std::string compilationExtraOptions; + + const PlatformInfo* platform; + + DeviceInfo(); + }; + + struct PlatformInfo + { + int _id; // reserved, don't use it + + std::string platformProfile; + std::string platformVersion; + std::string platformName; + std::string platformVendor; + std::string platformExtensons; + + int platformVersionMajor; + int platformVersionMinor; + + std::vector devices; + + PlatformInfo(); }; - //////////////////////////////// Initialization & Info //////////////////////// - //this function may be obsoleted - //CV_EXPORTS cl_device_id getDevice(); - //the function must be called before any other cv::ocl::functions, it initialize ocl runtime - //each Info relates to an OpenCL platform - //there is one or more devices in each platform, each one has a separate name - CV_EXPORTS int getDevice(std::vector &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU); - //set device you want to use, optional function after getDevice be called - //the devnum is the index of the selected device in DeviceName vector of INfo - CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0); + //////////////////////////////// Initialization & 
Info //////////////////////// + typedef std::vector PlatformsInfo; - //The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue - //returns cl_context * - CV_EXPORTS void* getoclContext(); - //returns cl_command_queue * - CV_EXPORTS void* getoclCommandQueue(); + CV_EXPORTS int getOpenCLPlatforms(PlatformsInfo& platforms); - //explicit call clFinish. The global command queue will be used. - CV_EXPORTS void finish(); + typedef std::vector DevicesInfo; - //this function enable ocl module to use customized cl_context and cl_command_queue - //getDevice also need to be called before this function - CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0); + CV_EXPORTS int getOpenCLDevices(DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU, + const PlatformInfo* platform = NULL); - //returns true when global OpenCL context is initialized - CV_EXPORTS bool initialized(); + // set device you want to use + CV_EXPORTS void setDevice(const DeviceInfo* info); //////////////////////////////// Error handling //////////////////////// CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func); - //////////////////////////////// OpenCL context //////////////////////// - //This is a global singleton class used to represent a OpenCL context. 
+ enum FEATURE_TYPE + { + FEATURE_CL_DOUBLE = 1, + FEATURE_CL_UNIFIED_MEM, + FEATURE_CL_VER_1_2 + }; + + // Represents OpenCL context, interface class CV_EXPORTS Context { protected: - Context(); - friend class auto_ptr; - friend bool initialized(); - private: - static auto_ptr clCxt; - static int val; + Context() { } + ~Context() { } public: - ~Context(); - void release(); - Info::Impl* impl; - static Context* getContext(); - static void setContext(Info &oclinfo); - enum {CL_DOUBLE, CL_UNIFIED_MEM, CL_VER_1_2}; - bool supportsFeature(int ftype) const; - size_t computeUnits() const; - void* oclContext(); - void* oclCommandQueue(); + bool supportsFeature(FEATURE_TYPE featureType) const; + const DeviceInfo& getDeviceInfo() const; + + const void* getOpenCLContextPtr() const; + const void* getOpenCLCommandQueuePtr() const; + const void* getOpenCLDeviceIDPtr() const; }; + inline const void *getClContextPtr() + { + return Context::getContext()->getOpenCLContextPtr(); + } + + inline const void *getClCommandQueuePtr() + { + return Context::getContext()->getOpenCLCommandQueuePtr(); + } + + bool CV_EXPORTS supportsFeature(FEATURE_TYPE featureType); + + void CV_EXPORTS finish(); + //! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt , const char **source, string kernelName, @@ -384,7 +419,7 @@ namespace cv uchar *dataend; //! OpenCL context associated with the oclMat object. 
- Context *clCxt; + Context *clCxt; // TODO clCtx //add offset for handle ROI, calculated in byte int offset; //add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used @@ -1879,11 +1914,6 @@ namespace cv oclMat temp5; }; - static inline size_t divUp(size_t total, size_t grain) - { - return (total + grain - 1) / grain; - } - /*!***************K Nearest Neighbour*************!*/ class CV_EXPORTS KNearestNeighbour: public CvKNearest { diff --git a/modules/ocl/include/opencv2/ocl/private/util.hpp b/modules/ocl/include/opencv2/ocl/private/util.hpp index 3de0d438d4..2aba472f66 100644 --- a/modules/ocl/include/opencv2/ocl/private/util.hpp +++ b/modules/ocl/include/opencv2/ocl/private/util.hpp @@ -52,120 +52,138 @@ namespace cv { - namespace ocl +namespace ocl +{ + +inline cl_device_id getClDeviceID(const Context *ctx) +{ + return *(cl_device_id*)(ctx->getOpenCLDeviceIDPtr()); +} + +inline cl_context getClContext(const Context *ctx) +{ + return *(cl_context*)(ctx->getOpenCLContextPtr()); +} + +inline cl_command_queue getClCommandQueue(const Context *ctx) +{ + return *(cl_command_queue*)(ctx->getOpenCLCommandQueuePtr()); +} + +enum openCLMemcpyKind +{ + clMemcpyHostToDevice = 0, + clMemcpyDeviceToHost, + clMemcpyDeviceToDevice +}; +///////////////////////////OpenCL call wrappers//////////////////////////// +void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch, + size_t widthInBytes, size_t height); +void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch, + size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type); +void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch, + const void *src, size_t spitch, + size_t width, size_t height, openCLMemcpyKind kind, int channels = -1); +void CV_EXPORTS openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset, + const void *src, size_t spitch, + size_t width, size_t height, int src_offset); 
+void CV_EXPORTS openCLFree(void *devPtr); +cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size); +void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size); +cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt, + const char **source, std::string kernelName); +cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt, + const char **source, std::string kernelName, const char *build_options); +void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads); +void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, std::vector< std::pair > &args, + int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1); +void CV_EXPORTS openCLExecuteKernel_(Context *clCxt , const char **source, std::string kernelName, + size_t globalThreads[3], size_t localThreads[3], + std::vector< std::pair > &args, int channels, int depth, const char *build_options); +void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], + size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth); +void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], + size_t localThreads[3], std::vector< std::pair > &args, int channels, + int depth, const char *build_options); + +cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_queue, const void *value, + const size_t size); + +cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr); + +int CV_EXPORTS savetofile(const Context *clcxt, cl_program &program, const char *fileName); + +enum FLUSH_MODE +{ + CLFINISH = 0, + CLFLUSH, + DISABLE +}; + +void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string 
kernelName, size_t globalThreads[3], + size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE); +void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], + size_t localThreads[3], std::vector< std::pair > &args, int channels, + int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE); +// bind oclMat to OpenCL image textures +// note: +// 1. there is no memory management. User need to explicitly release the resource +// 2. for faster clamping, there is no buffer padding for the constructed texture +cl_mem CV_EXPORTS bindTexture(const oclMat &mat); +void CV_EXPORTS releaseTexture(cl_mem& texture); + +//Represents an image texture object +class CV_EXPORTS TextureCL +{ +public: + TextureCL(cl_mem tex, int r, int c, int t) + : tex_(tex), rows(r), cols(c), type(t) {} + ~TextureCL() + { + openCLFree(tex_); + } + operator cl_mem() { - enum openCLMemcpyKind - { - clMemcpyHostToDevice = 0, - clMemcpyDeviceToHost, - clMemcpyDeviceToDevice - }; - ///////////////////////////OpenCL call wrappers//////////////////////////// - void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch, - size_t widthInBytes, size_t height); - void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch, - size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type); - void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch, - const void *src, size_t spitch, - size_t width, size_t height, openCLMemcpyKind kind, int channels = -1); - void CV_EXPORTS openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset, - const void *src, size_t spitch, - size_t width, size_t height, int src_offset); - void CV_EXPORTS openCLFree(void *devPtr); - cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size); - void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem 
dst_buffer, void *host_buffer, size_t size); - cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt, - const char **source, std::string kernelName); - cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt, - const char **source, std::string kernelName, const char *build_options); - void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads); - void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, std::vector< std::pair > &args, - int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1); - void CV_EXPORTS openCLExecuteKernel_(Context *clCxt , const char **source, std::string kernelName, - size_t globalThreads[3], size_t localThreads[3], - std::vector< std::pair > &args, int channels, int depth, const char *build_options); - void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], - size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth); - void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], - size_t localThreads[3], std::vector< std::pair > &args, int channels, - int depth, const char *build_options); - - cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_queue, const void *value, - const size_t size); - - cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr); - - int CV_EXPORTS savetofile(const Context *clcxt, cl_program &program, const char *fileName); - - enum FLUSH_MODE - { - CLFINISH = 0, - CLFLUSH, - DISABLE - }; - - void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], - size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE); - void 
CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], - size_t localThreads[3], std::vector< std::pair > &args, int channels, - int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE); - // bind oclMat to OpenCL image textures - // note: - // 1. there is no memory management. User need to explicitly release the resource - // 2. for faster clamping, there is no buffer padding for the constructed texture - cl_mem CV_EXPORTS bindTexture(const oclMat &mat); - void CV_EXPORTS releaseTexture(cl_mem& texture); - - //Represents an image texture object - class CV_EXPORTS TextureCL - { - public: - TextureCL(cl_mem tex, int r, int c, int t) - : tex_(tex), rows(r), cols(c), type(t) {} - ~TextureCL() - { - openCLFree(tex_); - } - operator cl_mem() - { - return tex_; - } - cl_mem const tex_; - const int rows; - const int cols; - const int type; - private: - //disable assignment - void operator=(const TextureCL&); - }; - // bind oclMat to OpenCL image textures and retunrs an TextureCL object - // note: - // for faster clamping, there is no buffer padding for the constructed texture - Ptr CV_EXPORTS bindTexturePtr(const oclMat &mat); - - // returns whether the current context supports image2d_t format or not - bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext()); - - // the enums are used to query device information - // currently only support wavefront size queries - enum DEVICE_INFO - { - WAVEFRONT_SIZE, //in AMD speak - IS_CPU_DEVICE //check if the device is CPU - }; - template - _ty queryDeviceInfo(cl_kernel kernel = NULL); - - template<> - int CV_EXPORTS queryDeviceInfo(cl_kernel kernel); - template<> - size_t CV_EXPORTS queryDeviceInfo(cl_kernel kernel); - template<> - bool CV_EXPORTS queryDeviceInfo(cl_kernel kernel); - - unsigned long CV_EXPORTS queryLocalMemInfo(); - }//namespace ocl + return tex_; + } + cl_mem const tex_; + const int rows; + const int cols; + const int type; 
+private: + //disable assignment + void operator=(const TextureCL&); +}; +// bind oclMat to OpenCL image textures and returns a TextureCL object +// note: +// for faster clamping, there is no buffer padding for the constructed texture +Ptr CV_EXPORTS bindTexturePtr(const oclMat &mat); + +// returns whether the current context supports image2d_t format or not +bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext()); + +bool CV_EXPORTS isCpuDevice(); + +size_t CV_EXPORTS queryWaveFrontSize(cl_kernel kernel); + + +inline size_t divUp(size_t total, size_t grain) +{ + return (total + grain - 1) / grain; +} + +inline size_t roundUp(size_t sz, size_t n) +{ + // we don't assume that n is a power of 2 (see alignSize) + // equal to divUp(sz, n) * n + size_t t = sz + n - 1; + size_t rem = t % n; + size_t result = t - rem; + return result; +} + +}//namespace ocl }//namespace cv #endif //__OPENCV_OCL_PRIVATE_UTIL__ diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp index e24c2c14e5..e82af4e322 100644 --- a/modules/ocl/perf/main.cpp +++ b/modules/ocl/perf/main.cpp @@ -56,7 +56,7 @@ int main(int argc, char ** argv) const char * keys = "{ h | help | false | print help message }" "{ t | type | gpu | set device type:cpu or gpu}" - "{ p | platform | 0 | set platform id }" + "{ p | platform | -1 | set platform id }" "{ d | device | 0 | set device id }"; CommandLineParser cmd(argc, argv, keys); @@ -68,28 +68,34 @@ int main(int argc, char ** argv) } string type = cmd.get("type"); - unsigned int pid = cmd.get("platform"); + int pid = cmd.get("platform"); int device = cmd.get("device"); int flag = type == "cpu" ? 
cv::ocl::CVCL_DEVICE_TYPE_CPU : cv::ocl::CVCL_DEVICE_TYPE_GPU; - std::vector oclinfo; - int devnums = cv::ocl::getDevice(oclinfo, flag); - if (devnums <= device || device < 0) + cv::ocl::PlatformsInfo platformsInfo; + cv::ocl::getOpenCLPlatforms(platformsInfo); + if (pid >= (int)platformsInfo.size()) { - std::cout << "device invalid\n"; - return -1; + std::cout << "platform is invalid\n"; + return 1; } - if (pid >= oclinfo.size()) + cv::ocl::DevicesInfo devicesInfo; + int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? NULL : platformsInfo[pid]); + if (device < 0 || device >= devnums) { - std::cout << "platform invalid\n"; - return -1; + std::cout << "device/platform invalid\n"; + return 1; } - cv::ocl::setDevice(oclinfo[pid], device); + cv::ocl::setDevice(devicesInfo[device]); cv::ocl::setBinaryDiskCache(cv::ocl::CACHE_UPDATE); + cout << "Device type:" << type << endl + << "Platform name:" << devicesInfo[device]->platform->platformName << endl + << "Device name:" << devicesInfo[device]->deviceName << endl; + CV_PERF_TEST_MAIN_INTERNALS(ocl, impls) } diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index 2a663b990a..7d97e67545 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -51,50 +51,10 @@ //M*/ #include "precomp.hpp" -#include - +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -namespace cv -{ - namespace ocl - { - //////////////////////////////// OpenCL kernel strings ///////////////////// - - extern const char *arithm_absdiff_nonsaturate; - extern const char *arithm_nonzero; - extern const char *arithm_sum; - extern const char *arithm_minMax; - extern const char *arithm_minMaxLoc; - extern const char *arithm_minMaxLoc_mask; - extern const char *arithm_LUT; - extern const char *arithm_add; - extern const char *arithm_add_mask; - extern const char *arithm_add_scalar; - extern const char *arithm_add_scalar_mask; - extern const char 
*arithm_bitwise_binary; - extern const char *arithm_bitwise_binary_mask; - extern const char *arithm_bitwise_binary_scalar; - extern const char *arithm_bitwise_binary_scalar_mask; - extern const char *arithm_bitwise_not; - extern const char *arithm_compare; - extern const char *arithm_transpose; - extern const char *arithm_flip; - extern const char *arithm_flip_rc; - extern const char *arithm_magnitude; - extern const char *arithm_cartToPolar; - extern const char *arithm_polarToCart; - extern const char *arithm_exp; - extern const char *arithm_log; - extern const char *arithm_addWeighted; - extern const char *arithm_phase; - extern const char *arithm_pow; - extern const char *arithm_setidentity; - } -} ////////////////////////////////////////////////////////////////////////////// /////////////////////// add subtract multiply divide ///////////////////////// @@ -106,7 +66,7 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const oclMat &dst, int op_type, bool use_scalar = false) { Context *clCxt = src1.clCxt; - bool hasDouble = clCxt->supportsFeature(Context::CL_DOUBLE); + bool hasDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE); if (!hasDouble && (src1.depth() == CV_64F || src2.depth() == CV_64F || dst.depth() == CV_64F)) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); @@ -264,7 +224,7 @@ void cv::ocl::absdiff(const oclMat &src1, const Scalar &src2, oclMat &dst) ////////////////////////////////////////////////////////////////////////////// static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpOp, - string kernelName, const char **kernelString) + string kernelName, const cv::ocl::ProgramEntry* source) { CV_Assert(src1.type() == src2.type()); dst.create(src1.size(), CV_8UC1); @@ -295,13 +255,13 @@ static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, int args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols )); args.push_back( make_pair( 
sizeof(cl_int), (void *)&src1.rows )); - openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, + openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str()); } void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int cmpOp) { - if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F) + if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F) { cout << "Selected device do not support double" << endl; return; @@ -358,7 +318,7 @@ Scalar arithmetic_sum(const oclMat &src, int type, int ddepth) { CV_Assert(src.step % src.elemSize() == 0); - size_t groupnum = src.clCxt->computeUnits(); + size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits; CV_Assert(groupnum != 0); int dbsize = groupnum * src.oclchannels(); @@ -385,7 +345,7 @@ typedef Scalar (*sumFunc)(const oclMat &src, int type, int ddepth); Scalar cv::ocl::sum(const oclMat &src) { - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } @@ -396,7 +356,7 @@ Scalar cv::ocl::sum(const oclMat &src) arithmetic_sum }; - bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE); + bool hasDouble = src.clCxt->supportsFeature(FEATURE_CL_DOUBLE); int ddepth = std::max(src.depth(), CV_32S); if (!hasDouble && ddepth == CV_64F) ddepth = CV_32F; @@ -407,7 +367,7 @@ Scalar cv::ocl::sum(const oclMat &src) Scalar cv::ocl::absSum(const oclMat &src) { - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } @@ -418,7 +378,7 @@ Scalar cv::ocl::absSum(const oclMat &src) arithmetic_sum }; - bool hasDouble = 
src.clCxt->supportsFeature(Context::CL_DOUBLE); + bool hasDouble = src.clCxt->supportsFeature(FEATURE_CL_DOUBLE); int ddepth = std::max(src.depth(), CV_32S); if (!hasDouble && ddepth == CV_64F) ddepth = CV_32F; @@ -429,7 +389,7 @@ Scalar cv::ocl::absSum(const oclMat &src) Scalar cv::ocl::sqrSum(const oclMat &src) { - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } @@ -440,7 +400,7 @@ Scalar cv::ocl::sqrSum(const oclMat &src) arithmetic_sum }; - bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE); + bool hasDouble = src.clCxt->supportsFeature(FEATURE_CL_DOUBLE); int ddepth = src.depth() <= CV_32S ? CV_32S : (hasDouble ? CV_64F : CV_32F); sumFunc func = functab[ddepth - CV_32S]; @@ -524,7 +484,7 @@ template void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf) { - size_t groupnum = src.clCxt->computeUnits(); + size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits; CV_Assert(groupnum != 0); int dbsize = groupnum * 2 * src.elemSize(); @@ -566,7 +526,7 @@ void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, cons if (minVal == NULL && maxVal == NULL) return; - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } @@ -699,7 +659,7 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName) { - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support 
double\r\n"); return; @@ -746,7 +706,7 @@ static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kern static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, bool isVertical) { - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); return; @@ -792,9 +752,9 @@ static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kern args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); - const char **kernelString = isVertical ? &arithm_flip_rc : &arithm_flip; + const cv::ocl::ProgramEntry* source = isVertical ? &arithm_flip_rc : &arithm_flip; - openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, src.oclchannels(), depth); + openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, src.oclchannels(), depth); } void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode) @@ -860,10 +820,10 @@ void cv::ocl::LUT(const oclMat &src, const oclMat &lut, oclMat &dst) //////////////////////////////// exp log ///////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, const char **kernelString) +static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source) { Context *clCxt = src.clCxt; - if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); return; @@ -893,7 +853,7 @@ static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernel args.push_back( make_pair( sizeof(cl_int), (void 
*)&srcstep1 )); args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 )); - openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, + openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, src.oclchannels(), -1, buildOptions.c_str()); } @@ -913,7 +873,7 @@ void cv::ocl::log(const oclMat &src, oclMat &dst) static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName) { - if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) + if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); return; @@ -955,9 +915,9 @@ void cv::ocl::magnitude(const oclMat &src1, const oclMat &src2, oclMat &dst) arithmetic_magnitude_phase_run(src1, src2, dst, "arithm_magnitude"); } -static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString) +static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source) { - if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) + if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); return; @@ -985,7 +945,7 @@ static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); - openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); } void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleInDegrees) @@ -1004,7 +964,7 @@ void cv::ocl::phase(const oclMat &x, 
const oclMat &y, oclMat &Angle, bool angleI static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &dst_mag, oclMat &dst_cart, string kernelName, bool angleInDegrees) { - if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) + if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); return; @@ -1057,7 +1017,7 @@ void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees, string kernelName) { - if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) + if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); return; @@ -1176,7 +1136,7 @@ void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal, Point *minLoc, Point *maxLoc, const oclMat &mask) { CV_Assert(src.oclchannels() == 1); - size_t groupnum = src.clCxt->computeUnits(); + size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits; CV_Assert(groupnum != 0); int minloc = -1 , maxloc = -1; int vlen = 4, dbsize = groupnum * vlen * 4 * sizeof(T) ; @@ -1238,7 +1198,7 @@ typedef void (*minMaxLocFunc)(const oclMat &src, double *minVal, double *maxVal, void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal, Point *minLoc, Point *maxLoc, const oclMat &mask) { - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); return; @@ -1251,7 +1211,7 @@ void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal, }; minMaxLocFunc func; - func = 
functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)]; + func = functab[(int)src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)]; func(src, minVal, maxVal, minLoc, maxLoc, mask); } @@ -1296,7 +1256,7 @@ int cv::ocl::countNonZero(const oclMat &src) CV_Assert(src.channels() == 1); Context *clCxt = src.clCxt; - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "selected device doesn't support double"); } @@ -1327,7 +1287,7 @@ int cv::ocl::countNonZero(const oclMat &src) ////////////////////////////////bitwise_op//////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString) +static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source) { dst.create(src1.size(), src1.type()); @@ -1361,7 +1321,7 @@ static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); - openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); } enum { AND = 0, OR, XOR }; @@ -1370,7 +1330,7 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca oclMat &dst, int operationType) { Context *clCxt = src1.clCxt; - if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F) + if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F) { cout << "Selected device does not support double" << endl; return; @@ -1442,7 +1402,7 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca void 
cv::ocl::bitwise_not(const oclMat &src, oclMat &dst) { - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F) { cout << "Selected device does not support double" << endl; return; @@ -1571,7 +1531,7 @@ oclMatExpr::operator oclMat() const static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, bool inplace = false) { Context *clCxt = src.clCxt; - if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); return; @@ -1623,7 +1583,7 @@ void cv::ocl::transpose(const oclMat &src, oclMat &dst) void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, double beta, double gama, oclMat &dst) { Context *clCxt = src1.clCxt; - bool hasDouble = clCxt->supportsFeature(Context::CL_DOUBLE); + bool hasDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE); if (!hasDouble && src1.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); @@ -1688,7 +1648,7 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, /////////////////////////////////// Pow ////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernelName, const char **kernelString) +static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source) { CV_Assert(src1.cols == dst.cols && src1.rows == dst.rows); CV_Assert(src1.type() == dst.type()); @@ -1718,17 +1678,17 @@ static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); float pf = static_cast(p); - if 
(!src1.clCxt->supportsFeature(Context::CL_DOUBLE)) + if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) args.push_back( make_pair( sizeof(cl_float), (void *)&pf )); else args.push_back( make_pair( sizeof(cl_double), (void *)&p )); - openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); } void cv::ocl::pow(const oclMat &x, double p, oclMat &y) { - if (!x.clCxt->supportsFeature(Context::CL_DOUBLE) && x.type() == CV_64F) + if (!x.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && x.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; diff --git a/modules/ocl/src/bgfg_mog.cpp b/modules/ocl/src/bgfg_mog.cpp index 3051ac82f3..cb0dee80f8 100644 --- a/modules/ocl/src/bgfg_mog.cpp +++ b/modules/ocl/src/bgfg_mog.cpp @@ -392,7 +392,7 @@ void cv::ocl::device::mog::loadConstants(float Tb, float TB, float Tg, float var constants->c_tau = tau; constants->c_shadowVal = shadowVal; - cl_constants = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()), + cl_constants = load_constant(*((cl_context*)getClContextPtr()), *((cl_command_queue*)getClCommandQueuePtr()), (void *)constants, sizeof(_contant_struct)); } @@ -635,4 +635,4 @@ void cv::ocl::MOG2::release() mean_.release(); bgmodelUsedModes_.release(); -} \ No newline at end of file +} diff --git a/modules/ocl/src/binarycaching.hpp b/modules/ocl/src/binarycaching.hpp index 0ec565f88b..cc9e71a330 100644 --- a/modules/ocl/src/binarycaching.hpp +++ b/modules/ocl/src/binarycaching.hpp @@ -50,41 +50,36 @@ using namespace std; using std::cout; using std::endl; -namespace cv +namespace cv { namespace ocl { + +class ProgramCache { - namespace ocl - { - class ProgramCache - { - protected: - ProgramCache(); - friend class auto_ptr; - static auto_ptr programCache; +protected: + ProgramCache(); + ~ProgramCache(); + friend class std::auto_ptr; 
+public: + static ProgramCache *getProgramCache(); - public: - ~ProgramCache(); - static ProgramCache *getProgramCache() - { - if( NULL == programCache.get()) - programCache.reset(new ProgramCache()); - return programCache.get(); - } + cl_program getProgram(const Context *ctx, const char **source, string kernelName, + const char *build_options); - //lookup the binary given the file name - cl_program progLookup(string srcsign); + void releaseProgram(); +protected: + //lookup the binary given the file name + cl_program progLookup(string srcsign); - //add program to the cache - void addProgram(string srcsign, cl_program program); - void releaseProgram(); + //add program to the cache + void addProgram(string srcsign, cl_program program); - map codeCache; - unsigned int cacheSize; - //The presumed watermark for the cache volume (256MB). Is it enough? - //We may need more delicate algorithms when necessary later. - //Right now, let's just leave it along. - static const unsigned MAX_PROG_CACHE_SIZE = 1024; - }; + map codeCache; + unsigned int cacheSize; - }//namespace ocl + //The presumed watermark for the cache volume (256MB). Is it enough? + //We may need more delicate algorithms when necessary later. + //Right now, let's just leave it along. + static const unsigned MAX_PROG_CACHE_SIZE = 1024; +}; +}//namespace ocl }//namespace cv diff --git a/modules/ocl/src/brute_force_matcher.cpp b/modules/ocl/src/brute_force_matcher.cpp index 74da6ddd06..0273ed5891 100644 --- a/modules/ocl/src/brute_force_matcher.cpp +++ b/modules/ocl/src/brute_force_matcher.cpp @@ -245,7 +245,7 @@ static void matchDispatcher(const oclMat &query, const oclMat &train, const oclM { const oclMat zeroMask; const oclMat &tempMask = mask.data ? 
mask : zeroMask; - bool is_cpu = queryDeviceInfo(); + bool is_cpu = isCpuDevice(); if (query.cols <= 64) { matchUnrolledCached<16, 64>(query, train, tempMask, trainIdx, distance, distType); @@ -265,7 +265,7 @@ static void matchDispatcher(const oclMat &query, const oclMat *trains, int n, co { const oclMat zeroMask; const oclMat &tempMask = mask.data ? mask : zeroMask; - bool is_cpu = queryDeviceInfo(); + bool is_cpu = isCpuDevice(); if (query.cols <= 64) { matchUnrolledCached<16, 64>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType); @@ -286,7 +286,7 @@ static void matchDispatcher(const oclMat &query, const oclMat &train, float maxD { const oclMat zeroMask; const oclMat &tempMask = mask.data ? mask : zeroMask; - bool is_cpu = queryDeviceInfo(); + bool is_cpu = isCpuDevice(); if (query.cols <= 64) { matchUnrolledCached<16, 64>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType); @@ -469,7 +469,7 @@ static void calcDistanceDispatcher(const oclMat &query, const oclMat &train, con static void match2Dispatcher(const oclMat &query, const oclMat &train, const oclMat &mask, const oclMat &trainIdx, const oclMat &distance, int distType) { - bool is_cpu = queryDeviceInfo(); + bool is_cpu = isCpuDevice(); if (query.cols <= 64) { knn_matchUnrolledCached<16, 64>(query, train, mask, trainIdx, distance, distType); diff --git a/modules/ocl/src/canny.cpp b/modules/ocl/src/canny.cpp index 4c7b988f6f..a25c1973ef 100644 --- a/modules/ocl/src/canny.cpp +++ b/modules/ocl/src/canny.cpp @@ -98,7 +98,7 @@ void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size) { openCLFree(counter); } - counter = clCreateBuffer( *((cl_context*)getoclContext()), CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err ); + counter = clCreateBuffer( *((cl_context*)getClContextPtr()), CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err ); openCLSafeCall(err); } @@ -354,7 +354,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, 
in void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols) { unsigned int count; - openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL)); + openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL)); Context *clCxt = map.clCxt; string kernelName = "edgesHysteresisGlobal"; vector< pair > args; @@ -363,7 +363,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi int count_i[1] = {0}; while(count > 0) { - openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL)); + openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL)); args.clear(); size_t globalThreads[3] = {std::min(count, 65535u) * 128, divUp(count, 65535), 1}; @@ -378,7 +378,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset)); openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); - openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL)); + openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL)); std::swap(st1, st2); } } diff --git a/modules/ocl/src/cl_context.cpp b/modules/ocl/src/cl_context.cpp new file mode 100644 index 0000000000..6413465f65 --- /dev/null +++ b/modules/ocl/src/cl_context.cpp @@ -0,0 +1,507 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
+// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Guoping Long, longguoping@gmail.com +// Niko Li, newlife20080214@gmail.com +// Yao Wang, bitwangyaoyao@gmail.com +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include +#include +#include "binarycaching.hpp" + +#undef __CL_ENABLE_EXCEPTIONS +#include + +namespace cv { namespace ocl { + +extern void fft_teardown(); +extern void clBlasTeardown(); + +struct PlatformInfoImpl +{ + cl_platform_id platform_id; + + std::vector deviceIDs; + + PlatformInfo info; + + PlatformInfoImpl() + : platform_id(NULL) + { + } +}; + +struct DeviceInfoImpl +{ + cl_platform_id platform_id; + cl_device_id device_id; + + DeviceInfo info; + + DeviceInfoImpl() + : platform_id(NULL), device_id(NULL) + { + } +}; + +static std::vector global_platforms; +static std::vector global_devices; + +static bool parseOpenCLVersion(const std::string& versionStr, int& major, int& minor) +{ + size_t p0 = versionStr.find(' '); + while (true) + { + if (p0 == std::string::npos) + break; + if (p0 + 1 >= versionStr.length()) + break; + char c = versionStr[p0 + 1]; + if (isdigit(c)) + break; + p0 = versionStr.find(' ', p0 + 1); + } + size_t p1 = versionStr.find('.', p0); + size_t p2 = versionStr.find(' ', p1); + if (p0 == std::string::npos || p1 == std::string::npos || p2 == std::string::npos) + { + major = 0; + minor = 0; + return false; + } + std::string majorStr = versionStr.substr(p0 + 1, p1 - p0 - 1); + std::string minorStr = versionStr.substr(p1 + 1, p2 - p1 - 1); + major = atoi(majorStr.c_str()); + minor = atoi(minorStr.c_str()); + return true; +} + +static int initializeOpenCLDevices() +{ + 
assert(global_devices.size() == 0); + + std::vector platforms; + try + { + openCLSafeCall(cl::Platform::get(&platforms)); + } + catch (cv::Exception& e) + { + return 0; // OpenCL not found + } + + global_platforms.resize(platforms.size()); + + for (size_t i = 0; i < platforms.size(); ++i) + { + PlatformInfoImpl& platformInfo = global_platforms[i]; + platformInfo.info._id = i; + + cl::Platform& platform = platforms[i]; + + platformInfo.platform_id = platform(); + openCLSafeCall(platform.getInfo(CL_PLATFORM_PROFILE, &platformInfo.info.platformProfile)); + openCLSafeCall(platform.getInfo(CL_PLATFORM_VERSION, &platformInfo.info.platformVersion)); + openCLSafeCall(platform.getInfo(CL_PLATFORM_NAME, &platformInfo.info.platformName)); + openCLSafeCall(platform.getInfo(CL_PLATFORM_VENDOR, &platformInfo.info.platformVendor)); + openCLSafeCall(platform.getInfo(CL_PLATFORM_EXTENSIONS, &platformInfo.info.platformExtensons)); + + parseOpenCLVersion(platformInfo.info.platformVersion, + platformInfo.info.platformVersionMajor, platformInfo.info.platformVersionMinor); + + std::vector devices; + cl_int status = platform.getDevices(CL_DEVICE_TYPE_ALL, &devices); + if(status != CL_DEVICE_NOT_FOUND) + openCLVerifyCall(status); + + if(devices.size() > 0) + { + int baseIndx = global_devices.size(); + global_devices.resize(baseIndx + devices.size()); + platformInfo.deviceIDs.resize(devices.size()); + platformInfo.info.devices.resize(devices.size()); + + for(size_t j = 0; j < devices.size(); ++j) + { + cl::Device& device = devices[j]; + + DeviceInfoImpl& deviceInfo = global_devices[baseIndx + j]; + deviceInfo.info._id = baseIndx + j; + deviceInfo.platform_id = platform(); + deviceInfo.device_id = device(); + + deviceInfo.info.platform = &platformInfo.info; + platformInfo.deviceIDs[j] = deviceInfo.info._id; + + cl_device_type type = -1; + openCLSafeCall(device.getInfo(CL_DEVICE_TYPE, &type)); + deviceInfo.info.deviceType = DeviceType(type); + + 
openCLSafeCall(device.getInfo(CL_DEVICE_PROFILE, &deviceInfo.info.deviceProfile)); + openCLSafeCall(device.getInfo(CL_DEVICE_VERSION, &deviceInfo.info.deviceVersion)); + openCLSafeCall(device.getInfo(CL_DEVICE_NAME, &deviceInfo.info.deviceName)); + openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR, &deviceInfo.info.deviceVendor)); + cl_uint vendorID = -1; + openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR_ID, &vendorID)); + deviceInfo.info.deviceVendorId = vendorID; + openCLSafeCall(device.getInfo(CL_DRIVER_VERSION, &deviceInfo.info.deviceDriverVersion)); + openCLSafeCall(device.getInfo(CL_DEVICE_EXTENSIONS, &deviceInfo.info.deviceExtensions)); + + parseOpenCLVersion(deviceInfo.info.deviceVersion, + deviceInfo.info.deviceVersionMajor, deviceInfo.info.deviceVersionMinor); + + size_t maxWorkGroupSize = 0; + openCLSafeCall(device.getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &maxWorkGroupSize)); + deviceInfo.info.maxWorkGroupSize = maxWorkGroupSize; + + cl_uint maxDimensions = 0; + openCLSafeCall(device.getInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, &maxDimensions)); + std::vector maxWorkItemSizes(maxDimensions); + openCLSafeCall(clGetDeviceInfo(device(), CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * maxDimensions, + (void *)&maxWorkItemSizes[0], 0)); + deviceInfo.info.maxWorkItemSizes = maxWorkItemSizes; + + cl_uint maxComputeUnits = 0; + openCLSafeCall(device.getInfo(CL_DEVICE_MAX_COMPUTE_UNITS, &maxComputeUnits)); + deviceInfo.info.maxComputeUnits = maxComputeUnits; + + cl_ulong localMemorySize = 0; + openCLSafeCall(device.getInfo(CL_DEVICE_LOCAL_MEM_SIZE, &localMemorySize)); + deviceInfo.info.localMemorySize = (size_t)localMemorySize; + + + cl_bool unifiedMemory = false; + openCLSafeCall(device.getInfo(CL_DEVICE_HOST_UNIFIED_MEMORY, &unifiedMemory)); + deviceInfo.info.isUnifiedMemory = unifiedMemory != 0; + + //initialize extra options for compilation. Currently only fp64 is included. + //Assume 4KB is enough to store all possible extensions. 
openCLSafeCall(device.getInfo(CL_DEVICE_EXTENSIONS, &deviceInfo.info.deviceExtensions)); + + size_t fp64_khr = deviceInfo.info.deviceExtensions.find("cl_khr_fp64"); + if(fp64_khr != std::string::npos) + { + deviceInfo.info.compilationExtraOptions += "-D DOUBLE_SUPPORT"; + deviceInfo.info.haveDoubleSupport = true; + } + else + { + deviceInfo.info.haveDoubleSupport = false; + } + } + } + } + + for (size_t i = 0; i < platforms.size(); ++i) + { + PlatformInfoImpl& platformInfo = global_platforms[i]; + for(size_t j = 0; j < platformInfo.deviceIDs.size(); ++j) + { + DeviceInfoImpl& deviceInfo = global_devices[platformInfo.deviceIDs[j]]; + platformInfo.info.devices[j] = &deviceInfo.info; + } + } + + return global_devices.size(); +} + + +DeviceInfo::DeviceInfo() + : _id(-1), deviceType(DeviceType(0)), + deviceVendorId(-1), + maxWorkGroupSize(0), maxComputeUnits(0), localMemorySize(0), + deviceVersionMajor(0), deviceVersionMinor(0), + haveDoubleSupport(false), isUnifiedMemory(false), + platform(NULL) +{ + // nothing +} + +PlatformInfo::PlatformInfo() + : _id(-1), + platformVersionMajor(0), platformVersionMinor(0) +{ + // nothing +} + +//////////////////////////////// OpenCL context //////////////////////// +//This is a global singleton class used to represent an OpenCL context. 
+class ContextImpl : public Context +{ +public: + const cl_device_id clDeviceID; + cl_context clContext; + cl_command_queue clCmdQueue; + const DeviceInfo& deviceInfo; + +protected: + ContextImpl(const DeviceInfo& deviceInfo, cl_device_id clDeviceID) + : clDeviceID(clDeviceID), clContext(NULL), clCmdQueue(NULL), deviceInfo(deviceInfo) + { + // nothing + } + ~ContextImpl(); +public: + + static ContextImpl* getContext(); + static void setContext(const DeviceInfo* deviceInfo); + + bool supportsFeature(FEATURE_TYPE featureType) const; + + static void cleanupContext(void); +}; + +static cv::Mutex currentContextMutex; +static ContextImpl* currentContext = NULL; + +Context* Context::getContext() +{ + return currentContext; +} + +bool Context::supportsFeature(FEATURE_TYPE featureType) const +{ + return ((ContextImpl*)this)->supportsFeature(featureType); +} + +const DeviceInfo& Context::getDeviceInfo() const +{ + return ((ContextImpl*)this)->deviceInfo; +} + +const void* Context::getOpenCLContextPtr() const +{ + return &(((ContextImpl*)this)->clContext); +} + +const void* Context::getOpenCLCommandQueuePtr() const +{ + return &(((ContextImpl*)this)->clCmdQueue); +} + +const void* Context::getOpenCLDeviceIDPtr() const +{ + return &(((ContextImpl*)this)->clDeviceID); +} + + +bool ContextImpl::supportsFeature(FEATURE_TYPE featureType) const +{ + switch (featureType) + { + case FEATURE_CL_DOUBLE: + return deviceInfo.haveDoubleSupport; + case FEATURE_CL_UNIFIED_MEM: + return deviceInfo.isUnifiedMemory; + case FEATURE_CL_VER_1_2: + return deviceInfo.deviceVersionMajor > 1 || (deviceInfo.deviceVersionMajor == 1 && deviceInfo.deviceVersionMinor >= 2); + } + CV_Error(CV_StsBadArg, "Invalid feature type"); + return false; +} + +#if defined(WIN32) +static bool __termination = false; +#endif + +ContextImpl::~ContextImpl() +{ + fft_teardown(); + clBlasTeardown(); + +#ifdef WIN32 + // if process is on termination stage (ExitProcess was called and other threads were terminated) + // then 
disable command queue release because it may cause program hang + if (!__termination) +#endif + { + if(clCmdQueue) + { + openCLSafeCall(clReleaseCommandQueue(clCmdQueue)); // some cleanup problems are here + } + + if(clContext) + { + openCLSafeCall(clReleaseContext(clContext)); + } + } + clCmdQueue = NULL; + clContext = NULL; +} + +void ContextImpl::cleanupContext(void) +{ + cv::AutoLock lock(currentContextMutex); + if (currentContext) + delete currentContext; + currentContext = NULL; +} + +void ContextImpl::setContext(const DeviceInfo* deviceInfo) +{ + CV_Assert(deviceInfo->_id >= 0 && deviceInfo->_id < (int)global_devices.size()); + + DeviceInfoImpl& infoImpl = global_devices[deviceInfo->_id]; + CV_Assert(deviceInfo == &infoImpl.info); + + cl_int status = 0; + cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(infoImpl.platform_id), 0 }; + cl_context clContext = clCreateContext(cps, 1, &infoImpl.device_id, NULL, NULL, &status); + openCLVerifyCall(status); + // TODO add CL_QUEUE_PROFILING_ENABLE + cl_command_queue clCmdQueue = clCreateCommandQueue(clContext, infoImpl.device_id, 0, &status); + openCLVerifyCall(status); + + ContextImpl* ctx = new ContextImpl(infoImpl.info, infoImpl.device_id); + ctx->clCmdQueue = clCmdQueue; + ctx->clContext = clContext; + + ContextImpl* old = NULL; + { + cv::AutoLock lock(currentContextMutex); + old = currentContext; + currentContext = ctx; + } + if (old != NULL) + { + delete old; + } +} + +ContextImpl* ContextImpl::getContext() +{ + return currentContext; +} + +int getOpenCLPlatforms(PlatformsInfo& platforms) +{ + platforms.clear(); + + for (size_t id = 0; id < global_platforms.size(); ++id) + { + PlatformInfoImpl& impl = global_platforms[id]; + platforms.push_back(&impl.info); + } + + return platforms.size(); +} + +int getOpenCLDevices(std::vector &devices, int deviceType, const PlatformInfo* platform) +{ + devices.clear(); + + switch(deviceType) + { + case CVCL_DEVICE_TYPE_DEFAULT: + case 
CVCL_DEVICE_TYPE_CPU: + case CVCL_DEVICE_TYPE_GPU: + case CVCL_DEVICE_TYPE_ACCELERATOR: + case CVCL_DEVICE_TYPE_ALL: + break; + default: + return 0; + } + + if (platform == NULL) + { + for (size_t id = 0; id < global_devices.size(); ++id) + { + DeviceInfoImpl& deviceInfo = global_devices[id]; + if (((int)deviceInfo.info.deviceType & deviceType) == deviceType) + { + devices.push_back(&deviceInfo.info); + } + } + } + else + { + for (size_t id = 0; id < platform->devices.size(); ++id) + { + const DeviceInfo* deviceInfo = platform->devices[id]; + if (((int)deviceInfo->deviceType & deviceType) == deviceType) + { + devices.push_back(deviceInfo); + } + } + } + + return (int)devices.size(); +} + +void setDevice(const DeviceInfo* info) +{ + ContextImpl::setContext(info); +} + +bool supportsFeature(FEATURE_TYPE featureType) +{ + return Context::getContext()->supportsFeature(featureType); +} + +struct __Module +{ + __Module() { initializeOpenCLDevices(); } + ~__Module() { ContextImpl::cleanupContext(); } +}; +static __Module __module; + + +}//namespace ocl +}//namespace cv + + +#if defined(WIN32) && defined(CVAPI_EXPORTS) + +extern "C" +BOOL WINAPI DllMain(HINSTANCE /*hInst*/, DWORD fdwReason, LPVOID lpReserved) +{ + if (fdwReason == DLL_PROCESS_DETACH) + { + if (lpReserved != NULL) // called after ExitProcess() call + cv::ocl::__termination = true; + } + return TRUE; +} + +#endif diff --git a/modules/ocl/src/cl_operations.cpp b/modules/ocl/src/cl_operations.cpp new file mode 100644 index 0000000000..42138adbe0 --- /dev/null +++ b/modules/ocl/src/cl_operations.cpp @@ -0,0 +1,434 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. 
+// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Guoping Long, longguoping@gmail.com +// Niko Li, newlife20080214@gmail.com +// Yao Wang, bitwangyaoyao@gmail.com +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include +#include +#include "binarycaching.hpp" + +#undef __CL_ENABLE_EXCEPTIONS +#include + +//#define PRINT_KERNEL_RUN_TIME +#define RUN_TIMES 100 +#ifndef CL_MEM_USE_PERSISTENT_MEM_AMD +#define CL_MEM_USE_PERSISTENT_MEM_AMD 0 +#endif +//#define AMD_DOUBLE_DIFFER + +namespace cv { namespace ocl { + +DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT; +DevMemRW gDeviceMemRW = DEVICE_MEM_R_W; +int gDevMemTypeValueMap[5] = {0, + CL_MEM_ALLOC_HOST_PTR, + CL_MEM_USE_HOST_PTR, + CL_MEM_COPY_HOST_PTR, + CL_MEM_USE_PERSISTENT_MEM_AMD}; +int gDevMemRWValueMap[3] = {CL_MEM_READ_WRITE, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY}; + +void finish() +{ + clFinish(getClCommandQueue(Context::getContext())); +} + +bool isCpuDevice() +{ + const DeviceInfo& info = Context::getContext()->getDeviceInfo(); + return (info.deviceType == CVCL_DEVICE_TYPE_CPU); +} + +size_t queryWaveFrontSize(cl_kernel kernel) +{ + const DeviceInfo& info = Context::getContext()->getDeviceInfo(); + if (info.deviceType == CVCL_DEVICE_TYPE_CPU) + return 1; + size_t wavefront = 0; + CV_Assert(kernel != NULL); + openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(Context::getContext()), + CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &wavefront, NULL)); + return wavefront; +} + + +void openCLReadBuffer(Context *ctx, cl_mem dst_buffer, void *host_buffer, size_t size) +{ + cl_int status; + status = 
clEnqueueReadBuffer(getClCommandQueue(ctx), dst_buffer, CL_TRUE, 0, + size, host_buffer, 0, NULL, NULL); + openCLVerifyCall(status); +} + +cl_mem openCLCreateBuffer(Context *ctx, size_t flag , size_t size) +{ + cl_int status; + cl_mem buffer = clCreateBuffer(getClContext(ctx), (cl_mem_flags)flag, size, NULL, &status); + openCLVerifyCall(status); + return buffer; +} + +void openCLMallocPitch(Context *ctx, void **dev_ptr, size_t *pitch, + size_t widthInBytes, size_t height) +{ + openCLMallocPitchEx(ctx, dev_ptr, pitch, widthInBytes, height, gDeviceMemRW, gDeviceMemType); +} + +void openCLMallocPitchEx(Context *ctx, void **dev_ptr, size_t *pitch, + size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type) +{ + cl_int status; + *dev_ptr = clCreateBuffer(getClContext(ctx), gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type], + widthInBytes * height, 0, &status); + openCLVerifyCall(status); + *pitch = widthInBytes; +} + +void openCLMemcpy2D(Context *ctx, void *dst, size_t dpitch, + const void *src, size_t spitch, + size_t width, size_t height, openCLMemcpyKind kind, int channels) +{ + size_t buffer_origin[3] = {0, 0, 0}; + size_t host_origin[3] = {0, 0, 0}; + size_t region[3] = {width, height, 1}; + if(kind == clMemcpyHostToDevice) + { + if(dpitch == width || channels == 3 || height == 1) + { + openCLSafeCall(clEnqueueWriteBuffer(getClCommandQueue(ctx), (cl_mem)dst, CL_TRUE, + 0, width * height, src, 0, NULL, NULL)); + } + else + { + openCLSafeCall(clEnqueueWriteBufferRect(getClCommandQueue(ctx), (cl_mem)dst, CL_TRUE, + buffer_origin, host_origin, region, dpitch, 0, spitch, 0, src, 0, 0, 0)); + } + } + else if(kind == clMemcpyDeviceToHost) + { + if(spitch == width || channels == 3 || height == 1) + { + openCLSafeCall(clEnqueueReadBuffer(getClCommandQueue(ctx), (cl_mem)src, CL_TRUE, + 0, width * height, dst, 0, NULL, NULL)); + } + else + { + openCLSafeCall(clEnqueueReadBufferRect(getClCommandQueue(ctx), (cl_mem)src, CL_TRUE, + buffer_origin, 
host_origin, region, spitch, 0, dpitch, 0, dst, 0, 0, 0)); + } + } +} + +void openCLCopyBuffer2D(Context *ctx, void *dst, size_t dpitch, int dst_offset, + const void *src, size_t spitch, + size_t width, size_t height, int src_offset) +{ + size_t src_origin[3] = {src_offset % spitch, src_offset / spitch, 0}; + size_t dst_origin[3] = {dst_offset % dpitch, dst_offset / dpitch, 0}; + size_t region[3] = {width, height, 1}; + + openCLSafeCall(clEnqueueCopyBufferRect(getClCommandQueue(ctx), (cl_mem)src, (cl_mem)dst, src_origin, dst_origin, + region, spitch, 0, dpitch, 0, 0, 0, 0)); +} + +void openCLFree(void *devPtr) +{ + openCLSafeCall(clReleaseMemObject((cl_mem)devPtr)); +} + +cl_kernel openCLGetKernelFromSource(const Context *ctx, const char **source, string kernelName) +{ + return openCLGetKernelFromSource(ctx, source, kernelName, NULL); +} + +cl_kernel openCLGetKernelFromSource(const Context *ctx, const char **source, string kernelName, + const char *build_options) +{ + cl_kernel kernel; + cl_int status = 0; + CV_Assert(ProgramCache::getProgramCache() != NULL); + cl_program program = ProgramCache::getProgramCache()->getProgram(ctx, source, kernelName, build_options); + CV_Assert(program != NULL); + kernel = clCreateKernel(program, kernelName.c_str(), &status); + openCLVerifyCall(status); + return kernel; +} + +void openCLVerifyKernel(const Context *ctx, cl_kernel kernel, size_t *localThreads) +{ + size_t kernelWorkGroupSize; + openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(ctx), + CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0)); + CV_Assert( localThreads[0] <= ctx->getDeviceInfo().maxWorkItemSizes[0] ); + CV_Assert( localThreads[1] <= ctx->getDeviceInfo().maxWorkItemSizes[1] ); + CV_Assert( localThreads[2] <= ctx->getDeviceInfo().maxWorkItemSizes[2] ); + CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= kernelWorkGroupSize ); + CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= 
ctx->getDeviceInfo().maxWorkGroupSize ); +} + +#ifdef PRINT_KERNEL_RUN_TIME +static double total_execute_time = 0; +static double total_kernel_time = 0; +#endif +void openCLExecuteKernel_(Context *ctx , const char **source, string kernelName, size_t globalThreads[3], + size_t localThreads[3], vector< pair > &args, int channels, + int depth, const char *build_options) +{ + //construct kernel name + //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number + //for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char) + stringstream idxStr; + if(channels != -1) + idxStr << "_C" << channels; + if(depth != -1) + idxStr << "_D" << depth; + kernelName += idxStr.str(); + + cl_kernel kernel; + kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options); + + if ( localThreads != NULL) + { + globalThreads[0] = roundUp(globalThreads[0], localThreads[0]); + globalThreads[1] = roundUp(globalThreads[1], localThreads[1]); + globalThreads[2] = roundUp(globalThreads[2], localThreads[2]); + + cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads); + } + for(size_t i = 0; i < args.size(); i ++) + openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); + +#ifndef PRINT_KERNEL_RUN_TIME + openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads, + localThreads, 0, NULL, NULL)); +#else + cl_event event = NULL; + openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads, + localThreads, 0, NULL, &event)); + + cl_ulong start_time, end_time, queue_time; + double execute_time = 0; + double total_time = 0; + + openCLSafeCall(clWaitForEvents(1, &event)); + openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, + sizeof(cl_ulong), &start_time, 0)); + + openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, + sizeof(cl_ulong), &end_time, 0)); + + 
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, + sizeof(cl_ulong), &queue_time, 0)); + + execute_time = (double)(end_time - start_time) / (1000 * 1000); + total_time = (double)(end_time - queue_time) / (1000 * 1000); + + total_execute_time += execute_time; + total_kernel_time += total_time; + clReleaseEvent(event); +#endif + + clFlush(getClCommandQueue(ctx)); + openCLSafeCall(clReleaseKernel(kernel)); +} + +void openCLExecuteKernel(Context *ctx , const char **source, string kernelName, + size_t globalThreads[3], size_t localThreads[3], + vector< pair > &args, int channels, int depth) +{ + openCLExecuteKernel(ctx, source, kernelName, globalThreads, localThreads, args, + channels, depth, NULL); +} +void openCLExecuteKernel(Context *ctx , const char **source, string kernelName, + size_t globalThreads[3], size_t localThreads[3], + vector< pair > &args, int channels, int depth, const char *build_options) + +{ +#ifndef PRINT_KERNEL_RUN_TIME + openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth, + build_options); +#else + string data_type[] = { "uchar", "char", "ushort", "short", "int", "float", "double"}; + cout << endl; + cout << "Function Name: " << kernelName; + if(depth >= 0) + cout << " |data type: " << data_type[depth]; + cout << " |channels: " << channels; + cout << " |Time Unit: " << "ms" << endl; + + total_execute_time = 0; + total_kernel_time = 0; + cout << "-------------------------------------" << endl; + + cout << setiosflags(ios::left) << setw(15) << "excute time"; + cout << setiosflags(ios::left) << setw(15) << "lauch time"; + cout << setiosflags(ios::left) << setw(15) << "kernel time" << endl; + int i = 0; + for(i = 0; i < RUN_TIMES; i++) + openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth, + build_options); + + cout << "average kernel excute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl; + cout << "average kernel 
total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl; +#endif +} + +double openCLExecuteKernelInterop(Context *ctx , const char **source, string kernelName, + size_t globalThreads[3], size_t localThreads[3], + vector< pair > &args, int channels, int depth, const char *build_options, + bool finish, bool measureKernelTime, bool cleanUp) + +{ + //construct kernel name + //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number + //for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char) + stringstream idxStr; + if(channels != -1) + idxStr << "_C" << channels; + if(depth != -1) + idxStr << "_D" << depth; + kernelName += idxStr.str(); + + cl_kernel kernel; + kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options); + + double kernelTime = 0.0; + + if( globalThreads != NULL) + { + if ( localThreads != NULL) + { + globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0]; + globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1]; + globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2]; + + //size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2]; + cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads); + } + for(size_t i = 0; i < args.size(); i ++) + openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); + + if(measureKernelTime == false) + { + openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads, + localThreads, 0, NULL, NULL)); + } + else + { + cl_event event = NULL; + openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads, + localThreads, 0, NULL, &event)); + + cl_ulong end_time, queue_time; + + openCLSafeCall(clWaitForEvents(1, &event)); + + openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, + sizeof(cl_ulong), &end_time, 0)); 
+ + openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, + sizeof(cl_ulong), &queue_time, 0)); + + kernelTime = (double)(end_time - queue_time) / (1000 * 1000); + + clReleaseEvent(event); + } + } + + if(finish) + { + clFinish(getClCommandQueue(ctx)); + } + + if(cleanUp) + { + openCLSafeCall(clReleaseKernel(kernel)); + } + + return kernelTime; +} + +//double openCLExecuteKernelInterop(Context *ctx , const char **fileName, const int numFiles, string kernelName, +// size_t globalThreads[3], size_t localThreads[3], +// vector< pair > &args, int channels, int depth, const char *build_options, +// bool finish, bool measureKernelTime, bool cleanUp) +// +//{ +// std::vector fsource; +// for (int i = 0 ; i < numFiles ; i++) +// { +// std::string str; +// if (convertToString(fileName[i], str) >= 0) +// fsource.push_back(str); +// } +// const char **source = new const char *[numFiles]; +// for (int i = 0 ; i < numFiles ; i++) +// source[i] = fsource[i].c_str(); +// double kernelTime = openCLExecuteKernelInterop(ctx ,source, kernelName, globalThreads, localThreads, +// args, channels, depth, build_options, finish, measureKernelTime, cleanUp); +// fsource.clear(); +// delete []source; +// return kernelTime; +//} + +cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value, + const size_t size) +{ + int status; + cl_mem con_struct; + + con_struct = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, &status); + openCLSafeCall(status); + + openCLSafeCall(clEnqueueWriteBuffer(command_queue, con_struct, 1, 0, size, + value, 0, 0, 0)); + + return con_struct; + +} + +}//namespace ocl +}//namespace cv diff --git a/modules/ocl/src/cl_programcache.cpp b/modules/ocl/src/cl_programcache.cpp new file mode 100644 index 0000000000..3261319c34 --- /dev/null +++ b/modules/ocl/src/cl_programcache.cpp @@ -0,0 +1,311 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE 
DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Guoping Long, longguoping@gmail.com +// Niko Li, newlife20080214@gmail.com +// Yao Wang, bitwangyaoyao@gmail.com +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include +#include +#include "binarycaching.hpp" + +#undef __CL_ENABLE_EXCEPTIONS +#include + +namespace cv { namespace ocl { +/* + * The binary caching system to eliminate redundant program source compilation. + * Strictly, this is not a cache because we do not implement evictions right now. + * We shall add such features to trade-off memory consumption and performance when necessary. + */ + +std::auto_ptr _programCache; +ProgramCache* ProgramCache::getProgramCache() +{ + if (NULL == _programCache.get()) + _programCache.reset(new ProgramCache()); + return _programCache.get(); +} + +ProgramCache::ProgramCache() +{ + codeCache.clear(); + cacheSize = 0; +} + +ProgramCache::~ProgramCache() +{ + releaseProgram(); +} + +cl_program ProgramCache::progLookup(string srcsign) +{ + map::iterator iter; + iter = codeCache.find(srcsign); + if(iter != codeCache.end()) + return iter->second; + else + return NULL; +} + +void ProgramCache::addProgram(string srcsign , cl_program program) +{ + if(!progLookup(srcsign)) + { + codeCache.insert(map::value_type(srcsign, program)); + } +} + +void ProgramCache::releaseProgram() +{ + map::iterator iter; + for(iter = codeCache.begin(); iter != codeCache.end(); iter++) + { + openCLSafeCall(clReleaseProgram(iter->second)); + } + codeCache.clear(); + cacheSize = 0; +} + +static int enable_disk_cache = +#ifdef _DEBUG + false; +#else + true; +#endif +static int 
update_disk_cache = false; +static String binpath = ""; + +void setBinaryDiskCache(int mode, String path) +{ + if(mode == CACHE_NONE) + { + update_disk_cache = 0; + enable_disk_cache = 0; + return; + } + update_disk_cache |= (mode & CACHE_UPDATE) == CACHE_UPDATE; + enable_disk_cache |= +#ifdef _DEBUG + (mode & CACHE_DEBUG) == CACHE_DEBUG; +#else + (mode & CACHE_RELEASE) == CACHE_RELEASE; +#endif + if(enable_disk_cache && !path.empty()) + { + binpath = path; + } +} + +void setBinpath(const char *path) +{ + binpath = path; +} + +int savetofile(const Context*, cl_program &program, const char *fileName) +{ + size_t binarySize; + openCLSafeCall(clGetProgramInfo(program, + CL_PROGRAM_BINARY_SIZES, + sizeof(size_t), + &binarySize, NULL)); + char* binary = (char*)malloc(binarySize); + if(binary == NULL) + { + CV_Error(CV_StsNoMem, "Failed to allocate host memory."); + } + openCLSafeCall(clGetProgramInfo(program, + CL_PROGRAM_BINARIES, + sizeof(char *), + &binary, + NULL)); + + FILE *fp = fopen(fileName, "wb+"); + if(fp != NULL) + { + fwrite(binary, binarySize, 1, fp); + free(binary); + fclose(fp); + } + return 1; +} + +cl_program ProgramCache::getProgram(const Context *ctx, const char **source, string kernelName, + const char *build_options) +{ + cl_program program; + cl_int status = 0; + stringstream src_sign; + string srcsign; + string filename; + + if (NULL != build_options) + { + src_sign << (int64)(*source) << getClContext(ctx) << "_" << build_options; + } + else + { + src_sign << (int64)(*source) << getClContext(ctx); + } + srcsign = src_sign.str(); + + program = NULL; + program = ProgramCache::getProgramCache()->progLookup(srcsign); + + if (!program) + { + //config build programs + std::string all_build_options; + if (!ctx->getDeviceInfo().compilationExtraOptions.empty()) + all_build_options += ctx->getDeviceInfo().compilationExtraOptions; + if (build_options != NULL) + { + all_build_options += " "; + all_build_options += build_options; + } + filename = binpath + 
kernelName + "_" + ctx->getDeviceInfo().deviceName + all_build_options + ".clb"; + + FILE *fp = enable_disk_cache ? fopen(filename.c_str(), "rb") : NULL; + if(fp == NULL || update_disk_cache) + { + if(fp != NULL) + fclose(fp); + + program = clCreateProgramWithSource( + getClContext(ctx), 1, source, NULL, &status); + openCLVerifyCall(status); + cl_device_id device = getClDeviceID(ctx); + status = clBuildProgram(program, 1, &device, all_build_options.c_str(), NULL, NULL); + if(status == CL_SUCCESS && enable_disk_cache) + savetofile(ctx, program, filename.c_str()); + } + else + { + fseek(fp, 0, SEEK_END); + size_t binarySize = ftell(fp); + fseek(fp, 0, SEEK_SET); + char *binary = new char[binarySize]; + CV_Assert(1 == fread(binary, binarySize, 1, fp)); + fclose(fp); + cl_int status = 0; + cl_device_id device = getClDeviceID(ctx); + program = clCreateProgramWithBinary(getClContext(ctx), + 1, + &device, + (const size_t *)&binarySize, + (const unsigned char **)&binary, + NULL, + &status); + openCLVerifyCall(status); + status = clBuildProgram(program, 1, &device, all_build_options.c_str(), NULL, NULL); + delete[] binary; + } + + if(status != CL_SUCCESS) + { + if(status == CL_BUILD_PROGRAM_FAILURE) + { + cl_int logStatus; + char *buildLog = NULL; + size_t buildLogSize = 0; + logStatus = clGetProgramBuildInfo(program, + getClDeviceID(ctx), CL_PROGRAM_BUILD_LOG, buildLogSize, + buildLog, &buildLogSize); + if(logStatus != CL_SUCCESS) + std::cout << "Failed to build the program and get the build info." 
<< endl; + buildLog = new char[buildLogSize]; + CV_DbgAssert(!!buildLog); + memset(buildLog, 0, buildLogSize); + openCLSafeCall(clGetProgramBuildInfo(program, getClDeviceID(ctx), + CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL)); + std::cout << "\n\t\t\tBUILD LOG\n"; + std::cout << buildLog << endl; + delete [] buildLog; + } + openCLVerifyCall(status); + } + //Cache the binary for future use if build_options is null + if( (this->cacheSize += 1) < MAX_PROG_CACHE_SIZE) + this->addProgram(srcsign, program); + else + cout << "Warning: code cache has been full.\n"; + } + return program; +} + +//// Converts the contents of a file into a string +//static int convertToString(const char *filename, std::string& s) +//{ +// size_t size; +// char* str; +// +// std::fstream f(filename, (std::fstream::in | std::fstream::binary)); +// if(f.is_open()) +// { +// size_t fileSize; +// f.seekg(0, std::fstream::end); +// size = fileSize = (size_t)f.tellg(); +// f.seekg(0, std::fstream::beg); +// +// str = new char[size+1]; +// if(!str) +// { +// f.close(); +// return -1; +// } +// +// f.read(str, fileSize); +// f.close(); +// str[size] = '\0'; +// +// s = str; +// delete[] str; +// return 0; +// } +// printf("Error: Failed to open file %s\n", filename); +// return -1; +//} + +} // namespace ocl +} // namespace cv diff --git a/modules/ocl/src/error.cpp b/modules/ocl/src/error.cpp index e854e70cd0..cd6d3d5346 100644 --- a/modules/ocl/src/error.cpp +++ b/modules/ocl/src/error.cpp @@ -152,19 +152,19 @@ namespace cv case CL_INVALID_GLOBAL_WORK_SIZE: return "CL_INVALID_GLOBAL_WORK_SIZE"; //case CL_INVALID_PROPERTY: - // return "CL_INVALID_PROPERTY"; + // return "CL_INVALID_PROPERTY"; //case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR: - // return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR"; + // return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR"; //case CL_PLATFORM_NOT_FOUND_KHR: - // return "CL_PLATFORM_NOT_FOUND_KHR"; - // //case CL_INVALID_PROPERTY_EXT: - // // return "CL_INVALID_PROPERTY_EXT"; 
+ // return "CL_PLATFORM_NOT_FOUND_KHR"; + // //case CL_INVALID_PROPERTY_EXT: + // // return "CL_INVALID_PROPERTY_EXT"; //case CL_DEVICE_PARTITION_FAILED_EXT: - // return "CL_DEVICE_PARTITION_FAILED_EXT"; + // return "CL_DEVICE_PARTITION_FAILED_EXT"; //case CL_INVALID_PARTITION_COUNT_EXT: - // return "CL_INVALID_PARTITION_COUNT_EXT"; + // return "CL_INVALID_PARTITION_COUNT_EXT"; //default: - // return "unknown error code"; + // return "unknown error code"; } static char buf[256]; sprintf(buf, "%d", err); diff --git a/modules/ocl/src/fft.cpp b/modules/ocl/src/fft.cpp index b6cc070fb5..c0785ac9d8 100644 --- a/modules/ocl/src/fft.cpp +++ b/modules/ocl/src/fft.cpp @@ -156,25 +156,25 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla { fft_setup(); - bool is_1d_input = (_dft_size.height == 1); - int is_row_dft = flags & DFT_ROWS; + bool is_1d_input = (_dft_size.height == 1); + int is_row_dft = flags & DFT_ROWS; int is_scaled_dft = flags & DFT_SCALE; - int is_inverse = flags & DFT_INVERSE; + int is_inverse = flags & DFT_INVERSE; - //clAmdFftResultLocation place; - clAmdFftLayout inLayout; - clAmdFftLayout outLayout; - clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D; + //clAmdFftResultLocation place; + clAmdFftLayout inLayout; + clAmdFftLayout outLayout; + clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D; - size_t batchSize = is_row_dft ? dft_size.height : 1; + size_t batchSize = is_row_dft ? dft_size.height : 1; size_t clLengthsIn[ 3 ] = {1, 1, 1}; size_t clStridesIn[ 3 ] = {1, 1, 1}; //size_t clLengthsOut[ 3 ] = {1, 1, 1}; size_t clStridesOut[ 3 ] = {1, 1, 1}; - clLengthsIn[0] = dft_size.width; - clLengthsIn[1] = is_row_dft ? 1 : dft_size.height; - clStridesIn[0] = 1; - clStridesOut[0] = 1; + clLengthsIn[0] = dft_size.width; + clLengthsIn[1] = is_row_dft ? 
1 : dft_size.height; + clStridesIn[0] = 1; + clStridesOut[0] = 1; switch(_type) { @@ -206,7 +206,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1]; clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1]; - openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, *(cl_context*)getoclContext(), dim, clLengthsIn ) ); + openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, *(cl_context*)getClContextPtr(), dim, clLengthsIn ) ); openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) ); openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) ); @@ -220,7 +220,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla openCLSafeCall( clAmdFftSetPlanScale ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) ); //ready to bake - openCLSafeCall( clAmdFftBakePlan( plHandle, 1, (cl_command_queue*)getoclCommandQueue(), NULL, NULL ) ); + openCLSafeCall( clAmdFftBakePlan( plHandle, 1, (cl_command_queue*)getClCommandQueuePtr(), NULL, NULL ) ); } cv::ocl::FftPlan::~FftPlan() { @@ -296,12 +296,12 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) // similar assertions with cuda module CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2); - //bool is_1d_input = (src.rows == 1); - //int is_row_dft = flags & DFT_ROWS; - //int is_scaled_dft = flags & DFT_SCALE; - int is_inverse = flags & DFT_INVERSE; - bool is_complex_input = src.channels() == 2; - bool is_complex_output = !(flags & DFT_REAL_OUTPUT); + //bool is_1d_input = (src.rows == 1); + //int is_row_dft = flags & DFT_ROWS; + //int is_scaled_dft = flags & DFT_SCALE; + int is_inverse = flags & DFT_INVERSE; + bool is_complex_input = src.channels() == 2; + bool is_complex_output = !(flags & DFT_REAL_OUTPUT); // We don't support real-to-real transform @@ -338,10 +338,10 @@ void cv::ocl::dft(const 
oclMat &src, oclMat &dst, Size dft_size, int flags) if (buffersize) { cl_int medstatus; - clMedBuffer = clCreateBuffer ( (cl_context)src.clCxt->oclContext(), CL_MEM_READ_WRITE, buffersize, 0, &medstatus); + clMedBuffer = clCreateBuffer ( *(cl_context*)(src.clCxt->getOpenCLContextPtr()), CL_MEM_READ_WRITE, buffersize, 0, &medstatus); openCLSafeCall( medstatus ); } - cl_command_queue clq = (cl_command_queue)src.clCxt->oclCommandQueue(); + cl_command_queue clq = *(cl_command_queue*)(src.clCxt->getOpenCLCommandQueuePtr()); openCLSafeCall( clAmdFftEnqueueTransform( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, 1, diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp index 284dc61632..caaf53d849 100644 --- a/modules/ocl/src/filtering.cpp +++ b/modules/ocl/src/filtering.cpp @@ -1430,7 +1430,7 @@ void cv::ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale) { - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp index ec03c2f932..687f26f632 100644 --- a/modules/ocl/src/gemm.cpp +++ b/modules/ocl/src/gemm.cpp @@ -134,7 +134,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, int offb = src2.offset; int offc = dst.offset; - cl_command_queue clq = (cl_command_queue)src1.clCxt->oclCommandQueue(); + cl_command_queue clq = *(cl_command_queue*)src1.clCxt->getOpenCLCommandQueuePtr(); switch(src1.type()) { case CV_32FC1: diff --git a/modules/ocl/src/gftt.cpp b/modules/ocl/src/gftt.cpp index 37ebaafa38..29a96ae658 100644 --- a/modules/ocl/src/gftt.cpp +++ b/modules/ocl/src/gftt.cpp @@ -338,7 +338,7 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const 
oclMat &poin CV_DbgAssert(points.type() == CV_32FC2); points_v.resize(points.cols); openCLSafeCall(clEnqueueReadBuffer( - *reinterpret_cast(getoclCommandQueue()), + *(cl_command_queue*)getClCommandQueuePtr(), reinterpret_cast(points.data), CL_TRUE, 0, diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp index 212fd2c444..e3e73b3c3d 100644 --- a/modules/ocl/src/haar.cpp +++ b/modules/ocl/src/haar.cpp @@ -745,7 +745,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS if( gimg.cols < minSize.width || gimg.rows < minSize.height ) CV_Error(CV_StsError, "Image too small"); - cl_command_queue qu = reinterpret_cast(Context::getContext()->oclCommandQueue()); + cl_command_queue qu = getClCommandQueue(Context::getContext()); if( (flags & CV_HAAR_SCALE_IMAGE) ) { CvSize winSize0 = cascade->orig_window_size; @@ -788,7 +788,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS size_t blocksize = 8; size_t localThreads[3] = { blocksize, blocksize , 1 }; - size_t globalThreads[3] = { grp_per_CU *(gsum.clCxt->computeUnits()) *localThreads[0], + size_t globalThreads[3] = { grp_per_CU *(gsum.clCxt->getDeviceInfo().maxComputeUnits) *localThreads[0], localThreads[1], 1 }; int outputsz = 256 * globalThreads[0] / localThreads[0]; @@ -949,7 +949,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS int grp_per_CU = 12; size_t blocksize = 8; size_t localThreads[3] = { blocksize, blocksize , 1 }; - size_t globalThreads[3] = { grp_per_CU *gsum.clCxt->computeUnits() *localThreads[0], + size_t globalThreads[3] = { grp_per_CU *gsum.clCxt->getDeviceInfo().maxComputeUnits *localThreads[0], localThreads[1], 1 }; int outputsz = 256 * globalThreads[0] / localThreads[0]; int nodenum = (datasize - sizeof(GpuHidHaarClassifierCascade) - @@ -1120,7 +1120,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std int blocksize = 8; int grp_per_CU = 12; size_t 
localThreads[3] = { blocksize, blocksize, 1 }; - size_t globalThreads[3] = { grp_per_CU * cv::ocl::Context::getContext()->computeUnits() *localThreads[0], + size_t globalThreads[3] = { grp_per_CU * cv::ocl::Context::getContext()->getDeviceInfo().maxComputeUnits *localThreads[0], localThreads[1], 1 }; int outputsz = 256 * globalThreads[0] / localThreads[0]; @@ -1148,7 +1148,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std } int *candidate; - cl_command_queue qu = reinterpret_cast(Context::getContext()->oclCommandQueue()); + cl_command_queue qu = getClCommandQueue(Context::getContext()); if( (flags & CV_HAAR_SCALE_IMAGE) ) { int indexy = 0; @@ -1340,7 +1340,7 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols, GpuHidHaarStageClassifier *stage; GpuHidHaarClassifier *classifier; GpuHidHaarTreeNode *node; - cl_command_queue qu = reinterpret_cast(Context::getContext()->oclCommandQueue()); + cl_command_queue qu = getClCommandQueue(Context::getContext()); if( (flags & CV_HAAR_SCALE_IMAGE) ) { gcascade = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade); @@ -1505,7 +1505,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs( CvSize sz; CvSize winSize0 = oldCascade->orig_window_size; detect_piramid_info *scaleinfo; - cl_command_queue qu = reinterpret_cast(Context::getContext()->oclCommandQueue()); + cl_command_queue qu = getClCommandQueue(Context::getContext()); if (flags & CV_HAAR_SCALE_IMAGE) { for(factor = 1.f;; factor *= scaleFactor) diff --git a/modules/ocl/src/hog.cpp b/modules/ocl/src/hog.cpp index 55872829a9..563172bc13 100644 --- a/modules/ocl/src/hog.cpp +++ b/modules/ocl/src/hog.cpp @@ -157,7 +157,7 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo effect_size = Size(0, 0); - if (queryDeviceInfo()) + if (isCpuDevice()) hog_device_cpu = true; else hog_device_cpu = false; @@ -1670,9 +1670,9 @@ void cv::ocl::device::hog::compute_hists(int nbins, else { 
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName); - int wave_size = queryDeviceInfo(kernel); + size_t wave_size = queryWaveFrontSize(kernel); char opt[32] = {0}; - sprintf(opt, "-D WAVE_SIZE=%d", wave_size); + sprintf(opt, "-D WAVE_SIZE=%d", (int)wave_size); openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1, opt); } @@ -1734,9 +1734,9 @@ void cv::ocl::device::hog::normalize_hists(int nbins, else { cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName); - int wave_size = queryDeviceInfo(kernel); + size_t wave_size = queryWaveFrontSize(kernel); char opt[32] = {0}; - sprintf(opt, "-D WAVE_SIZE=%d", wave_size); + sprintf(opt, "-D WAVE_SIZE=%d", (int)wave_size); openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1, opt); } @@ -1803,9 +1803,9 @@ void cv::ocl::device::hog::classify_hists(int win_height, int win_width, else { cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName); - int wave_size = queryDeviceInfo(kernel); + size_t wave_size = queryWaveFrontSize(kernel); char opt[32] = {0}; - sprintf(opt, "-D WAVE_SIZE=%d", wave_size); + sprintf(opt, "-D WAVE_SIZE=%d", (int)wave_size); openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1, opt); } diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index 7d0d941dfa..0949605e15 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -289,7 +289,7 @@ namespace cv args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&cols)); - if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) + if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) { args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue)); } @@ -317,7 +317,7 @@ namespace cv args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols)); args.push_back( 
make_pair(sizeof(cl_int), (void *)&map1.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&cols)); - if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) + if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) { args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue)); } @@ -380,7 +380,7 @@ namespace cv args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows)); - if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) + if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) { args.push_back( make_pair(sizeof(cl_double), (void *)&ifx_d)); args.push_back( make_pair(sizeof(cl_double), (void *)&ify_d)); @@ -802,12 +802,12 @@ namespace cv string kernelName = "warpAffine" + s[interpolation]; - if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) + if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) { cl_int st; - coeffs_cm = clCreateBuffer( (cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st ); + coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st ); openCLVerifyCall(st); - openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0)); + openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0)); } else { @@ -817,8 +817,8 @@ namespace cv { float_coeffs[m][n] = coeffs[m][n]; } - coeffs_cm = clCreateBuffer( (cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st ); - openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0)); + coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st ); + 
openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0)); } //TODO: improve this kernel @@ -872,12 +872,12 @@ namespace cv string s[3] = {"NN", "Linear", "Cubic"}; string kernelName = "warpPerspective" + s[interpolation]; - if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) + if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) { cl_int st; - coeffs_cm = clCreateBuffer((cl_context) clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st ); + coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st ); openCLVerifyCall(st); - openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0)); + openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0)); } else { @@ -886,9 +886,9 @@ namespace cv for(int n = 0; n < 3; n++) float_coeffs[m][n] = coeffs[m][n]; - coeffs_cm = clCreateBuffer((cl_context) clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st ); + coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st ); openCLVerifyCall(st); - openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0)); + openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0)); } //TODO: improve this kernel size_t blkSizeX = 16, blkSizeY = 16; @@ -994,7 +994,7 @@ namespace cv void integral(const oclMat &src, oclMat &sum, oclMat &sqsum) { CV_Assert(src.type() == CV_8UC1); - if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + 
if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); } @@ -1192,7 +1192,7 @@ namespace cv void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, double k, int borderType) { - if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); } @@ -1211,7 +1211,7 @@ namespace cv void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, int borderType) { - if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); } @@ -1512,17 +1512,17 @@ namespace cv String kernelName = "calcLut"; size_t localThreads[3] = { 32, 8, 1 }; size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 }; - bool is_cpu = queryDeviceInfo(); + bool is_cpu = isCpuDevice(); if (is_cpu) openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, (char*)" -D CPU"); else { cl_kernel kernel = openCLGetKernelFromSource(Context::getContext(), &imgproc_clahe, kernelName); - int wave_size = queryDeviceInfo(kernel); + size_t wave_size = queryWaveFrontSize(kernel); openCLSafeCall(clReleaseKernel(kernel)); static char opt[20] = {0}; - sprintf(opt, " -D WAVE_SIZE=%d", wave_size); + sprintf(opt, " -D WAVE_SIZE=%d", (int)wave_size); openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, opt); } } diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp deleted file mode 100644 index c18984b078..0000000000 --- 
a/modules/ocl/src/initialization.cpp +++ /dev/null @@ -1,1090 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Guoping Long, longguoping@gmail.com -// Niko Li, newlife20080214@gmail.com -// Yao Wang, bitwangyaoyao@gmail.com -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" -#include -#include -#include "binarycaching.hpp" - -using namespace cv; -using namespace cv::ocl; -using namespace std; -using std::cout; -using std::endl; - -//#define PRINT_KERNEL_RUN_TIME -#define RUN_TIMES 100 -#ifndef CL_MEM_USE_PERSISTENT_MEM_AMD -#define CL_MEM_USE_PERSISTENT_MEM_AMD 0 -#endif -//#define AMD_DOUBLE_DIFFER - -namespace cv -{ - namespace ocl - { - extern void fft_teardown(); - extern void clBlasTeardown(); - /* - * The binary caching system to eliminate redundant program source compilation. - * Strictly, this is not a cache because we do not implement evictions right now. - * We shall add such features to trade-off memory consumption and performance when necessary. 
- */ - auto_ptr ProgramCache::programCache; - ProgramCache *programCache = NULL; - DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT; - DevMemRW gDeviceMemRW = DEVICE_MEM_R_W; - int gDevMemTypeValueMap[5] = {0, - CL_MEM_ALLOC_HOST_PTR, - CL_MEM_USE_HOST_PTR, - CL_MEM_COPY_HOST_PTR, - CL_MEM_USE_PERSISTENT_MEM_AMD}; - int gDevMemRWValueMap[3] = {CL_MEM_READ_WRITE, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY}; - - ProgramCache::ProgramCache() - { - codeCache.clear(); - cacheSize = 0; - } - - ProgramCache::~ProgramCache() - { - releaseProgram(); - } - - cl_program ProgramCache::progLookup(string srcsign) - { - map::iterator iter; - iter = codeCache.find(srcsign); - if(iter != codeCache.end()) - return iter->second; - else - return NULL; - } - - void ProgramCache::addProgram(string srcsign , cl_program program) - { - if(!progLookup(srcsign)) - { - codeCache.insert(map::value_type(srcsign, program)); - } - } - - void ProgramCache::releaseProgram() - { - map::iterator iter; - for(iter = codeCache.begin(); iter != codeCache.end(); iter++) - { - openCLSafeCall(clReleaseProgram(iter->second)); - } - codeCache.clear(); - cacheSize = 0; - } - struct Info::Impl - { - cl_platform_id oclplatform; - std::vector devices; - std::vector devName; - std::string clVersion; - - cl_context oclcontext; - cl_command_queue clCmdQueue; - int devnum; - size_t maxWorkGroupSize; - cl_uint maxDimensions; // == maxWorkItemSizes.size() - std::vector maxWorkItemSizes; - cl_uint maxComputeUnits; - char extra_options[512]; - int double_support; - int unified_memory; //1 means integrated GPU, otherwise this value is 0 - int refcounter; - - Impl(); - - void setDevice(void *ctx, void *q, int devnum); - - void release() - { - if(1 == CV_XADD(&refcounter, -1)) - { - releaseResources(); - delete this; - } - } - - Impl* copy() - { - CV_XADD(&refcounter, 1); - return this; - } - - private: - Impl(const Impl&); - Impl& operator=(const Impl&); - void releaseResources(); - }; - - // global variables to hold binary cache 
properties - static int enable_disk_cache = -#ifdef _DEBUG - false; -#else - true; -#endif - static int update_disk_cache = false; - static String binpath = ""; - - Info::Impl::Impl() - :oclplatform(0), - oclcontext(0), - clCmdQueue(0), - devnum(-1), - maxWorkGroupSize(0), - maxDimensions(0), - maxComputeUnits(0), - double_support(0), - unified_memory(0), - refcounter(1) - { - memset(extra_options, 0, 512); - } - - void Info::Impl::releaseResources() - { - devnum = -1; - - if(clCmdQueue) - { - //temporarily disable command queue release as it causes program hang at exit - //openCLSafeCall(clReleaseCommandQueue(clCmdQueue)); - clCmdQueue = 0; - } - - if(oclcontext) - { - openCLSafeCall(clReleaseContext(oclcontext)); - oclcontext = 0; - } - } - - void Info::Impl::setDevice(void *ctx, void *q, int dnum) - { - if((ctx && q) || devnum != dnum) - releaseResources(); - - CV_Assert(dnum >= 0 && dnum < (int)devices.size()); - devnum = dnum; - if(ctx && q) - { - oclcontext = (cl_context)ctx; - clCmdQueue = (cl_command_queue)q; - clRetainContext(oclcontext); - clRetainCommandQueue(clCmdQueue); - } - else - { - cl_int status = 0; - cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(oclplatform), 0 }; - oclcontext = clCreateContext(cps, 1, &devices[devnum], 0, 0, &status); - openCLVerifyCall(status); - clCmdQueue = clCreateCommandQueue(oclcontext, devices[devnum], CL_QUEUE_PROFILING_ENABLE, &status); - openCLVerifyCall(status); - } - - openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&maxWorkGroupSize, 0)); - openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), (void *)&maxDimensions, 0)); - maxWorkItemSizes.resize(maxDimensions); - openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*maxDimensions, (void *)&maxWorkItemSizes[0], 0)); - openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_COMPUTE_UNITS, 
sizeof(cl_uint), (void *)&maxComputeUnits, 0)); - - cl_bool unfymem = false; - openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(cl_bool), (void *)&unfymem, 0)); - unified_memory = unfymem ? 1 : 0; - - //initialize extra options for compilation. Currently only fp64 is included. - //Assume 4KB is enough to store all possible extensions. - const int EXT_LEN = 4096 + 1 ; - char extends_set[EXT_LEN]; - size_t extends_size; - openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_EXTENSIONS, EXT_LEN, (void *)extends_set, &extends_size)); - extends_set[EXT_LEN - 1] = 0; - size_t fp64_khr = std::string(extends_set).find("cl_khr_fp64"); - - if(fp64_khr != std::string::npos) - { - sprintf(extra_options, "-D DOUBLE_SUPPORT"); - double_support = 1; - } - else - { - memset(extra_options, 0, 512); - double_support = 0; - } - } - - ////////////////////////Common OpenCL specific calls/////////////// - int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type) - { - rw_type = gDeviceMemRW; - mem_type = gDeviceMemType; - return Context::getContext()->impl->unified_memory; - } - - int setDevMemType(DevMemRW rw_type, DevMemType mem_type) - { - if( (mem_type == DEVICE_MEM_PM && Context::getContext()->impl->unified_memory == 0) || - mem_type == DEVICE_MEM_UHP || - mem_type == DEVICE_MEM_CHP ) - return -1; - gDeviceMemRW = rw_type; - gDeviceMemType = mem_type; - return 0; - } - - int getDevice(std::vector &oclinfo, int devicetype) - { - //TODO: cache oclinfo vector - oclinfo.clear(); - - switch(devicetype) - { - case CVCL_DEVICE_TYPE_DEFAULT: - case CVCL_DEVICE_TYPE_CPU: - case CVCL_DEVICE_TYPE_GPU: - case CVCL_DEVICE_TYPE_ACCELERATOR: - case CVCL_DEVICE_TYPE_ALL: - break; - default: - return 0; - } - - // Platform info - cl_uint numPlatforms; - openCLSafeCall(clGetPlatformIDs(0, 0, &numPlatforms)); - if(numPlatforms < 1) return 0; - - std::vector platforms(numPlatforms); - openCLSafeCall(clGetPlatformIDs(numPlatforms, &platforms[0], 0)); - - char 
deviceName[256]; - int devcienums = 0; - char clVersion[256]; - for (unsigned i = 0; i < numPlatforms; ++i) - { - cl_uint numsdev = 0; - cl_int status = clGetDeviceIDs(platforms[i], devicetype, 0, NULL, &numsdev); - if(status != CL_DEVICE_NOT_FOUND) - openCLVerifyCall(status); - - if(numsdev > 0) - { - devcienums += numsdev; - std::vector devices(numsdev); - openCLSafeCall(clGetDeviceIDs(platforms[i], devicetype, numsdev, &devices[0], 0)); - - Info ocltmpinfo; - ocltmpinfo.impl->oclplatform = platforms[i]; - openCLSafeCall(clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, sizeof(clVersion), clVersion, NULL)); - ocltmpinfo.impl->clVersion = clVersion; - for(unsigned j = 0; j < numsdev; ++j) - { - ocltmpinfo.impl->devices.push_back(devices[j]); - openCLSafeCall(clGetDeviceInfo(devices[j], CL_DEVICE_NAME, sizeof(deviceName), deviceName, 0)); - ocltmpinfo.impl->devName.push_back(deviceName); - ocltmpinfo.DeviceName.push_back(deviceName); - } - oclinfo.push_back(ocltmpinfo); - } - } - if(devcienums > 0) - { - setDevice(oclinfo[0]); - } - return devcienums; - } - - void setDevice(Info &oclinfo, int devnum) - { - oclinfo.impl->setDevice(0, 0, devnum); - Context::setContext(oclinfo); - } - - void setDeviceEx(Info &oclinfo, void *ctx, void *q, int devnum) - { - oclinfo.impl->setDevice(ctx, q, devnum); - Context::setContext(oclinfo); - } - - void *getoclContext() - { - return &(Context::getContext()->impl->oclcontext); - } - - void *getoclCommandQueue() - { - return &(Context::getContext()->impl->clCmdQueue); - } - - void finish() - { - clFinish(Context::getContext()->impl->clCmdQueue); - } - - //template specializations of queryDeviceInfo - template<> - bool queryDeviceInfo(cl_kernel) - { - Info::Impl* impl = Context::getContext()->impl; - cl_device_type devicetype; - openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum], - CL_DEVICE_TYPE, sizeof(cl_device_type), - &devicetype, NULL)); - return (devicetype == CVCL_DEVICE_TYPE_CPU); - } - - template - static _ty 
queryWavesize(cl_kernel kernel) - { - size_t info = 0; - Info::Impl* impl = Context::getContext()->impl; - bool is_cpu = queryDeviceInfo(); - if(is_cpu) - { - return 1; - } - CV_Assert(kernel != NULL); - openCLSafeCall(clGetKernelWorkGroupInfo(kernel, impl->devices[impl->devnum], - CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &info, NULL)); - return static_cast<_ty>(info); - } - - template<> - size_t queryDeviceInfo(cl_kernel kernel) - { - return queryWavesize(kernel); - } - template<> - int queryDeviceInfo(cl_kernel kernel) - { - return queryWavesize(kernel); - } - - void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size) - { - cl_int status; - status = clEnqueueReadBuffer(clCxt->impl->clCmdQueue, dst_buffer, CL_TRUE, 0, - size, host_buffer, 0, NULL, NULL); - openCLVerifyCall(status); - } - - cl_mem openCLCreateBuffer(Context *clCxt, size_t flag , size_t size) - { - cl_int status; - cl_mem buffer = clCreateBuffer(clCxt->impl->oclcontext, (cl_mem_flags)flag, size, NULL, &status); - openCLVerifyCall(status); - return buffer; - } - - void openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch, - size_t widthInBytes, size_t height) - { - openCLMallocPitchEx(clCxt, dev_ptr, pitch, widthInBytes, height, gDeviceMemRW, gDeviceMemType); - } - - void openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch, - size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type) - { - cl_int status; - *dev_ptr = clCreateBuffer(clCxt->impl->oclcontext, gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type], - widthInBytes * height, 0, &status); - openCLVerifyCall(status); - *pitch = widthInBytes; - } - - void openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch, - const void *src, size_t spitch, - size_t width, size_t height, openCLMemcpyKind kind, int channels) - { - size_t buffer_origin[3] = {0, 0, 0}; - size_t host_origin[3] = {0, 0, 0}; - size_t region[3] = {width, height, 1}; - if(kind == 
clMemcpyHostToDevice) - { - if(dpitch == width || channels == 3 || height == 1) - { - openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE, - 0, width * height, src, 0, NULL, NULL)); - } - else - { - openCLSafeCall(clEnqueueWriteBufferRect(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE, - buffer_origin, host_origin, region, dpitch, 0, spitch, 0, src, 0, 0, 0)); - } - } - else if(kind == clMemcpyDeviceToHost) - { - if(spitch == width || channels == 3 || height == 1) - { - openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE, - 0, width * height, dst, 0, NULL, NULL)); - } - else - { - openCLSafeCall(clEnqueueReadBufferRect(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE, - buffer_origin, host_origin, region, spitch, 0, dpitch, 0, dst, 0, 0, 0)); - } - } - } - - void openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset, - const void *src, size_t spitch, - size_t width, size_t height, int src_offset) - { - size_t src_origin[3] = {src_offset % spitch, src_offset / spitch, 0}; - size_t dst_origin[3] = {dst_offset % dpitch, dst_offset / dpitch, 0}; - size_t region[3] = {width, height, 1}; - - openCLSafeCall(clEnqueueCopyBufferRect(clCxt->impl->clCmdQueue, (cl_mem)src, (cl_mem)dst, src_origin, dst_origin, - region, spitch, 0, dpitch, 0, 0, 0, 0)); - } - - void openCLFree(void *devPtr) - { - openCLSafeCall(clReleaseMemObject((cl_mem)devPtr)); - } - cl_kernel openCLGetKernelFromSource(const Context *clCxt, const char **source, string kernelName) - { - return openCLGetKernelFromSource(clCxt, source, kernelName, NULL); - } - - void setBinaryDiskCache(int mode, String path) - { - if(mode == CACHE_NONE) - { - update_disk_cache = 0; - enable_disk_cache = 0; - return; - } - update_disk_cache |= (mode & CACHE_UPDATE) == CACHE_UPDATE; - enable_disk_cache |= -#ifdef _DEBUG - (mode & CACHE_DEBUG) == CACHE_DEBUG; -#else - (mode & CACHE_RELEASE) == CACHE_RELEASE; -#endif - if(enable_disk_cache && !path.empty()) 
- { - binpath = path; - } - } - - void setBinpath(const char *path) - { - binpath = path; - } - - int savetofile(const Context*, cl_program &program, const char *fileName) - { - size_t binarySize; - openCLSafeCall(clGetProgramInfo(program, - CL_PROGRAM_BINARY_SIZES, - sizeof(size_t), - &binarySize, NULL)); - char* binary = (char*)malloc(binarySize); - if(binary == NULL) - { - CV_Error(CV_StsNoMem, "Failed to allocate host memory."); - } - openCLSafeCall(clGetProgramInfo(program, - CL_PROGRAM_BINARIES, - sizeof(char *), - &binary, - NULL)); - - FILE *fp = fopen(fileName, "wb+"); - if(fp != NULL) - { - fwrite(binary, binarySize, 1, fp); - free(binary); - fclose(fp); - } - return 1; - } - - cl_kernel openCLGetKernelFromSource(const Context *clCxt, const char **source, string kernelName, - const char *build_options) - { - cl_kernel kernel; - cl_program program ; - cl_int status = 0; - stringstream src_sign; - string srcsign; - string filename; - CV_Assert(programCache != NULL); - - if(NULL != build_options) - { - src_sign << (int64)(*source) << clCxt->impl->oclcontext << "_" << build_options; - } - else - { - src_sign << (int64)(*source) << clCxt->impl->oclcontext; - } - srcsign = src_sign.str(); - - program = NULL; - program = programCache->progLookup(srcsign); - - if(!program) - { - //config build programs - char all_build_options[1024]; - memset(all_build_options, 0, 1024); - char zeromem[512] = {0}; - if(0 != memcmp(clCxt -> impl->extra_options, zeromem, 512)) - strcat(all_build_options, clCxt -> impl->extra_options); - strcat(all_build_options, " "); - if(build_options != NULL) - strcat(all_build_options, build_options); - if(all_build_options != NULL) - { - filename = binpath + kernelName + "_" + clCxt->impl->devName[clCxt->impl->devnum] + all_build_options + ".clb"; - } - else - { - filename = binpath + kernelName + "_" + clCxt->impl->devName[clCxt->impl->devnum] + ".clb"; - } - - FILE *fp = enable_disk_cache ? 
fopen(filename.c_str(), "rb") : NULL; - if(fp == NULL || update_disk_cache) - { - if(fp != NULL) - fclose(fp); - - program = clCreateProgramWithSource( - clCxt->impl->oclcontext, 1, source, NULL, &status); - openCLVerifyCall(status); - status = clBuildProgram(program, 1, &(clCxt->impl->devices[clCxt->impl->devnum]), all_build_options, NULL, NULL); - if(status == CL_SUCCESS && enable_disk_cache) - savetofile(clCxt, program, filename.c_str()); - } - else - { - fseek(fp, 0, SEEK_END); - size_t binarySize = ftell(fp); - fseek(fp, 0, SEEK_SET); - char *binary = new char[binarySize]; - CV_Assert(1 == fread(binary, binarySize, 1, fp)); - fclose(fp); - cl_int status = 0; - program = clCreateProgramWithBinary(clCxt->impl->oclcontext, - 1, - &(clCxt->impl->devices[clCxt->impl->devnum]), - (const size_t *)&binarySize, - (const unsigned char **)&binary, - NULL, - &status); - openCLVerifyCall(status); - status = clBuildProgram(program, 1, &(clCxt->impl->devices[clCxt->impl->devnum]), all_build_options, NULL, NULL); - delete[] binary; - } - - if(status != CL_SUCCESS) - { - if(status == CL_BUILD_PROGRAM_FAILURE) - { - cl_int logStatus; - char *buildLog = NULL; - size_t buildLogSize = 0; - logStatus = clGetProgramBuildInfo(program, - clCxt->impl->devices[clCxt->impl->devnum], CL_PROGRAM_BUILD_LOG, buildLogSize, - buildLog, &buildLogSize); - if(logStatus != CL_SUCCESS) - cout << "Failed to build the program and get the build info." 
<< endl; - buildLog = new char[buildLogSize]; - CV_DbgAssert(!!buildLog); - memset(buildLog, 0, buildLogSize); - openCLSafeCall(clGetProgramBuildInfo(program, clCxt->impl->devices[clCxt->impl->devnum], - CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL)); - cout << "\n\t\t\tBUILD LOG\n"; - cout << buildLog << endl; - delete [] buildLog; - } - openCLVerifyCall(status); - } - //Cache the binary for future use if build_options is null - if( (programCache->cacheSize += 1) < programCache->MAX_PROG_CACHE_SIZE) - programCache->addProgram(srcsign, program); - else - cout << "Warning: code cache has been full.\n"; - } - kernel = clCreateKernel(program, kernelName.c_str(), &status); - openCLVerifyCall(status); - return kernel; - } - - void openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads) - { - size_t kernelWorkGroupSize; - openCLSafeCall(clGetKernelWorkGroupInfo(kernel, clCxt->impl->devices[clCxt->impl->devnum], - CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0)); - CV_Assert( localThreads[0] <= clCxt->impl->maxWorkItemSizes[0] ); - CV_Assert( localThreads[1] <= clCxt->impl->maxWorkItemSizes[1] ); - CV_Assert( localThreads[2] <= clCxt->impl->maxWorkItemSizes[2] ); - CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= kernelWorkGroupSize ); - CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= clCxt->impl->maxWorkGroupSize ); - } - - static inline size_t roundUp(size_t sz, size_t n) - { - // we don't assume that n is a power of 2 (see alignSize) - // equal to divUp(sz, n) * n - size_t t = sz + n - 1; - size_t rem = t % n; - size_t result = t - rem; - return result; - } - -#ifdef PRINT_KERNEL_RUN_TIME - static double total_execute_time = 0; - static double total_kernel_time = 0; -#endif - void openCLExecuteKernel_(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], - size_t localThreads[3], vector< pair > &args, int channels, - int depth, const char *build_options) - 
{ - //construct kernel name - //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number - //for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char) - stringstream idxStr; - if(channels != -1) - idxStr << "_C" << channels; - if(depth != -1) - idxStr << "_D" << depth; - kernelName += idxStr.str(); - - cl_kernel kernel; - kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options); - - if ( localThreads != NULL) - { - globalThreads[0] = roundUp(globalThreads[0], localThreads[0]); - globalThreads[1] = roundUp(globalThreads[1], localThreads[1]); - globalThreads[2] = roundUp(globalThreads[2], localThreads[2]); - - cv::ocl::openCLVerifyKernel(clCxt, kernel, localThreads); - } - for(size_t i = 0; i < args.size(); i ++) - openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); - -#ifndef PRINT_KERNEL_RUN_TIME - openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads, - localThreads, 0, NULL, NULL)); -#else - cl_event event = NULL; - openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads, - localThreads, 0, NULL, &event)); - - cl_ulong start_time, end_time, queue_time; - double execute_time = 0; - double total_time = 0; - - openCLSafeCall(clWaitForEvents(1, &event)); - openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, - sizeof(cl_ulong), &start_time, 0)); - - openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, - sizeof(cl_ulong), &end_time, 0)); - - openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, - sizeof(cl_ulong), &queue_time, 0)); - - execute_time = (double)(end_time - start_time) / (1000 * 1000); - total_time = (double)(end_time - queue_time) / (1000 * 1000); - - total_execute_time += execute_time; - total_kernel_time += total_time; - clReleaseEvent(event); -#endif - - 
clFlush(clCxt->impl->clCmdQueue); - openCLSafeCall(clReleaseKernel(kernel)); - } - - void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - vector< pair > &args, int channels, int depth) - { - openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, - channels, depth, NULL); - } - void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - vector< pair > &args, int channels, int depth, const char *build_options) - - { -#ifndef PRINT_KERNEL_RUN_TIME - openCLExecuteKernel_(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, - build_options); -#else - string data_type[] = { "uchar", "char", "ushort", "short", "int", "float", "double"}; - cout << endl; - cout << "Function Name: " << kernelName; - if(depth >= 0) - cout << " |data type: " << data_type[depth]; - cout << " |channels: " << channels; - cout << " |Time Unit: " << "ms" << endl; - - total_execute_time = 0; - total_kernel_time = 0; - cout << "-------------------------------------" << endl; - - cout << setiosflags(ios::left) << setw(15) << "excute time"; - cout << setiosflags(ios::left) << setw(15) << "lauch time"; - cout << setiosflags(ios::left) << setw(15) << "kernel time" << endl; - int i = 0; - for(i = 0; i < RUN_TIMES; i++) - openCLExecuteKernel_(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, - build_options); - - cout << "average kernel excute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl; - cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl; -#endif - } - - double openCLExecuteKernelInterop(Context *clCxt , const char **source, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - vector< pair > &args, int channels, int depth, const char *build_options, - bool finish, bool 
measureKernelTime, bool cleanUp) - - { - //construct kernel name - //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number - //for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char) - stringstream idxStr; - if(channels != -1) - idxStr << "_C" << channels; - if(depth != -1) - idxStr << "_D" << depth; - kernelName += idxStr.str(); - - cl_kernel kernel; - kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options); - - double kernelTime = 0.0; - - if( globalThreads != NULL) - { - if ( localThreads != NULL) - { - globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0]; - globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1]; - globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2]; - - //size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2]; - cv::ocl::openCLVerifyKernel(clCxt, kernel, localThreads); - } - for(size_t i = 0; i < args.size(); i ++) - openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); - - if(measureKernelTime == false) - { - openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads, - localThreads, 0, NULL, NULL)); - } - else - { - cl_event event = NULL; - openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads, - localThreads, 0, NULL, &event)); - - cl_ulong end_time, queue_time; - - openCLSafeCall(clWaitForEvents(1, &event)); - - openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, - sizeof(cl_ulong), &end_time, 0)); - - openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, - sizeof(cl_ulong), &queue_time, 0)); - - kernelTime = (double)(end_time - queue_time) / (1000 * 1000); - - clReleaseEvent(event); - } - } - - if(finish) - { - clFinish(clCxt->impl->clCmdQueue); - } - - if(cleanUp) - { - 
openCLSafeCall(clReleaseKernel(kernel)); - } - - return kernelTime; - } - - // Converts the contents of a file into a string - static int convertToString(const char *filename, std::string& s) - { - size_t size; - char* str; - - std::fstream f(filename, (std::fstream::in | std::fstream::binary)); - if(f.is_open()) - { - size_t fileSize; - f.seekg(0, std::fstream::end); - size = fileSize = (size_t)f.tellg(); - f.seekg(0, std::fstream::beg); - - str = new char[size+1]; - if(!str) - { - f.close(); - return -1; - } - - f.read(str, fileSize); - f.close(); - str[size] = '\0'; - - s = str; - delete[] str; - return 0; - } - printf("Error: Failed to open file %s\n", filename); - return -1; - } - - double openCLExecuteKernelInterop(Context *clCxt , const char **fileName, const int numFiles, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - vector< pair > &args, int channels, int depth, const char *build_options, - bool finish, bool measureKernelTime, bool cleanUp) - - { - std::vector fsource; - for (int i = 0 ; i < numFiles ; i++) - { - std::string str; - if (convertToString(fileName[i], str) >= 0) - fsource.push_back(str); - } - const char **source = new const char *[numFiles]; - for (int i = 0 ; i < numFiles ; i++) - source[i] = fsource[i].c_str(); - double kernelTime = openCLExecuteKernelInterop(clCxt ,source, kernelName, globalThreads, localThreads, - args, channels, depth, build_options, finish, measureKernelTime, cleanUp); - fsource.clear(); - delete []source; - return kernelTime; - } - - cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value, - const size_t size) - { - int status; - cl_mem con_struct; - - con_struct = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, &status); - openCLSafeCall(status); - - openCLSafeCall(clEnqueueWriteBuffer(command_queue, con_struct, 1, 0, size, - value, 0, 0, 0)); - - return con_struct; - - } - - /////////////////////////////OpenCL initialization///////////////// - 
auto_ptr Context::clCxt; - int Context::val = 0; - static Mutex cs; - static volatile int context_tear_down = 0; - - bool initialized() - { - return *((volatile int*)&Context::val) != 0 && - Context::clCxt->impl->clCmdQueue != NULL&& - Context::clCxt->impl->oclcontext != NULL; - } - - Context* Context::getContext() - { - if(*((volatile int*)&val) != 1) - { - AutoLock al(cs); - if(*((volatile int*)&val) != 1) - { - if (context_tear_down) - return clCxt.get(); - if( 0 == clCxt.get()) - clCxt.reset(new Context); - std::vector oclinfo; - CV_Assert(getDevice(oclinfo, CVCL_DEVICE_TYPE_ALL) > 0); - - *((volatile int*)&val) = 1; - } - } - return clCxt.get(); - } - - void Context::setContext(Info &oclinfo) - { - AutoLock guard(cs); - if(*((volatile int*)&val) != 1) - { - if( 0 == clCxt.get()) - clCxt.reset(new Context); - - clCxt.get()->impl = oclinfo.impl->copy(); - - *((volatile int*)&val) = 1; - } - else - { - clCxt.get()->impl->release(); - clCxt.get()->impl = oclinfo.impl->copy(); - } - } - - Context::Context() - { - impl = 0; - programCache = ProgramCache::getProgramCache(); - } - - Context::~Context() - { - release(); - } - - void Context::release() - { - if (impl) - impl->release(); - programCache->releaseProgram(); - } - - bool Context::supportsFeature(int ftype) const - { - switch(ftype) - { - case CL_DOUBLE: - return impl->double_support == 1; - case CL_UNIFIED_MEM: - return impl->unified_memory == 1; - case CL_VER_1_2: - return impl->clVersion.find("OpenCL 1.2") != string::npos; - default: - return false; - } - } - - size_t Context::computeUnits() const - { - return impl->maxComputeUnits; - } - - unsigned long queryLocalMemInfo() - { - Info::Impl* impl = Context::getContext()->impl; - cl_ulong local_memory_size = 0; - clGetDeviceInfo(impl->devices[impl->devnum], CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), (void*)&local_memory_size, 0); - return local_memory_size; - } - - void* Context::oclContext() - { - return impl->oclcontext; - } - - void* 
Context::oclCommandQueue() - { - return impl->clCmdQueue; - } - - Info::Info() - { - impl = new Impl; - } - - void Info::release() - { - fft_teardown(); - clBlasTeardown(); - impl->release(); - impl = new Impl; - DeviceName.clear(); - } - - Info::~Info() - { - fft_teardown(); - clBlasTeardown(); - impl->release(); - } - - Info &Info::operator = (const Info &m) - { - impl->release(); - impl = m.impl->copy(); - DeviceName = m.DeviceName; - return *this; - } - - Info::Info(const Info &m) - { - impl = m.impl->copy(); - DeviceName = m.DeviceName; - } - }//namespace ocl - -}//namespace cv diff --git a/modules/ocl/src/knearest.cpp b/modules/ocl/src/knearest.cpp index fd9f2fed57..02dc72c4ea 100644 --- a/modules/ocl/src/knearest.cpp +++ b/modules/ocl/src/knearest.cpp @@ -44,17 +44,11 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" + using namespace cv; using namespace cv::ocl; -namespace cv -{ - namespace ocl - { - extern const char* knearest;//knearest - } -} - KNearestNeighbour::KNearestNeighbour() { clear(); @@ -112,7 +106,7 @@ void KNearestNeighbour::find_nearest(const oclMat& samples, int k, oclMat& lable k1 = MIN( k1, k ); String kernel_name = "knn_find_nearest"; - cl_ulong local_memory_size = queryLocalMemInfo(); + cl_ulong local_memory_size = (cl_ulong)Context::getContext()->getDeviceInfo().localMemorySize; int nThreads = local_memory_size / (2 * k * 4); if(nThreads >= 256) nThreads = 256; @@ -122,7 +116,7 @@ void KNearestNeighbour::find_nearest(const oclMat& samples, int k, oclMat& lable size_t global_thread[] = {1, samples.rows, 1}; char build_option[50]; - if(!Context::getContext()->supportsFeature(Context::CL_DOUBLE)) + if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) { sprintf(build_option, " "); }else diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index 3ae14eb48d..d247a14794 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -134,7 +134,6 @@ 
void cv::ocl::oclMat::upload(const Mat &m) Size wholeSize; Point ofs; m.locateROI(wholeSize, ofs); - create(wholeSize, m.type()); if (m.channels() == 3) @@ -142,13 +141,12 @@ void cv::ocl::oclMat::upload(const Mat &m) int pitch = wholeSize.width * 3 * m.elemSize1(); int tail_padding = m.elemSize1() * 3072; int err; - cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, + cl_mem temp = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, (pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err); openCLVerifyCall(err); openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3); convert_C3C4(temp, *this); - openCLSafeCall(clReleaseMemObject(temp)); } else @@ -197,13 +195,12 @@ void cv::ocl::oclMat::download(cv::Mat &m) const int pitch = wholecols * 3 * m.elemSize1(); int tail_padding = m.elemSize1() * 3072; int err; - cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, + cl_mem temp = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, (pitch * wholerows + tail_padding - 1) / tail_padding * tail_padding, 0, &err); openCLVerifyCall(err); convert_C4C3(*this, temp); openCLMemcpy2D(clCxt, m.data, m.step, temp, pitch, wholecols * m.elemSize(), wholerows, clMemcpyDeviceToHost, 3); - openCLSafeCall(clReleaseMemObject(temp)); } else @@ -319,7 +316,7 @@ static void convert_run(const oclMat &src, oclMat &dst, double alpha, double bet void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double beta ) const { - if (!clCxt->supportsFeature(Context::CL_DOUBLE) && + if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && (depth() == CV_64F || dst.depth() == CV_64F)) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); @@ -380,7 +377,7 @@ static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, stri #ifdef CL_VERSION_1_2 // 
this enables backwards portability to // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support - if (Context::getContext()->supportsFeature(Context::CL_VER_1_2) && + if (Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2) && dst.offset == 0 && dst.cols == dst.wholecols) { const int sizeofMap[][7] = @@ -392,7 +389,7 @@ static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, stri }; int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()]; - clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), + clEnqueueFillBuffer(getClCommandQueue(dst.clCxt), (cl_mem)dst.data, (void*)mat.data, sizeofGeneric, 0, dst.step * dst.rows, 0, NULL, NULL); } diff --git a/modules/ocl/src/mcwutil.cpp b/modules/ocl/src/mcwutil.cpp index fc94e2f3d8..e4e2e918fb 100644 --- a/modules/ocl/src/mcwutil.cpp +++ b/modules/ocl/src/mcwutil.cpp @@ -101,15 +101,15 @@ namespace cv for(size_t i = 0; i < args.size(); i ++) openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); - openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 3, NULL, globalThreads, + openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), kernel, 3, NULL, globalThreads, localThreads, 0, NULL, NULL)); switch(finish_mode) { case CLFINISH: - clFinish((cl_command_queue)clCxt->oclCommandQueue()); + clFinish(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr()); case CLFLUSH: - clFlush((cl_command_queue)clCxt->oclCommandQueue()); + clFlush(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr()); break; case DISABLE: default: @@ -178,7 +178,7 @@ namespace cv #ifdef CL_VERSION_1_2 //this enables backwards portability to //run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support - if(Context::getContext()->supportsFeature(Context::CL_VER_1_2)) + if(Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2)) { cl_image_desc desc; desc.image_type = 
CL_MEM_OBJECT_IMAGE2D; @@ -191,13 +191,13 @@ namespace cv desc.buffer = NULL; desc.num_mip_levels = 0; desc.num_samples = 0; - texture = clCreateImage((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err); + texture = clCreateImage(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err); } else #endif { texture = clCreateImage2D( - (cl_context)mat.clCxt->oclContext(), + *(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, &format, mat.cols, @@ -212,22 +212,22 @@ namespace cv cl_mem devData; if (mat.cols * mat.elemSize() != mat.step) { - devData = clCreateBuffer((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_ONLY, mat.cols * mat.rows + devData = clCreateBuffer(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_ONLY, mat.cols * mat.rows * mat.elemSize(), NULL, NULL); const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1}; - clEnqueueCopyBufferRect((cl_command_queue)mat.clCxt->oclCommandQueue(), (cl_mem)mat.data, devData, origin, origin, + clEnqueueCopyBufferRect(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), (cl_mem)mat.data, devData, origin, origin, regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL); - clFlush((cl_command_queue)mat.clCxt->oclCommandQueue()); + clFlush(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr()); } else { devData = (cl_mem)mat.data; } - clEnqueueCopyBufferToImage((cl_command_queue)mat.clCxt->oclCommandQueue(), devData, texture, 0, origin, region, 0, NULL, 0); + clEnqueueCopyBufferToImage(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), devData, texture, 0, origin, region, 0, NULL, 0); if ((mat.cols * mat.elemSize() != mat.step)) { - clFlush((cl_command_queue)mat.clCxt->oclCommandQueue()); + clFlush(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr()); clReleaseMemObject(devData); } @@ -259,7 +259,7 @@ namespace cv try { cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel_string, "test_func"); 
- finish(); + cv::ocl::finish(); _support = true; } catch (const cv::Exception& e) diff --git a/modules/ocl/src/moments.cpp b/modules/ocl/src/moments.cpp index 926b94c9b3..24e8b3e0f6 100644 --- a/modules/ocl/src/moments.cpp +++ b/modules/ocl/src/moments.cpp @@ -106,7 +106,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom ) bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2; - if (!cv::ocl::Context::getContext()->supportsFeature(Context::CL_DOUBLE) && is_float) + if (!cv::ocl::Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE) && is_float) { CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!"); } @@ -146,7 +146,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom ) cv::Mat dst(dst_a); a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0; - if (!cv::ocl::Context::getContext()->supportsFeature(Context::CL_DOUBLE)) + if (!cv::ocl::Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) { for (int i = 0; i < contour->total; ++i) { diff --git a/modules/ocl/src/pyrdown.cpp b/modules/ocl/src/pyrdown.cpp index 5043da05dc..89df73e9a8 100644 --- a/modules/ocl/src/pyrdown.cpp +++ b/modules/ocl/src/pyrdown.cpp @@ -15,8 +15,8 @@ // Third party copyrights are property of their respective owners. 
// // @Authors -// Dachuan Zhao, dachuan@multicorewareinc.com -// Yao Wang, yao@multicorewareinc.com +// Dachuan Zhao, dachuan@multicorewareinc.com +// Yao Wang, yao@multicorewareinc.com // // // Redistribution and use in source and binary forms, with or without modification, diff --git a/modules/ocl/src/pyrlk.cpp b/modules/ocl/src/pyrlk.cpp index cdcc8f231f..a69015d190 100644 --- a/modules/ocl/src/pyrlk.cpp +++ b/modules/ocl/src/pyrlk.cpp @@ -125,7 +125,7 @@ static void lkSparse_run(oclMat &I, oclMat &J, args.push_back( make_pair( sizeof(cl_int), (void *)&iters )); args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr )); - bool is_cpu = queryDeviceInfo(); + bool is_cpu = isCpuDevice(); if (is_cpu) { openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), (char*)" -D CPU"); @@ -139,7 +139,7 @@ static void lkSparse_run(oclMat &I, oclMat &J, stringstream idxStr; idxStr << kernelName << "_C" << I.oclchannels() << "_D" << I.depth(); cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str()); - int wave_size = queryDeviceInfo(kernel); + int wave_size = (int)queryWaveFrontSize(kernel); openCLSafeCall(clReleaseKernel(kernel)); static char opt[32] = {0}; diff --git a/modules/ocl/src/pyrup.cpp b/modules/ocl/src/pyrup.cpp index 043031072c..01df30c518 100644 --- a/modules/ocl/src/pyrup.cpp +++ b/modules/ocl/src/pyrup.cpp @@ -15,8 +15,8 @@ // Third party copyrights are property of their respective owners. 
// // @Authors -// Zhang Chunpeng chunpeng@multicorewareinc.com -// Yao Wang, yao@multicorewareinc.com +// Zhang Chunpeng chunpeng@multicorewareinc.com +// Yao Wang, yao@multicorewareinc.com // // // Redistribution and use in source and binary forms, with or without modification, diff --git a/modules/ocl/src/split_merge.cpp b/modules/ocl/src/split_merge.cpp index 79bd0f0e21..fb8d05aaa7 100644 --- a/modules/ocl/src/split_merge.cpp +++ b/modules/ocl/src/split_merge.cpp @@ -75,7 +75,7 @@ namespace cv { static void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst) { - if(!mat_dst.clCxt->supportsFeature(Context::CL_DOUBLE) && mat_dst.type() == CV_64F) + if(!mat_dst.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && mat_dst.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; @@ -170,7 +170,7 @@ namespace cv static void split_vector_run(const oclMat &mat_src, oclMat *mat_dst) { - if(!mat_src.clCxt->supportsFeature(Context::CL_DOUBLE) && mat_src.type() == CV_64F) + if(!mat_src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && mat_src.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; diff --git a/modules/ocl/src/stereo_csbp.cpp b/modules/ocl/src/stereo_csbp.cpp index 9052dc82bd..c8334cca42 100644 --- a/modules/ocl/src/stereo_csbp.cpp +++ b/modules/ocl/src/stereo_csbp.cpp @@ -150,10 +150,10 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&rthis.min_disp_th)); openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&left.step)); openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&rthis.ndisp)); - openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, + openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(*(cl_command_queue*)getoclCommandQueue()); + 
clFinish(*(cl_command_queue*)getClCommandQueuePtr()); openCLSafeCall(clReleaseKernel(kernel)); } @@ -200,9 +200,9 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_int), (void *)&rthis.min_disp_th)); openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int), (void *)&cdisp_step1)); openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int), (void *)&msg_step)); - openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 3, NULL, + openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 3, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(*(cl_command_queue*)getoclCommandQueue()); + clFinish(*(cl_command_queue*)getClCommandQueuePtr()); openCLSafeCall(clReleaseKernel(kernel)); } @@ -235,10 +235,10 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&msg_step)); openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&disp_step)); openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&rthis.ndisp)); - openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, + openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(*(cl_command_queue*)getoclCommandQueue()); + clFinish(*(cl_command_queue*)getClCommandQueuePtr()); openCLSafeCall(clReleaseKernel(kernel)); } static void get_first_initial_global_caller(uchar *data_cost_selected, uchar *disp_selected_pyr, @@ -270,10 +270,10 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&msg_step)); openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&disp_step)); openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&rthis.ndisp)); - openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, + 
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(*(cl_command_queue*)getoclCommandQueue()); + clFinish(*(cl_command_queue*)getClCommandQueuePtr()); openCLSafeCall(clReleaseKernel(kernel)); } @@ -340,10 +340,10 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_float), (void *)&rthis.max_data_term)); openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int), (void *)&left.step)); openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int), (void *)&rthis.min_disp_th)); - openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, + openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(*(cl_command_queue*)getoclCommandQueue()); + clFinish(*(cl_command_queue*)getClCommandQueuePtr()); openCLSafeCall(clReleaseKernel(kernel)); } static void compute_data_cost_reduce_caller(uchar *disp_selected_pyr, uchar *data_cost, @@ -391,10 +391,10 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, 17, sizeof(cl_float), (void *)&rthis.max_data_term)); openCLSafeCall(clSetKernelArg(kernel, 18, sizeof(cl_int), (void *)&left.step)); openCLSafeCall(clSetKernelArg(kernel, 19, sizeof(cl_int), (void *)&rthis.min_disp_th)); - openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 3, NULL, + openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 3, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(*(cl_command_queue*)getoclCommandQueue()); + clFinish(*(cl_command_queue*)getClCommandQueuePtr()); openCLSafeCall(clReleaseKernel(kernel)); } static void compute_data_cost(uchar *disp_selected_pyr, uchar *data_cost, StereoConstantSpaceBP &rthis, @@ -458,10 +458,10 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, 20, sizeof(cl_int), (void *)&disp_step2)); 
openCLSafeCall(clSetKernelArg(kernel, 21, sizeof(cl_int), (void *)&msg_step1)); openCLSafeCall(clSetKernelArg(kernel, 22, sizeof(cl_int), (void *)&msg_step2)); - openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, + openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(*(cl_command_queue*)getoclCommandQueue()); + clFinish(*(cl_command_queue*)getClCommandQueuePtr()); openCLSafeCall(clReleaseKernel(kernel)); } //////////////////////////////////////////////////////////////////////////////////////////////// @@ -500,10 +500,10 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&disp_step)); openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&msg_step)); openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_float), (void *)&rthis.disc_single_jump)); - openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, + openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(*(cl_command_queue*)getoclCommandQueue()); + clFinish(*(cl_command_queue*)getClCommandQueuePtr()); openCLSafeCall(clReleaseKernel(kernel)); } static void calc_all_iterations(uchar *u, uchar *d, uchar *l, uchar *r, uchar *data_cost_selected, @@ -552,10 +552,10 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&nr_plane)); openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&msg_step)); openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&disp_step)); - openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, + openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - 
clFinish(*(cl_command_queue*)getoclCommandQueue()); + clFinish(*(cl_command_queue*)getClCommandQueuePtr()); openCLSafeCall(clReleaseKernel(kernel)); } } diff --git a/modules/ocl/src/stereobp.cpp b/modules/ocl/src/stereobp.cpp index fe9136057b..5bc93aa3f5 100644 --- a/modules/ocl/src/stereobp.cpp +++ b/modules/ocl/src/stereobp.cpp @@ -95,7 +95,10 @@ namespace cv con_struct -> cmax_disc_term = max_disc_term; con_struct -> cdisc_single_jump = disc_single_jump; - cl_con_struct = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()), (void *)con_struct, + Context* clCtx = Context::getContext(); + cl_context clContext = *(cl_context*)(clCtx->getOpenCLContextPtr()); + cl_command_queue clCmdQueue = *(cl_command_queue*)(clCtx->getOpenCLCommandQueuePtr()); + cl_con_struct = load_constant(clContext, clCmdQueue, (void *)con_struct, sizeof(con_struct_t)); delete con_struct; diff --git a/modules/ocl/src/tvl1flow.cpp b/modules/ocl/src/tvl1flow.cpp index 606ac530f7..c9a3f7abc1 100644 --- a/modules/ocl/src/tvl1flow.cpp +++ b/modules/ocl/src/tvl1flow.cpp @@ -15,7 +15,7 @@ // Third party copyrights are property of their respective owners. 
// // @Authors -// Jin Ma, jin@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // diff --git a/modules/ocl/test/main.cpp b/modules/ocl/test/main.cpp index 594c196a59..8071102bad 100644 --- a/modules/ocl/test/main.cpp +++ b/modules/ocl/test/main.cpp @@ -80,18 +80,18 @@ int main(int argc, char **argv) const char *keys = "{ h | help | false | print help message }" "{ t | type | gpu | set device type:cpu or gpu}" - "{ p | platform | 0 | set platform id }" + "{ p | platform | -1 | set platform id }" "{ d | device | 0 | set device id }"; CommandLineParser cmd(argc, argv, keys); if (cmd.get("help")) { - cout << "Avaible options besides goole test option:" << endl; + cout << "Available options besides google test option:" << endl; cmd.printParams(); return 0; } string type = cmd.get("type"); - unsigned int pid = cmd.get("platform"); + int pid = cmd.get("platform"); int device = cmd.get("device"); print_info(); @@ -100,24 +100,29 @@ int main(int argc, char **argv) { flag = CVCL_DEVICE_TYPE_CPU; } - std::vector oclinfo; - int devnums = getDevice(oclinfo, flag); - if(devnums <= device || device < 0) + + cv::ocl::PlatformsInfo platformsInfo; + cv::ocl::getOpenCLPlatforms(platformsInfo); + if (pid >= (int)platformsInfo.size()) { - std::cout << "device invalid\n"; - return -1; + std::cout << "platform is invalid\n"; + return 1; } - if(pid >= oclinfo.size()) + + cv::ocl::DevicesInfo devicesInfo; + int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? 
NULL : platformsInfo[pid]); + if (device < 0 || device >= devnums) { - std::cout << "platform invalid\n"; - return -1; + std::cout << "device/platform invalid\n"; + return 1; } - setDevice(oclinfo[pid], device); - + cv::ocl::setDevice(devicesInfo[device]); setBinaryDiskCache(CACHE_UPDATE); - cout << "Device type:" << type << endl << "Device name:" << oclinfo[pid].DeviceName[device] << endl; + cout << "Device type: " << type << endl + << "Platform name: " << devicesInfo[device]->platform->platformName << endl + << "Device name: " << devicesInfo[device]->deviceName << endl; return RUN_ALL_TESTS(); } diff --git a/modules/superres/perf/perf_superres_ocl.cpp b/modules/superres/perf/perf_superres_ocl.cpp index 0b9864cbd3..822b87f441 100644 --- a/modules/superres/perf/perf_superres_ocl.cpp +++ b/modules/superres/perf/perf_superres_ocl.cpp @@ -107,9 +107,6 @@ PERF_TEST_P(Size_MatType, SuperResolution_BTVL1_OCL, Combine(Values(szSmall64, szSmall128), Values(MatType(CV_8UC1), MatType(CV_8UC3)))) { - std::vectorinfo; - cv::ocl::getDevice(info); - declare.time(5 * 60); const Size size = std::tr1::get<0>(GetParam()); diff --git a/modules/superres/src/btv_l1_ocl.cpp b/modules/superres/src/btv_l1_ocl.cpp index 2f27d50259..5aecca0630 100644 --- a/modules/superres/src/btv_l1_ocl.cpp +++ b/modules/superres/src/btv_l1_ocl.cpp @@ -232,7 +232,7 @@ void btv_l1_device_ocl::calcBtvRegularization(const oclMat& src, oclMat& dst, in cl_mem c_btvRegWeights; size_t count = btvWeights_size * sizeof(float); c_btvRegWeights = openCLCreateBuffer(clCxt, CL_MEM_READ_ONLY, count); - int cl_safe_check = clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), c_btvRegWeights, 1, 0, count, btvWeights_, 0, NULL, NULL); + int cl_safe_check = clEnqueueWriteBuffer(getClCommandQueue(clCxt), c_btvRegWeights, 1, 0, count, btvWeights_, 0, NULL, NULL); CV_Assert(cl_safe_check == CL_SUCCESS); args.push_back(make_pair(sizeof(cl_mem), (void*)&src_.data)); diff --git 
a/modules/superres/test/test_superres.cpp b/modules/superres/test/test_superres.cpp index 1530d6d667..5cb078f77c 100644 --- a/modules/superres/test/test_superres.cpp +++ b/modules/superres/test/test_superres.cpp @@ -278,8 +278,6 @@ TEST_F(SuperResolution, BTVL1_GPU) #if defined(HAVE_OPENCV_OCL) && defined(HAVE_OPENCL) TEST_F(SuperResolution, BTVL1_OCL) { - std::vector infos; - cv::ocl::getDevice(infos); RunTest(cv::superres::createSuperResolution_BTVL1_OCL()); } #endif From b00f79ac5f8e8e876d9e1969b2e5f5d04c828090 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 27 Sep 2013 16:41:25 +0400 Subject: [PATCH 25/39] ocl: move program names into opencl_kernels.hpp --- cmake/OpenCVModule.cmake | 8 +- cmake/cl2cpp.cmake | 25 ++- modules/nonfree/src/surf.ocl.cpp | 171 ++++++++++----------- modules/ocl/src/bgfg_mog.cpp | 5 +- modules/ocl/src/blend.cpp | 12 +- modules/ocl/src/brute_force_matcher.cpp | 14 +- modules/ocl/src/build_warps.cpp | 11 +- modules/ocl/src/canny.cpp | 11 +- modules/ocl/src/color.cpp | 9 +- modules/ocl/src/columnsum.cpp | 11 +- modules/ocl/src/fft.cpp | 2 - modules/ocl/src/filtering.cpp | 17 +- modules/ocl/src/gemm.cpp | 1 - modules/ocl/src/gftt.cpp | 11 +- modules/ocl/src/haar.cpp | 16 +- modules/ocl/src/hog.cpp | 12 +- modules/ocl/src/imgproc.cpp | 21 +-- modules/ocl/src/interpolate_frames.cpp | 5 +- modules/ocl/src/kalman.cpp | 3 +- modules/ocl/src/kmeans.cpp | 13 +- modules/ocl/src/match_template.cpp | 13 +- modules/ocl/src/matrix_operations.cpp | 17 +- modules/ocl/src/moments.cpp | 5 +- modules/ocl/src/mssegmentation.cpp | 4 +- modules/ocl/src/optical_flow_farneback.cpp | 11 +- modules/ocl/src/pyrdown.cpp | 15 +- modules/ocl/src/pyrlk.cpp | 11 +- modules/ocl/src/pyrup.cpp | 10 +- modules/ocl/src/sort_by_key.cpp | 10 +- modules/ocl/src/split_merge.cpp | 20 +-- modules/ocl/src/stereo_csbp.cpp | 44 +----- modules/ocl/src/stereobm.cpp | 14 +- modules/ocl/src/stereobp.cpp | 18 +-- modules/ocl/src/tvl1flow.cpp | 13 +- 
modules/superres/src/btv_l1_ocl.cpp | 3 +- 35 files changed, 156 insertions(+), 430 deletions(-) diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index 1d87bc1b88..4ed8cf0c0f 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -450,11 +450,11 @@ macro(ocv_glob_module_sources) if(HAVE_OPENCL AND cl_kernels) ocv_include_directories(${OPENCL_INCLUDE_DIRS}) add_custom_command( - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp" - COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake" + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp" + COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake" DEPENDS ${cl_kernels} "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake") - source_group("Src\\OpenCL" FILES ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp") - list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp") + source_group("OpenCL" FILES ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp") + list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp") endif() source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs}) diff --git a/cmake/cl2cpp.cmake b/cmake/cl2cpp.cmake index 1e932eabdb..0733a42441 100644 --- a/cmake/cl2cpp.cmake +++ b/cmake/cl2cpp.cmake @@ -1,6 +1,20 @@ file(GLOB cl_list "${CL_DIR}/*.cl" ) +list(SORT cl_list) -file(WRITE ${OUTPUT} "// This file is auto-generated. Do not edit! +string(REPLACE ".cpp" ".hpp" OUTPUT_HPP "${OUTPUT}") +get_filename_component(OUTPUT_HPP_NAME "${OUTPUT_HPP}" NAME) + +set(STR_CPP "// This file is auto-generated. Do not edit! 
+ +#include \"${OUTPUT_HPP_NAME}\" + +namespace cv +{ +namespace ocl +{ +") + +set(STR_HPP "// This file is auto-generated. Do not edit! namespace cv { @@ -29,7 +43,12 @@ foreach(cl ${cl_list}) string(REGEX REPLACE "\"$" "" lines "${lines}") # unneeded " at the eof - file(APPEND ${OUTPUT} "const char* ${cl_filename}=\"${lines};\n") + set(STR_CPP "${STR_CPP}const char* ${cl_filename}=\"${lines};\n") + set(STR_HPP "${STR_HPP}extern const char* ${cl_filename};\n") endforeach() -file(APPEND ${OUTPUT} "}\n}\n") +set(STR_CPP "${STR_CPP}}\n}\n") +set(STR_HPP "${STR_HPP}}\n}\n") + +file(WRITE ${OUTPUT} "${STR_CPP}") +file(WRITE ${OUTPUT_HPP} "${STR_HPP}") diff --git a/modules/nonfree/src/surf.ocl.cpp b/modules/nonfree/src/surf.ocl.cpp index 59eab705d6..d6f72bc7ad 100644 --- a/modules/nonfree/src/surf.ocl.cpp +++ b/modules/nonfree/src/surf.ocl.cpp @@ -43,27 +43,24 @@ // //M*/ #include "precomp.hpp" -#include #ifdef HAVE_OPENCV_OCL +#include +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; namespace cv { namespace ocl { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *surf; - const char noImage2dOption [] = "-D DISABLE_IMAGE2D"; static bool use_image2d = false; static void openCLExecuteKernelSURF(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], - size_t localThreads[3], vector< pair > &args, int channels, int depth) + size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth) { char optBuf [100] = {0}; char * optBufPtr = optBuf; @@ -486,26 +483,26 @@ void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, i Context *clCxt = det.clCxt; string kernelName = "icvCalcLayerDetAndTrace"; - vector< pair > args; + std::vector< std::pair > args; if(sumTex) { - args.push_back( make_pair( sizeof(cl_mem), (void *)&sumTex)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&sumTex)); } else { - 
args.push_back( make_pair( sizeof(cl_mem), (void *)&surf_.sum.data)); // if image2d is not supported + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&surf_.sum.data)); // if image2d is not supported } - args.push_back( make_pair( sizeof(cl_mem), (void *)&det.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&trace.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&det.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&trace.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&nOctaveLayers)); - args.push_back( make_pair( sizeof(cl_int), (void *)&octave)); - args.push_back( make_pair( sizeof(cl_int), (void *)&c_layer_rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&surf_.sum.step)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&det.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trace.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&det.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&trace.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&nOctaveLayers)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&octave)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&c_layer_rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&surf_.sum.step)); size_t localThreads[3] = {16, 16, 1}; size_t globalThreads[3] = @@ -524,35 +521,35 @@ void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat Context *clCxt = det.clCxt; string kernelName = useMask ? 
"icvFindMaximaInLayer_withmask" : "icvFindMaximaInLayer"; - vector< pair > args; - - args.push_back( make_pair( sizeof(cl_mem), (void *)&det.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&trace.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&maxCounter.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&counterOffset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&det.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&trace.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&nLayers)); - args.push_back( make_pair( sizeof(cl_int), (void *)&octave)); - args.push_back( make_pair( sizeof(cl_int), (void *)&layer_rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&layer_cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&maxCandidates)); - args.push_back( make_pair( sizeof(cl_float), (void *)&surf_.hessianThreshold)); + std::vector< std::pair > args; + + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&det.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trace.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&maxCounter.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&counterOffset)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&det.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&trace.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&nLayers)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&octave)); + args.push_back( std::make_pair( 
sizeof(cl_int), (void *)&layer_rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&layer_cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&maxCandidates)); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&surf_.hessianThreshold)); if(useMask) { if(maskSumTex) { - args.push_back( make_pair( sizeof(cl_mem), (void *)&maskSumTex)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&maskSumTex)); } else { - args.push_back( make_pair( sizeof(cl_mem), (void *)&surf_.maskSum.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&surf_.maskSum.data)); } - args.push_back( make_pair( sizeof(cl_mem), (void *)&surf_.maskSum.step)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&surf_.maskSum.step)); } size_t localThreads[3] = {16, 16, 1}; size_t globalThreads[3] = {divUp(layer_cols - 2 * min_margin, localThreads[0] - 2) *localThreads[0], @@ -568,19 +565,19 @@ void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMa { Context *clCxt = det.clCxt; string kernelName = "icvInterpolateKeypoint"; - vector< pair > args; - - args.push_back( make_pair( sizeof(cl_mem), (void *)&det.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&counters_.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&det.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&octave)); - args.push_back( make_pair( sizeof(cl_int), (void *)&layer_rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&max_features)); + std::vector< std::pair > args; + + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&det.data)); + args.push_back( 
std::make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counters_.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&det.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&octave)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&layer_rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&max_features)); size_t localThreads[3] = {3, 3, 3}; size_t globalThreads[3] = {maxCounter *localThreads[0], localThreads[1], 1}; @@ -593,21 +590,21 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeat Context *clCxt = counters.clCxt; string kernelName = "icvCalcOrientation"; - vector< pair > args; + std::vector< std::pair > args; if(sumTex) { - args.push_back( make_pair( sizeof(cl_mem), (void *)&sumTex)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&sumTex)); } else { - args.push_back( make_pair( sizeof(cl_mem), (void *)&surf_.sum.data)); // if image2d is not supported + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&surf_.sum.data)); // if image2d is not supported } - args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&surf_.sum.step)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void 
*)&img_rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&surf_.sum.step)); size_t localThreads[3] = {32, 4, 1}; size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1}; @@ -620,11 +617,11 @@ void SURF_OCL_Invoker::icvSetUpright_gpu(const oclMat &keypoints, int nFeatures) Context *clCxt = counters.clCxt; string kernelName = "icvSetUpright"; - vector< pair > args; + std::vector< std::pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&nFeatures)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&nFeatures)); size_t localThreads[3] = {256, 1, 1}; size_t globalThreads[3] = {saturate_cast(nFeatures), 1, 1}; @@ -638,7 +635,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const // compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D Context *clCxt = descriptors.clCxt; string kernelName; - vector< pair > args; + std::vector< std::pair > args; size_t localThreads[3] = {1, 1, 1}; size_t globalThreads[3] = {1, 1, 1}; @@ -655,19 +652,19 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const args.clear(); if(imgTex) { - args.push_back( make_pair( sizeof(cl_mem), (void *)&imgTex)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&imgTex)); } else { - args.push_back( make_pair( sizeof(cl_mem), (void *)&_img.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&_img.data)); } - args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data)); - args.push_back( 
make_pair( sizeof(cl_int), (void *)&descriptors.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&_img.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&_img.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&_img.step)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step)); openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1); @@ -680,8 +677,8 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const globalThreads[1] = localThreads[1]; args.clear(); - args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step)); openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1); } @@ -698,19 +695,19 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const args.clear(); if(imgTex) { - args.push_back( make_pair( sizeof(cl_mem), (void *)&imgTex)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&imgTex)); } else { - args.push_back( make_pair( sizeof(cl_mem), (void *)&_img.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&_img.data)); } - args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); - 
args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&_img.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&_img.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&_img.step)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step)); openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1); @@ -723,8 +720,8 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const globalThreads[1] = localThreads[1]; args.clear(); - args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step)); openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1); } diff --git a/modules/ocl/src/bgfg_mog.cpp b/modules/ocl/src/bgfg_mog.cpp index cb0dee80f8..064fef8d30 100644 --- a/modules/ocl/src/bgfg_mog.cpp +++ b/modules/ocl/src/bgfg_mog.cpp @@ -44,14 +44,15 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" + using namespace cv; using namespace cv::ocl; + namespace cv { namespace ocl { - extern const char* bgfg_mog; - typedef struct _contant_struct { 
cl_float c_Tb; diff --git a/modules/ocl/src/blend.cpp b/modules/ocl/src/blend.cpp index ec73c8662c..58b91d8c3f 100644 --- a/modules/ocl/src/blend.cpp +++ b/modules/ocl/src/blend.cpp @@ -44,20 +44,10 @@ //M*/ #include "precomp.hpp" -#include +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -namespace cv -{ - namespace ocl - { - ////////////////////////////////////OpenCL kernel strings////////////////////////// - extern const char *blend_linear; - } -} void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result) diff --git a/modules/ocl/src/brute_force_matcher.cpp b/modules/ocl/src/brute_force_matcher.cpp index 0273ed5891..c348db8f30 100644 --- a/modules/ocl/src/brute_force_matcher.cpp +++ b/modules/ocl/src/brute_force_matcher.cpp @@ -45,22 +45,10 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" -#include -#include -#include using namespace cv; using namespace cv::ocl; -using namespace std; - -namespace cv -{ - namespace ocl - { - ////////////////////////////////////OpenCL kernel strings////////////////////////// - extern const char *brute_force_match; - } -} static const int OPT_SIZE = 100; diff --git a/modules/ocl/src/build_warps.cpp b/modules/ocl/src/build_warps.cpp index c4a092993a..4c400a2b68 100644 --- a/modules/ocl/src/build_warps.cpp +++ b/modules/ocl/src/build_warps.cpp @@ -44,19 +44,10 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -namespace cv -{ - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *build_warps; - } -} ////////////////////////////////////////////////////////////////////////////// // buildWarpPlaneMaps diff --git a/modules/ocl/src/canny.cpp b/modules/ocl/src/canny.cpp index a25c1973ef..9fc6f65b44 100644 --- a/modules/ocl/src/canny.cpp +++ 
b/modules/ocl/src/canny.cpp @@ -44,19 +44,10 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -namespace cv -{ - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *imgproc_canny; - } -} cv::ocl::CannyBuf::CannyBuf(const oclMat &dx_, const oclMat &dy_) : dx(dx_), dy(dy_), counter(NULL) { diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp index 493dbc33c4..443065077c 100644 --- a/modules/ocl/src/color.cpp +++ b/modules/ocl/src/color.cpp @@ -45,6 +45,7 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; @@ -57,14 +58,6 @@ using namespace cv::ocl; #define FLT_EPSILON 1.192092896e-07F #endif -namespace cv -{ -namespace ocl -{ -extern const char *cvt_color; -} -} - namespace { void RGB2Gray_caller(const oclMat &src, oclMat &dst, int bidx) diff --git a/modules/ocl/src/columnsum.cpp b/modules/ocl/src/columnsum.cpp index 1d6939f4e1..46ff73d224 100644 --- a/modules/ocl/src/columnsum.cpp +++ b/modules/ocl/src/columnsum.cpp @@ -43,20 +43,11 @@ // //M*/ -#include #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -namespace cv -{ - namespace ocl - { - extern const char *imgproc_columnsum; - } -} void cv::ocl::columnSum(const oclMat &src, oclMat &dst) { diff --git a/modules/ocl/src/fft.cpp b/modules/ocl/src/fft.cpp index c0785ac9d8..e39a4443c4 100644 --- a/modules/ocl/src/fft.cpp +++ b/modules/ocl/src/fft.cpp @@ -42,12 +42,10 @@ // the use of this software, even if advised of the possibility of such damage. 
// //M*/ -#include #include "precomp.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; #if !defined HAVE_CLAMDFFT void cv::ocl::dft(const oclMat&, oclMat&, Size, int) diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp index caaf53d849..758923f55c 100644 --- a/modules/ocl/src/filtering.cpp +++ b/modules/ocl/src/filtering.cpp @@ -48,26 +48,11 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" -using namespace std; using namespace cv; using namespace cv::ocl; -//helper routines -namespace cv -{ -namespace ocl -{ -///////////////////////////OpenCL kernel strings/////////////////////////// -extern const char *filtering_boxFilter; -extern const char *filter_sep_row; -extern const char *filter_sep_col; -extern const char *filtering_laplacian; -extern const char *filtering_morph; -extern const char *filtering_adaptive_bilateral; -} -} - namespace { inline void normalizeAnchor(int &anchor, int ksize) diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp index 687f26f632..837fd1fa30 100644 --- a/modules/ocl/src/gemm.cpp +++ b/modules/ocl/src/gemm.cpp @@ -43,7 +43,6 @@ // //M*/ -#include #include "precomp.hpp" namespace cv { namespace ocl { diff --git a/modules/ocl/src/gftt.cpp b/modules/ocl/src/gftt.cpp index 29a96ae658..e24c0a5856 100644 --- a/modules/ocl/src/gftt.cpp +++ b/modules/ocl/src/gftt.cpp @@ -42,23 +42,14 @@ // the use of this software, even if advised of the possibility of such damage. 
// //M*/ -#include #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; static bool use_cpu_sorter = true; -namespace cv -{ - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *imgproc_gftt; - } -} - namespace { enum SortMethod diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp index e3e73b3c3d..aac3785e79 100644 --- a/modules/ocl/src/haar.cpp +++ b/modules/ocl/src/haar.cpp @@ -49,24 +49,10 @@ //M*/ #include "precomp.hpp" -#include -#include +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - - -namespace cv -{ -namespace ocl -{ -///////////////////////////OpenCL kernel strings/////////////////////////// -extern const char *haarobjectdetect; -extern const char *haarobjectdetectbackup; -extern const char *haarobjectdetect_scaled2; -} -} /* these settings affect the quality of detection: change with care */ #define CV_ADJUST_FEATURES 1 diff --git a/modules/ocl/src/hog.cpp b/modules/ocl/src/hog.cpp index 563172bc13..2d2de9a2be 100644 --- a/modules/ocl/src/hog.cpp +++ b/modules/ocl/src/hog.cpp @@ -44,9 +44,10 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" + using namespace cv; using namespace cv::ocl; -using namespace std; #define CELL_WIDTH 8 #define CELL_HEIGHT 8 @@ -57,15 +58,6 @@ using namespace std; static oclMat gauss_w_lut; static bool hog_device_cpu; -namespace cv -{ - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *objdetect_hog; - } -} - namespace cv { namespace ocl diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index 0949605e15..b4d2b70a0d 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -54,34 +54,15 @@ //M*/ #include "precomp.hpp" -#include +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; namespace cv 
{ namespace ocl { - - ////////////////////////////////////OpenCL kernel strings////////////////////////// - extern const char *meanShift; - extern const char *imgproc_copymakeboder; - extern const char *imgproc_median; - extern const char *imgproc_threshold; - extern const char *imgproc_resize; - extern const char *imgproc_remap; - extern const char *imgproc_warpAffine; - extern const char *imgproc_warpPerspective; - extern const char *imgproc_integral_sum; - extern const char *imgproc_integral; - extern const char *imgproc_histogram; - extern const char *imgproc_bilateral; - extern const char *imgproc_calcHarris; - extern const char *imgproc_calcMinEigenVal; - extern const char *imgproc_convolve; - extern const char *imgproc_clahe; ////////////////////////////////////OpenCL call wrappers//////////////////////////// template struct index_and_sizeof; diff --git a/modules/ocl/src/interpolate_frames.cpp b/modules/ocl/src/interpolate_frames.cpp index 43b766054f..54063cd7f7 100644 --- a/modules/ocl/src/interpolate_frames.cpp +++ b/modules/ocl/src/interpolate_frames.cpp @@ -44,8 +44,8 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" -using namespace std; using namespace cv; using namespace cv::ocl; @@ -53,9 +53,6 @@ namespace cv { namespace ocl { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *interpolate_frames; - namespace interpolate { //The following are ported from NPP_staging.cu diff --git a/modules/ocl/src/kalman.cpp b/modules/ocl/src/kalman.cpp index 8a5b0d4c2c..6f8243457c 100644 --- a/modules/ocl/src/kalman.cpp +++ b/modules/ocl/src/kalman.cpp @@ -44,7 +44,6 @@ //M*/ #include "precomp.hpp" -using namespace std; using namespace cv; using namespace cv::ocl; @@ -132,4 +131,4 @@ CV_EXPORTS const oclMat& KalmanFilter::correct(const oclMat& measurement) gemm(gain, temp5, 1, statePre, 1, statePost); gemm(gain, temp2, -1, errorCovPre, 1, errorCovPost); return statePost; -} \ No newline at end of file +} 
diff --git a/modules/ocl/src/kmeans.cpp b/modules/ocl/src/kmeans.cpp index 4de42fce55..06ed8b36ab 100644 --- a/modules/ocl/src/kmeans.cpp +++ b/modules/ocl/src/kmeans.cpp @@ -43,20 +43,11 @@ // //M*/ -#include #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; -using namespace ocl; - -namespace cv -{ -namespace ocl -{ -////////////////////////////////////OpenCL kernel strings////////////////////////// -extern const char *kmeans_kernel; -} -} +using namespace cv::ocl; static void generateRandomCenter(const vector& box, float* center, RNG& rng) { diff --git a/modules/ocl/src/match_template.cpp b/modules/ocl/src/match_template.cpp index 7c0a7ac5db..ba84043fc7 100644 --- a/modules/ocl/src/match_template.cpp +++ b/modules/ocl/src/match_template.cpp @@ -44,22 +44,11 @@ //M*/ -#include #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -//helper routines -namespace cv -{ - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *match_template; - } -} namespace cv { diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index d247a14794..80b2f7d81c 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -46,30 +46,19 @@ //M*/ #include "precomp.hpp" - -#define ALIGN 32 -#define GPU_MATRIX_MALLOC_STEP(step) (((step) + ALIGN - 1) / ALIGN) * ALIGN +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; -//////////////////////////////////////////////////////////////////////// -//////////////////////////////// oclMat //////////////////////////////// -//////////////////////////////////////////////////////////////////////// +#define ALIGN 32 +#define GPU_MATRIX_MALLOC_STEP(step) (((step) + ALIGN - 1) / ALIGN) * ALIGN // helper routines namespace cv { namespace ocl { - /////////////////////////// OpenCL kernel 
strings /////////////////////////// - - extern const char *operator_copyToM; - extern const char *operator_convertTo; - extern const char *operator_setTo; - extern const char *operator_setToM; - extern const char *convertC3C4; extern DevMemType gDeviceMemType; extern DevMemRW gDeviceMemRW; } diff --git a/modules/ocl/src/moments.cpp b/modules/ocl/src/moments.cpp index 24e8b3e0f6..377af28634 100644 --- a/modules/ocl/src/moments.cpp +++ b/modules/ocl/src/moments.cpp @@ -44,13 +44,12 @@ // //M*/ #include "precomp.hpp" -#include +#include "opencl_kernels.hpp" + namespace cv { namespace ocl { -extern const char *moments; - // The function calculates center of gravity and the central second order moments static void icvCompleteMomentState( CvMoments* moments ) { diff --git a/modules/ocl/src/mssegmentation.cpp b/modules/ocl/src/mssegmentation.cpp index 300265bc2e..248f134705 100644 --- a/modules/ocl/src/mssegmentation.cpp +++ b/modules/ocl/src/mssegmentation.cpp @@ -43,8 +43,10 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" -using namespace std; +using namespace cv; +using namespace cv::ocl; // Auxiliray stuff namespace diff --git a/modules/ocl/src/optical_flow_farneback.cpp b/modules/ocl/src/optical_flow_farneback.cpp index 618a637f09..05a850bd17 100644 --- a/modules/ocl/src/optical_flow_farneback.cpp +++ b/modules/ocl/src/optical_flow_farneback.cpp @@ -45,23 +45,14 @@ #include "precomp.hpp" +#include "opencl_kernels.hpp" #include "opencv2/video/tracking.hpp" -using namespace std; using namespace cv; using namespace cv::ocl; #define MIN_SIZE 32 -namespace cv -{ -namespace ocl -{ -///////////////////////////OpenCL kernel strings/////////////////////////// -extern const char *optical_flow_farneback; -} -} - namespace cv { namespace ocl { namespace optflow_farneback diff --git a/modules/ocl/src/pyrdown.cpp b/modules/ocl/src/pyrdown.cpp index 89df73e9a8..6071fc5987 100644 --- a/modules/ocl/src/pyrdown.cpp +++ b/modules/ocl/src/pyrdown.cpp @@ -45,23 +45,10 @@ 
// //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -using std::cout; -using std::endl; - -namespace cv -{ - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *pyr_down; - - } -} ////////////////////////////////////////////////////////////////////////////// /////////////////////// add subtract multiply divide ///////////////////////// diff --git a/modules/ocl/src/pyrlk.cpp b/modules/ocl/src/pyrlk.cpp index a69015d190..8e8692e77f 100644 --- a/modules/ocl/src/pyrlk.cpp +++ b/modules/ocl/src/pyrlk.cpp @@ -45,21 +45,12 @@ // //M*/ - #include "precomp.hpp" +#include "opencl_kernels.hpp" -using namespace std; using namespace cv; using namespace cv::ocl; -namespace cv -{ -namespace ocl -{ -extern const char *pyrlk; -extern const char *pyrlk_no_image; -} -} struct dim3 { unsigned int x, y, z; diff --git a/modules/ocl/src/pyrup.cpp b/modules/ocl/src/pyrup.cpp index 01df30c518..025348194d 100644 --- a/modules/ocl/src/pyrup.cpp +++ b/modules/ocl/src/pyrup.cpp @@ -45,21 +45,19 @@ // //M*/ -/* Haar features calculation */ -//#define EMU - #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; + +/* Haar features calculation */ +//#define EMU namespace cv { namespace ocl { - extern const char *pyr_up; - void pyrUp(const cv::ocl::oclMat &src, cv::ocl::oclMat &dst) { int depth = src.depth(), channels = src.channels(), oclChannels = src.oclchannels(); diff --git a/modules/ocl/src/sort_by_key.cpp b/modules/ocl/src/sort_by_key.cpp index 0025f0d911..c2ab00452c 100644 --- a/modules/ocl/src/sort_by_key.cpp +++ b/modules/ocl/src/sort_by_key.cpp @@ -43,18 +43,16 @@ // //M*/ -#include #include "precomp.hpp" +#include "opencl_kernels.hpp" + +using namespace cv; +using namespace cv::ocl; namespace cv { namespace ocl { - -extern const char * kernel_sort_by_key; -extern const char * 
kernel_stablesort_by_key; -extern const char * kernel_radix_sort_by_key; - void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, int method, bool isGreaterThan); //TODO(pengx17): change this value depending on device other than a constant diff --git a/modules/ocl/src/split_merge.cpp b/modules/ocl/src/split_merge.cpp index fb8d05aaa7..9c9639fd42 100644 --- a/modules/ocl/src/split_merge.cpp +++ b/modules/ocl/src/split_merge.cpp @@ -44,29 +44,11 @@ //M*/ #include "precomp.hpp" -#include +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -using std::cout; -using std::endl; - -//////////////////////////////////////////////////////////////////////// -///////////////// oclMat merge and split /////////////////////////////// -//////////////////////////////////////////////////////////////////////// - -namespace cv -{ - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *merge_mat; - extern const char *split_mat; - } -} namespace cv { namespace ocl diff --git a/modules/ocl/src/stereo_csbp.cpp b/modules/ocl/src/stereo_csbp.cpp index c8334cca42..f9e86442bb 100644 --- a/modules/ocl/src/stereo_csbp.cpp +++ b/modules/ocl/src/stereo_csbp.cpp @@ -45,51 +45,11 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; -#if !defined (HAVE_OPENCL) - -namespace cv -{ - namespace ocl - { - - void cv::ocl::StereoConstantSpaceBP::estimateRecommendedParams(int, int, int &, int &, int &, int &) - { - throw_nogpu(); - } - cv::ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int, int, int, int, int) - { - throw_nogpu(); - } - cv::ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int, int, int, int, float, float, - float, float, int, int) - { - throw_nogpu(); - } - - void cv::ocl::StereoConstantSpaceBP::operator()(const oclMat &, const oclMat &, oclMat &) - { - throw_nogpu(); - } - } -} - -#else /* !defined 
(HAVE_OPENCL) */ - -namespace cv -{ - namespace ocl - { - - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *stereocsbp; - } - -} namespace cv { namespace ocl @@ -755,5 +715,3 @@ void cv::ocl::StereoConstantSpaceBP::operator()(const oclMat &left, const oclMat operators[msg_type](*this, u, d, l, r, disp_selected_pyr, data_cost, data_cost_selected, temp, out, left, right, disp); } - -#endif /* !defined (HAVE_OPENCL) */ diff --git a/modules/ocl/src/stereobm.cpp b/modules/ocl/src/stereobm.cpp index 8195346c00..716a2f1613 100644 --- a/modules/ocl/src/stereobm.cpp +++ b/modules/ocl/src/stereobm.cpp @@ -46,23 +46,11 @@ //M*/ #include "precomp.hpp" -#include +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -namespace cv -{ -namespace ocl -{ - -///////////////////////////OpenCL kernel strings/////////////////////////// -extern const char *stereobm; - -} -} namespace cv { namespace ocl diff --git a/modules/ocl/src/stereobp.cpp b/modules/ocl/src/stereobp.cpp index 5bc93aa3f5..ef7fff4359 100644 --- a/modules/ocl/src/stereobp.cpp +++ b/modules/ocl/src/stereobp.cpp @@ -45,27 +45,11 @@ //M*/ #include "precomp.hpp" -#include -#include +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; -//////////////////////////////////////////////////////////////////////// -///////////////// stereoBP ///////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////// - -namespace cv -{ - namespace ocl - { - - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *stereobp; - } - -} namespace cv { namespace ocl diff --git a/modules/ocl/src/tvl1flow.cpp b/modules/ocl/src/tvl1flow.cpp index c9a3f7abc1..bbeecb4748 100644 --- a/modules/ocl/src/tvl1flow.cpp +++ b/modules/ocl/src/tvl1flow.cpp @@ -42,21 +42,12 @@ // //M*/ - #include "precomp.hpp" -using namespace std; 
+#include "opencl_kernels.hpp" + using namespace cv; using namespace cv::ocl; -namespace cv -{ - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char* tvl1flow; - } -} - cv::ocl::OpticalFlowDual_TVL1_OCL::OpticalFlowDual_TVL1_OCL() { tau = 0.25; diff --git a/modules/superres/src/btv_l1_ocl.cpp b/modules/superres/src/btv_l1_ocl.cpp index 5aecca0630..432d2368a3 100644 --- a/modules/superres/src/btv_l1_ocl.cpp +++ b/modules/superres/src/btv_l1_ocl.cpp @@ -56,6 +56,7 @@ cv::Ptr cv::superres::createSuperResolution_BTVL1 } #else +#include "opencl_kernels.hpp" using namespace std; using namespace cv; @@ -67,8 +68,6 @@ namespace cv { namespace ocl { - extern const char* superres_btvl1; - float* btvWeights_ = NULL; size_t btvWeights_size = 0; } From dd9ff587dca19807c43e9c16ffb80bb072a71e35 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 25 Sep 2013 19:07:14 +0400 Subject: [PATCH 26/39] ocl: file-based ProgramCache refactoring --- cmake/OpenCVModule.cmake | 3 +- cmake/cl2cpp.cmake | 19 +- modules/nonfree/src/surf.ocl.cpp | 4 +- modules/ocl/include/opencv2/ocl/ocl.hpp | 21 +- .../ocl/include/opencv2/ocl/private/util.hpp | 76 ++- modules/ocl/perf/main.cpp | 1 - modules/ocl/src/brute_force_matcher.cpp | 4 + modules/ocl/src/cl_context.cpp | 57 +- modules/ocl/src/cl_operations.cpp | 47 +- modules/ocl/src/cl_programcache.cpp | 498 +++++++++++++----- ...{binarycaching.hpp => cl_programcache.hpp} | 47 +- modules/ocl/src/imgproc.cpp | 12 +- modules/ocl/src/mcwutil.cpp | 10 +- modules/ocl/src/moments.cpp | 2 +- modules/ocl/test/main.cpp | 1 - 15 files changed, 522 insertions(+), 280 deletions(-) rename modules/ocl/src/{binarycaching.hpp => cl_programcache.hpp} (71%) diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index 4ed8cf0c0f..cc17f5b244 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -445,6 +445,8 @@ macro(ocv_glob_module_sources) source_group("Src\\Cuda" FILES 
${lib_cuda_srcs} ${lib_cuda_hdrs}) endif() + source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs}) + file(GLOB cl_kernels "src/opencl/*.cl") if(HAVE_OPENCL AND cl_kernels) @@ -457,7 +459,6 @@ macro(ocv_glob_module_sources) list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp") endif() - source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs}) source_group("Include" FILES ${lib_hdrs}) source_group("Include\\detail" FILES ${lib_hdrs_detail}) diff --git a/cmake/cl2cpp.cmake b/cmake/cl2cpp.cmake index 0733a42441..825172b73c 100644 --- a/cmake/cl2cpp.cmake +++ b/cmake/cl2cpp.cmake @@ -20,6 +20,7 @@ namespace cv { namespace ocl { + ") foreach(cl ${cl_list}) @@ -43,12 +44,22 @@ foreach(cl ${cl_list}) string(REGEX REPLACE "\"$" "" lines "${lines}") # unneeded " at the eof - set(STR_CPP "${STR_CPP}const char* ${cl_filename}=\"${lines};\n") - set(STR_HPP "${STR_HPP}extern const char* ${cl_filename};\n") + string(MD5 hash "${lines}") + + set(STR_CPP "${STR_CPP}const struct ProgramEntry ${cl_filename}={\"${cl_filename}\",\n\"${lines}, \"${hash}\"};\n") + set(STR_HPP "${STR_HPP}extern const struct ProgramEntry ${cl_filename};\n") endforeach() set(STR_CPP "${STR_CPP}}\n}\n") set(STR_HPP "${STR_HPP}}\n}\n") -file(WRITE ${OUTPUT} "${STR_CPP}") -file(WRITE ${OUTPUT_HPP} "${STR_HPP}") +file(WRITE "${OUTPUT}" "${STR_CPP}") + +if(EXISTS "${OUTPUT_HPP}") + file(READ "${OUTPUT_HPP}" hpp_lines) +endif() +if("${hpp_lines}" STREQUAL "${STR_HPP}") + message(STATUS "${OUTPUT_HPP} contains same content") +else() + file(WRITE "${OUTPUT_HPP}" "${STR_HPP}") +endif() diff --git a/modules/nonfree/src/surf.ocl.cpp b/modules/nonfree/src/surf.ocl.cpp index d6f72bc7ad..3d5cb4e083 100644 --- a/modules/nonfree/src/surf.ocl.cpp +++ b/modules/nonfree/src/surf.ocl.cpp @@ -55,11 +55,11 @@ namespace cv { namespace ocl { - const char noImage2dOption [] = "-D DISABLE_IMAGE2D"; + static const char noImage2dOption[] = "-D 
DISABLE_IMAGE2D"; static bool use_image2d = false; - static void openCLExecuteKernelSURF(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], + static void openCLExecuteKernelSURF(Context *clCxt, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth) { char optBuf [100] = {0}; diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 21bb607471..aece2e1427 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -199,24 +199,6 @@ namespace cv void CV_EXPORTS finish(); - //! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. - CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt , - const char **source, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - std::vector< std::pair > &args, - int channels, int depth, const char *build_options, - bool finish = true, bool measureKernelTime = false, - bool cleanUp = true); - - //! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. - CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt , - const char **fileName, const int numFiles, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - std::vector< std::pair > &args, - int channels, int depth, const char *build_options, - bool finish = true, bool measureKernelTime = false, - bool cleanUp = true); - //! 
Enable or disable OpenCL program binary caching onto local disk // After a program (*.cl files in opencl/ folder) is built at runtime, we allow the // compiled OpenCL program to be cached to the path automatically as "path/*.clb" @@ -233,12 +215,11 @@ namespace cv CACHE_DEBUG = 0x1 << 0, // cache OpenCL binary when built in debug mode (only work with MSVC) CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode (only work with MSVC) CACHE_ALL = CACHE_DEBUG | CACHE_RELEASE, // always cache opencl binary - CACHE_UPDATE = 0x1 << 2 // if the binary cache file with the same name is already on the disk, it will be updated. }; CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./"); //! set where binary cache to be saved to - CV_EXPORTS void setBinpath(const char *path); + CV_EXPORTS void setBinaryPath(const char *path); class CV_EXPORTS oclMatExpr; //////////////////////////////// oclMat //////////////////////////////// diff --git a/modules/ocl/include/opencv2/ocl/private/util.hpp b/modules/ocl/include/opencv2/ocl/private/util.hpp index 2aba472f66..30288a6cff 100644 --- a/modules/ocl/include/opencv2/ocl/private/util.hpp +++ b/modules/ocl/include/opencv2/ocl/private/util.hpp @@ -55,6 +55,13 @@ namespace cv namespace ocl { +struct ProgramEntry +{ + const char* name; + const char* programStr; + const char* programHash; +}; + inline cl_device_id getClDeviceID(const Context *ctx) { return *(cl_device_id*)(ctx->getOpenCLDeviceIDPtr()); @@ -78,41 +85,39 @@ enum openCLMemcpyKind }; ///////////////////////////OpenCL call wrappers//////////////////////////// void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch, - size_t widthInBytes, size_t height); + size_t widthInBytes, size_t height); void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch, - size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type); + size_t widthInBytes, size_t height, DevMemRW 
rw_type, DevMemType mem_type); void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch, - const void *src, size_t spitch, - size_t width, size_t height, openCLMemcpyKind kind, int channels = -1); + const void *src, size_t spitch, + size_t width, size_t height, openCLMemcpyKind kind, int channels = -1); void CV_EXPORTS openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset, - const void *src, size_t spitch, - size_t width, size_t height, int src_offset); + const void *src, size_t spitch, + size_t width, size_t height, int src_offset); void CV_EXPORTS openCLFree(void *devPtr); cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size); void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size); cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt, - const char **source, std::string kernelName); + const cv::ocl::ProgramEntry* source, std::string kernelName); cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt, - const char **source, std::string kernelName, const char *build_options); + const cv::ocl::ProgramEntry* source, std::string kernelName, const char *build_options); void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads); -void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, std::vector< std::pair > &args, - int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1); -void CV_EXPORTS openCLExecuteKernel_(Context *clCxt , const char **source, std::string kernelName, - size_t globalThreads[3], size_t localThreads[3], - std::vector< std::pair > &args, int channels, int depth, const char *build_options); -void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], - size_t localThreads[3], std::vector< std::pair > &args, int channels, int 
depth); -void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], - size_t localThreads[3], std::vector< std::pair > &args, int channels, - int depth, const char *build_options); +void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const cv::ocl::ProgramEntry* source, string kernelName, std::vector< std::pair > &args, + int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1); +void CV_EXPORTS openCLExecuteKernel_(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, + size_t globalThreads[3], size_t localThreads[3], + std::vector< std::pair > &args, int channels, int depth, const char *build_options); +void CV_EXPORTS openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, size_t globalThreads[3], + size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth); +void CV_EXPORTS openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, size_t globalThreads[3], + size_t localThreads[3], std::vector< std::pair > &args, int channels, + int depth, const char *build_options); cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_queue, const void *value, - const size_t size); + const size_t size); cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr); -int CV_EXPORTS savetofile(const Context *clcxt, cl_program &program, const char *fileName); - enum FLUSH_MODE { CLFINISH = 0, @@ -120,11 +125,12 @@ enum FLUSH_MODE DISABLE }; -void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], - size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE); -void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string 
kernelName, size_t globalThreads[3], - size_t localThreads[3], std::vector< std::pair > &args, int channels, - int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE); +void CV_EXPORTS openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, size_t globalThreads[3], + size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE); +void CV_EXPORTS openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, size_t globalThreads[3], + size_t localThreads[3], std::vector< std::pair > &args, int channels, + int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE); + // bind oclMat to OpenCL image textures // note: // 1. there is no memory management. User need to explicitly release the resource @@ -183,6 +189,24 @@ inline size_t roundUp(size_t sz, size_t n) return result; } +//! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. +CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt, + const cv::ocl::ProgramEntry* source, string kernelName, + size_t globalThreads[3], size_t localThreads[3], + std::vector< std::pair > &args, + int channels, int depth, const char *build_options, + bool finish = true, bool measureKernelTime = false, + bool cleanUp = true); + +//! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. 
+CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt, + const cv::ocl::ProgramEntry* source, const int numFiles, string kernelName, + size_t globalThreads[3], size_t localThreads[3], + std::vector< std::pair > &args, + int channels, int depth, const char *build_options, + bool finish = true, bool measureKernelTime = false, + bool cleanUp = true); + }//namespace ocl }//namespace cv diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp index e82af4e322..fc71906293 100644 --- a/modules/ocl/perf/main.cpp +++ b/modules/ocl/perf/main.cpp @@ -91,7 +91,6 @@ int main(int argc, char ** argv) } cv::ocl::setDevice(devicesInfo[device]); - cv::ocl::setBinaryDiskCache(cv::ocl::CACHE_UPDATE); cout << "Device type:" << type << endl << "Platform name:" << devicesInfo[device]->platform->platformName << endl diff --git a/modules/ocl/src/brute_force_matcher.cpp b/modules/ocl/src/brute_force_matcher.cpp index c348db8f30..aaf0703249 100644 --- a/modules/ocl/src/brute_force_matcher.cpp +++ b/modules/ocl/src/brute_force_matcher.cpp @@ -45,10 +45,14 @@ //M*/ #include "precomp.hpp" +#include +#include +#include #include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; +using namespace std; static const int OPT_SIZE = 100; diff --git a/modules/ocl/src/cl_context.cpp b/modules/ocl/src/cl_context.cpp index 6413465f65..135110077c 100644 --- a/modules/ocl/src/cl_context.cpp +++ b/modules/ocl/src/cl_context.cpp @@ -48,15 +48,16 @@ #include "precomp.hpp" #include #include -#include "binarycaching.hpp" +#include "cl_programcache.hpp" +#if defined _MSC_VER && _MSC_VER >= 1200 +# pragma warning( disable: 4100 4101 4127 4244 4267 4510 4512 4610) +#endif #undef __CL_ENABLE_EXCEPTIONS #include -namespace cv { namespace ocl { - -extern void fft_teardown(); -extern void clBlasTeardown(); +namespace cv { +namespace ocl { struct PlatformInfoImpl { @@ -174,7 +175,7 @@ static int initializeOpenCLDevices() deviceInfo.info.platform = &platformInfo.info; 
platformInfo.deviceIDs[j] = deviceInfo.info._id; - cl_device_type type = -1; + cl_device_type type = cl_device_type(-1); openCLSafeCall(device.getInfo(CL_DEVICE_TYPE, &type)); deviceInfo.info.deviceType = DeviceType(type); @@ -182,7 +183,7 @@ static int initializeOpenCLDevices() openCLSafeCall(device.getInfo(CL_DEVICE_VERSION, &deviceInfo.info.deviceVersion)); openCLSafeCall(device.getInfo(CL_DEVICE_NAME, &deviceInfo.info.deviceName)); openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR, &deviceInfo.info.deviceVendor)); - cl_uint vendorID = -1; + cl_uint vendorID = 0; openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR_ID, &vendorID)); deviceInfo.info.deviceVendorId = vendorID; openCLSafeCall(device.getInfo(CL_DRIVER_VERSION, &deviceInfo.info.deviceDriverVersion)); @@ -347,9 +348,6 @@ static bool __termination = false; ContextImpl::~ContextImpl() { - fft_teardown(); - clBlasTeardown(); - #ifdef WIN32 // if process is on termination stage (ExitProcess was called and other threads were terminated) // then disable command queue release because it may cause program hang @@ -370,8 +368,14 @@ ContextImpl::~ContextImpl() clContext = NULL; } +void fft_teardown(); +void clBlasTeardown(); + void ContextImpl::cleanupContext(void) { + fft_teardown(); + clBlasTeardown(); + cv::AutoLock lock(currentContextMutex); if (currentContext) delete currentContext; @@ -382,6 +386,15 @@ void ContextImpl::setContext(const DeviceInfo* deviceInfo) { CV_Assert(deviceInfo->_id >= 0 && deviceInfo->_id < (int)global_devices.size()); + { + cv::AutoLock lock(currentContextMutex); + if (currentContext) + { + if (currentContext->deviceInfo._id == deviceInfo->_id) + return; + } + } + DeviceInfoImpl& infoImpl = global_devices[deviceInfo->_id]; CV_Assert(deviceInfo == &infoImpl.info); @@ -466,6 +479,30 @@ int getOpenCLDevices(std::vector &devices, int deviceType, co } } + if (currentContext == NULL) + { + // select default device + const DeviceInfo* selectedDevice = NULL; + for (size_t i = 0; i < devices.size(); 
i++) + { + const DeviceInfo* dev = devices[i]; + if (dev->deviceType == CL_DEVICE_TYPE_GPU) + { + selectedDevice = dev; + break; + } + else if (dev->deviceType == CL_DEVICE_TYPE_CPU && (selectedDevice == NULL)) + { + selectedDevice = dev; + } + } + + if (selectedDevice) + { + setDevice(selectedDevice); + } + } + return (int)devices.size(); } diff --git a/modules/ocl/src/cl_operations.cpp b/modules/ocl/src/cl_operations.cpp index 42138adbe0..25d7454a2a 100644 --- a/modules/ocl/src/cl_operations.cpp +++ b/modules/ocl/src/cl_operations.cpp @@ -48,10 +48,7 @@ #include "precomp.hpp" #include #include -#include "binarycaching.hpp" - -#undef __CL_ENABLE_EXCEPTIONS -#include +#include "cl_programcache.hpp" //#define PRINT_KERNEL_RUN_TIME #define RUN_TIMES 100 @@ -60,7 +57,8 @@ #endif //#define AMD_DOUBLE_DIFFER -namespace cv { namespace ocl { +namespace cv { +namespace ocl { DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT; DevMemRW gDeviceMemRW = DEVICE_MEM_R_W; @@ -179,21 +177,22 @@ void openCLFree(void *devPtr) openCLSafeCall(clReleaseMemObject((cl_mem)devPtr)); } -cl_kernel openCLGetKernelFromSource(const Context *ctx, const char **source, string kernelName) +cl_kernel openCLGetKernelFromSource(const Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName) { return openCLGetKernelFromSource(ctx, source, kernelName, NULL); } -cl_kernel openCLGetKernelFromSource(const Context *ctx, const char **source, string kernelName, +cl_kernel openCLGetKernelFromSource(const Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, const char *build_options) { cl_kernel kernel; cl_int status = 0; CV_Assert(ProgramCache::getProgramCache() != NULL); - cl_program program = ProgramCache::getProgramCache()->getProgram(ctx, source, kernelName, build_options); + cl_program program = ProgramCache::getProgramCache()->getProgram(ctx, source, build_options); CV_Assert(program != NULL); kernel = clCreateKernel(program, kernelName.c_str(), &status); 
openCLVerifyCall(status); + openCLVerifyCall(clReleaseProgram(program)); return kernel; } @@ -213,7 +212,7 @@ void openCLVerifyKernel(const Context *ctx, cl_kernel kernel, size_t *localThrea static double total_execute_time = 0; static double total_kernel_time = 0; #endif -void openCLExecuteKernel_(Context *ctx , const char **source, string kernelName, size_t globalThreads[3], +void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], size_t localThreads[3], vector< pair > &args, int channels, int depth, const char *build_options) { @@ -275,14 +274,14 @@ void openCLExecuteKernel_(Context *ctx , const char **source, string kernelName, openCLSafeCall(clReleaseKernel(kernel)); } -void openCLExecuteKernel(Context *ctx , const char **source, string kernelName, +void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], size_t localThreads[3], vector< pair > &args, int channels, int depth) { openCLExecuteKernel(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth, NULL); } -void openCLExecuteKernel(Context *ctx , const char **source, string kernelName, +void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], size_t localThreads[3], vector< pair > &args, int channels, int depth, const char *build_options) @@ -316,7 +315,7 @@ void openCLExecuteKernel(Context *ctx , const char **source, string kernelName, #endif } -double openCLExecuteKernelInterop(Context *ctx , const char **source, string kernelName, +double openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], size_t localThreads[3], vector< pair > &args, int channels, int depth, const char *build_options, bool finish, bool measureKernelTime, bool cleanUp) @@ -391,29 +390,6 @@ double openCLExecuteKernelInterop(Context *ctx , const char **source, 
string ker return kernelTime; } -//double openCLExecuteKernelInterop(Context *ctx , const char **fileName, const int numFiles, string kernelName, -// size_t globalThreads[3], size_t localThreads[3], -// vector< pair > &args, int channels, int depth, const char *build_options, -// bool finish, bool measureKernelTime, bool cleanUp) -// -//{ -// std::vector fsource; -// for (int i = 0 ; i < numFiles ; i++) -// { -// std::string str; -// if (convertToString(fileName[i], str) >= 0) -// fsource.push_back(str); -// } -// const char **source = new const char *[numFiles]; -// for (int i = 0 ; i < numFiles ; i++) -// source[i] = fsource[i].c_str(); -// double kernelTime = openCLExecuteKernelInterop(ctx ,source, kernelName, globalThreads, localThreads, -// args, channels, depth, build_options, finish, measureKernelTime, cleanUp); -// fsource.clear(); -// delete []source; -// return kernelTime; -//} - cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value, const size_t size) { @@ -427,7 +403,6 @@ cl_mem load_constant(cl_context context, cl_command_queue command_queue, const v value, 0, 0, 0)); return con_struct; - } }//namespace ocl diff --git a/modules/ocl/src/cl_programcache.cpp b/modules/ocl/src/cl_programcache.cpp index 3261319c34..7c58e7c489 100644 --- a/modules/ocl/src/cl_programcache.cpp +++ b/modules/ocl/src/cl_programcache.cpp @@ -48,85 +48,93 @@ #include "precomp.hpp" #include #include -#include "binarycaching.hpp" +#include "cl_programcache.hpp" +#if defined _MSC_VER && _MSC_VER >= 1200 +# pragma warning( disable: 4100 4244 4267 4510 4512 4610) +#endif #undef __CL_ENABLE_EXCEPTIONS #include namespace cv { namespace ocl { + +#define MAX_PROG_CACHE_SIZE 1024 /* * The binary caching system to eliminate redundant program source compilation. * Strictly, this is not a cache because we do not implement evictions right now. * We shall add such features to trade-off memory consumption and performance when necessary. 
*/ +cv::Mutex ProgramCache::mutexFiles; +cv::Mutex ProgramCache::mutexCache; + std::auto_ptr _programCache; ProgramCache* ProgramCache::getProgramCache() { - if (NULL == _programCache.get()) - _programCache.reset(new ProgramCache()); - return _programCache.get(); + if (NULL == _programCache.get()) + _programCache.reset(new ProgramCache()); + return _programCache.get(); } ProgramCache::ProgramCache() { - codeCache.clear(); - cacheSize = 0; + codeCache.clear(); + cacheSize = 0; } ProgramCache::~ProgramCache() { - releaseProgram(); + releaseProgram(); } -cl_program ProgramCache::progLookup(string srcsign) +cl_program ProgramCache::progLookup(const string& srcsign) { - map::iterator iter; - iter = codeCache.find(srcsign); - if(iter != codeCache.end()) - return iter->second; - else - return NULL; + map::iterator iter; + iter = codeCache.find(srcsign); + if(iter != codeCache.end()) + return iter->second; + else + return NULL; } -void ProgramCache::addProgram(string srcsign , cl_program program) +void ProgramCache::addProgram(const string& srcsign, cl_program program) { - if(!progLookup(srcsign)) - { - codeCache.insert(map::value_type(srcsign, program)); - } + if (!progLookup(srcsign)) + { + clRetainProgram(program); + codeCache.insert(map::value_type(srcsign, program)); + } } void ProgramCache::releaseProgram() { - map::iterator iter; - for(iter = codeCache.begin(); iter != codeCache.end(); iter++) - { - openCLSafeCall(clReleaseProgram(iter->second)); - } - codeCache.clear(); - cacheSize = 0; + map::iterator iter; + for(iter = codeCache.begin(); iter != codeCache.end(); iter++) + { + openCLSafeCall(clReleaseProgram(iter->second)); + } + codeCache.clear(); + cacheSize = 0; } -static int enable_disk_cache = +static int enable_disk_cache = true || #ifdef _DEBUG false; #else true; #endif -static int update_disk_cache = false; static String binpath = ""; void setBinaryDiskCache(int mode, String path) { + enable_disk_cache = 0; + binpath = ""; + if(mode == CACHE_NONE) { - 
update_disk_cache = 0; - enable_disk_cache = 0; return; } - update_disk_cache |= (mode & CACHE_UPDATE) == CACHE_UPDATE; - enable_disk_cache |= + enable_disk_cache = #ifdef _DEBUG (mode & CACHE_DEBUG) == CACHE_DEBUG; #else @@ -138,108 +146,286 @@ void setBinaryDiskCache(int mode, String path) } } -void setBinpath(const char *path) +void setBinaryPath(const char *path) { binpath = path; } -int savetofile(const Context*, cl_program &program, const char *fileName) +static const int MAX_ENTRIES = 64; + +struct ProgramFileCache { - size_t binarySize; - openCLSafeCall(clGetProgramInfo(program, - CL_PROGRAM_BINARY_SIZES, - sizeof(size_t), - &binarySize, NULL)); - char* binary = (char*)malloc(binarySize); - if(binary == NULL) + struct CV_DECL_ALIGNED(1) ProgramFileHeader { - CV_Error(CV_StsNoMem, "Failed to allocate host memory."); - } - openCLSafeCall(clGetProgramInfo(program, - CL_PROGRAM_BINARIES, - sizeof(char *), - &binary, - NULL)); - - FILE *fp = fopen(fileName, "wb+"); - if(fp != NULL) + int hashLength; + //char hash[]; + }; + + struct CV_DECL_ALIGNED(1) ProgramFileTable { - fwrite(binary, binarySize, 1, fp); - free(binary); - fclose(fp); - } - return 1; -} + int numberOfEntries; + //int firstEntryOffset[]; + }; -cl_program ProgramCache::getProgram(const Context *ctx, const char **source, string kernelName, - const char *build_options) -{ - cl_program program; - cl_int status = 0; - stringstream src_sign; - string srcsign; - string filename; + struct CV_DECL_ALIGNED(1) ProgramFileConfigurationEntry + { + int nextEntry; + int dataSize; + int optionsLength; + //char options[]; + // char data[]; + }; - if (NULL != build_options) + string fileName_; + const char* hash_; + std::fstream f; + + ProgramFileCache(const string& fileName, const char* hash) + : fileName_(fileName), hash_(hash) { - src_sign << (int64)(*source) << getClContext(ctx) << "_" << build_options; + if (hash_ != NULL) + { + f.open(fileName_.c_str(), ios::in|ios::out|ios::binary); + if(f.is_open()) + { + 
int hashLength = 0; + f.read((char*)&hashLength, sizeof(int)); + std::vector fhash(hashLength + 1); + f.read(&fhash[0], hashLength); + if (f.eof() || strncmp(hash_, &fhash[0], hashLength) != 0) + { + f.close(); + remove(fileName_.c_str()); + return; + } + } + } } - else + + int getHash(const string& options) { - src_sign << (int64)(*source) << getClContext(ctx); + int hash = 0; + for (size_t i = 0; i < options.length(); i++) + { + hash = (hash << 2) ^ (hash >> 17) ^ options[i]; + } + return (hash + (hash >> 16)) & (MAX_ENTRIES - 1); } - srcsign = src_sign.str(); - program = NULL; - program = ProgramCache::getProgramCache()->progLookup(srcsign); + bool readConfigurationFromFile(const string& options, std::vector& buf) + { + if (hash_ == NULL) + return false; + + if (!f.is_open()) + return false; + + f.seekg(0, std::fstream::end); + size_t fileSize = (size_t)f.tellg(); + if (fileSize == 0) + { + std::cerr << "Invalid file (empty): " << fileName_ << std::endl; + f.close(); + remove(fileName_.c_str()); + return false; + } + f.seekg(0, std::fstream::beg); + + int hashLength = 0; + f.read((char*)&hashLength, sizeof(int)); + CV_Assert(hashLength > 0); + f.seekg(sizeof(hashLength) + hashLength, std::fstream::beg); + + int numberOfEntries = 0; + f.read((char*)&numberOfEntries, sizeof(int)); + CV_Assert(numberOfEntries > 0); + if (numberOfEntries != MAX_ENTRIES) + { + std::cerr << "Invalid file: " << fileName_ << std::endl; + f.close(); + remove(fileName_.c_str()); + return false; + } + + std::vector firstEntryOffset(numberOfEntries); + f.read((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries); + + int entryNum = getHash(options); - if (!program) + int entryOffset = firstEntryOffset[entryNum]; + ProgramFileConfigurationEntry entry; + while (entryOffset > 0) + { + f.seekg(entryOffset, std::fstream::beg); + assert(sizeof(entry) == sizeof(int)*3); + f.read((char*)&entry, sizeof(entry)); + std::vector foptions(entry.optionsLength); + if ((int)options.length() == 
entry.optionsLength) + { + if (entry.optionsLength > 0) + f.read(&foptions[0], entry.optionsLength); + if (memcmp(&foptions[0], options.c_str(), entry.optionsLength) == 0) + { + buf.resize(entry.dataSize); + f.read(&buf[0], entry.dataSize); + f.seekg(0, std::fstream::beg); + return true; + } + } + if (entry.nextEntry <= 0) + break; + entryOffset = entry.nextEntry; + } + return false; + } + + bool writeConfigurationToFile(const string& options, std::vector& buf) { - //config build programs - std::string all_build_options; - if (!ctx->getDeviceInfo().compilationExtraOptions.empty()) - all_build_options += ctx->getDeviceInfo().compilationExtraOptions; - if (build_options != NULL) + if (hash_ == NULL) + return true; // don't save dynamic kernels + + if (!f.is_open()) { - all_build_options += " "; - all_build_options += build_options; + f.open(fileName_.c_str(), ios::in|ios::out|ios::binary); + if (!f.is_open()) + { + f.open(fileName_.c_str(), ios::out|ios::binary); + if (!f.is_open()) + return false; + } } - filename = binpath + kernelName + "_" + ctx->getDeviceInfo().deviceName + all_build_options + ".clb"; - FILE *fp = enable_disk_cache ? 
fopen(filename.c_str(), "rb") : NULL; - if(fp == NULL || update_disk_cache) + f.seekg(0, std::fstream::end); + size_t fileSize = (size_t)f.tellg(); + if (fileSize == 0) { - if(fp != NULL) - fclose(fp); + f.seekp(0, std::fstream::beg); + int hashLength = strlen(hash_); + f.write((char*)&hashLength, sizeof(int)); + f.write(hash_, hashLength); + + int numberOfEntries = MAX_ENTRIES; + f.write((char*)&numberOfEntries, sizeof(int)); + std::vector firstEntryOffset(MAX_ENTRIES, 0); + f.write((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries); + f.close(); + f.open(fileName_.c_str(), ios::in|ios::out|ios::binary); + CV_Assert(f.is_open()); + f.seekg(0, std::fstream::end); + fileSize = (size_t)f.tellg(); + } + f.seekg(0, std::fstream::beg); + + int hashLength = 0; + f.read((char*)&hashLength, sizeof(int)); + CV_Assert(hashLength > 0); + f.seekg(sizeof(hashLength) + hashLength, std::fstream::beg); + + int numberOfEntries = 0; + f.read((char*)&numberOfEntries, sizeof(int)); + CV_Assert(numberOfEntries > 0); + if (numberOfEntries != MAX_ENTRIES) + { + std::cerr << "Invalid file: " << fileName_ << std::endl; + f.close(); + remove(fileName_.c_str()); + return false; + } + + size_t tableEntriesOffset = (size_t)f.tellg(); + std::vector firstEntryOffset(numberOfEntries); + f.read((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries); - program = clCreateProgramWithSource( - getClContext(ctx), 1, source, NULL, &status); + int entryNum = getHash(options); + + int entryOffset = firstEntryOffset[entryNum]; + ProgramFileConfigurationEntry entry; + while (entryOffset > 0) + { + f.seekg(entryOffset, std::fstream::beg); + assert(sizeof(entry) == sizeof(int)*3); + f.read((char*)&entry, sizeof(entry)); + std::vector foptions(entry.optionsLength); + if ((int)options.length() == entry.optionsLength) + { + if (entry.optionsLength > 0) + f.read(&foptions[0], entry.optionsLength); + CV_Assert(memcmp(&foptions, options.c_str(), entry.optionsLength) != 0); + } + if (entry.nextEntry <= 0) 
+ break; + entryOffset = entry.nextEntry; + } + if (entryOffset > 0) + { + f.seekp(entryOffset, std::fstream::beg); + entry.nextEntry = fileSize; + f.write((char*)&entry, sizeof(entry)); + } + else + { + firstEntryOffset[entryNum] = fileSize; + f.seekp(tableEntriesOffset, std::fstream::beg); + f.write((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries); + } + f.seekp(fileSize, std::fstream::beg); + entry.nextEntry = 0; + entry.dataSize = buf.size(); + entry.optionsLength = options.length(); + f.write((char*)&entry, sizeof(entry)); + f.write(options.c_str(), entry.optionsLength); + f.write(&buf[0], entry.dataSize); + return true; + } + + cl_program getOrBuildProgram(const Context* ctx, const cv::ocl::ProgramEntry* source, const string& options) + { + cl_int status = 0; + cl_program program = NULL; + std::vector binary; + if (!enable_disk_cache || !readConfigurationFromFile(options, binary)) + { + program = clCreateProgramWithSource(getClContext(ctx), 1, (const char**)&source->programStr, NULL, &status); openCLVerifyCall(status); cl_device_id device = getClDeviceID(ctx); - status = clBuildProgram(program, 1, &device, all_build_options.c_str(), NULL, NULL); - if(status == CL_SUCCESS && enable_disk_cache) - savetofile(ctx, program, filename.c_str()); + status = clBuildProgram(program, 1, &device, options.c_str(), NULL, NULL); + if(status == CL_SUCCESS) + { + if (enable_disk_cache) + { + size_t binarySize; + openCLSafeCall(clGetProgramInfo(program, + CL_PROGRAM_BINARY_SIZES, + sizeof(size_t), + &binarySize, NULL)); + + std::vector binary(binarySize); + + char* ptr = &binary[0]; + openCLSafeCall(clGetProgramInfo(program, + CL_PROGRAM_BINARIES, + sizeof(char*), + &ptr, + NULL)); + + if (!writeConfigurationToFile(options, binary)) + { + std::cerr << "Can't write data to file: " << fileName_ << std::endl; + } + } + } } else { - fseek(fp, 0, SEEK_END); - size_t binarySize = ftell(fp); - fseek(fp, 0, SEEK_SET); - char *binary = new char[binarySize]; - CV_Assert(1 == 
fread(binary, binarySize, 1, fp)); - fclose(fp); - cl_int status = 0; cl_device_id device = getClDeviceID(ctx); + size_t size = binary.size(); + const char* ptr = &binary[0]; program = clCreateProgramWithBinary(getClContext(ctx), - 1, - &device, - (const size_t *)&binarySize, - (const unsigned char **)&binary, - NULL, - &status); + 1, &device, + (const size_t *)&size, (const unsigned char **)&ptr, + NULL, &status); openCLVerifyCall(status); - status = clBuildProgram(program, 1, &device, all_build_options.c_str(), NULL, NULL); - delete[] binary; + status = clBuildProgram(program, 1, &device, options.c_str(), NULL, NULL); } if(status != CL_SUCCESS) @@ -259,53 +445,77 @@ cl_program ProgramCache::getProgram(const Context *ctx, const char **source, str memset(buildLog, 0, buildLogSize); openCLSafeCall(clGetProgramBuildInfo(program, getClDeviceID(ctx), CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL)); - std::cout << "\n\t\t\tBUILD LOG\n"; + std::cout << "\nBUILD LOG: " << options << "\n"; std::cout << buildLog << endl; delete [] buildLog; } openCLVerifyCall(status); } - //Cache the binary for future use if build_options is null - if( (this->cacheSize += 1) < MAX_PROG_CACHE_SIZE) - this->addProgram(srcsign, program); - else - cout << "Warning: code cache has been full.\n"; + return program; + } +}; + +cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEntry* source, + const char *build_options) +{ + stringstream src_sign; + + src_sign << (int64)(source->programStr); + src_sign << getClContext(ctx); + if (NULL != build_options) + { + src_sign << "_" << build_options; + } + + { + cv::AutoLock lockCache(mutexCache); + cl_program program = ProgramCache::getProgramCache()->progLookup(src_sign.str()); + if (!!program) + { + clRetainProgram(program); + return program; + } + } + + cv::AutoLock lockCache(mutexFiles); + + // second check + { + cv::AutoLock lockCache(mutexCache); + cl_program program = 
ProgramCache::getProgramCache()->progLookup(src_sign.str()); + if (!!program) + { + clRetainProgram(program); + return program; + } + } + + string all_build_options; + if (!ctx->getDeviceInfo().compilationExtraOptions.empty()) + all_build_options += ctx->getDeviceInfo().compilationExtraOptions; + if (build_options != NULL) + { + all_build_options += " "; + all_build_options += build_options; + } + const DeviceInfo& devInfo = ctx->getDeviceInfo(); + string filename = binpath + (source->name ? source->name : "NULL") + "_" + devInfo.platform->platformName + "_" + devInfo.deviceName + ".clb"; + + ProgramFileCache programFileCache(filename, source->programHash); + cl_program program = programFileCache.getOrBuildProgram(ctx, source, all_build_options); + + //Cache the binary for future use if build_options is null + if( (this->cacheSize += 1) < MAX_PROG_CACHE_SIZE) + { + cv::AutoLock lockCache(mutexCache); + this->addProgram(src_sign.str(), program); + } + else + { + cout << "Warning: code cache has been full.\n"; } return program; } -//// Converts the contents of a file into a string -//static int convertToString(const char *filename, std::string& s) -//{ -// size_t size; -// char* str; -// -// std::fstream f(filename, (std::fstream::in | std::fstream::binary)); -// if(f.is_open()) -// { -// size_t fileSize; -// f.seekg(0, std::fstream::end); -// size = fileSize = (size_t)f.tellg(); -// f.seekg(0, std::fstream::beg); -// -// str = new char[size+1]; -// if(!str) -// { -// f.close(); -// return -1; -// } -// -// f.read(str, fileSize); -// f.close(); -// str[size] = '\0'; -// -// s = str; -// delete[] str; -// return 0; -// } -// printf("Error: Failed to open file %s\n", filename); -// return -1; -//} - } // namespace ocl } // namespace cv diff --git a/modules/ocl/src/binarycaching.hpp b/modules/ocl/src/cl_programcache.hpp similarity index 71% rename from modules/ocl/src/binarycaching.hpp rename to modules/ocl/src/cl_programcache.hpp index cc9e71a330..ea2ab400c6 100644 --- 
a/modules/ocl/src/binarycaching.hpp +++ b/modules/ocl/src/cl_programcache.hpp @@ -44,41 +44,42 @@ #include "precomp.hpp" -using namespace cv; -using namespace cv::ocl; -using namespace std; -using std::cout; -using std::endl; - -namespace cv { namespace ocl { +namespace cv { +namespace ocl { class ProgramCache { protected: - ProgramCache(); - ~ProgramCache(); - friend class std::auto_ptr; + ProgramCache(); + ~ProgramCache(); + friend class std::auto_ptr; public: - static ProgramCache *getProgramCache(); + static ProgramCache *getProgramCache(); - cl_program getProgram(const Context *ctx, const char **source, string kernelName, + cl_program getProgram(const Context *ctx, const cv::ocl::ProgramEntry* source, const char *build_options); - void releaseProgram(); + void releaseProgram(); protected: - //lookup the binary given the file name - cl_program progLookup(string srcsign); + //lookup the binary given the file name + // (with acquired mutexCache) + cl_program progLookup(const string& srcsign); + + //add program to the cache + // (with acquired mutexCache) + void addProgram(const string& srcsign, cl_program program); - //add program to the cache - void addProgram(string srcsign, cl_program program); + map codeCache; + unsigned int cacheSize; - map codeCache; - unsigned int cacheSize; + //The presumed watermark for the cache volume (256MB). Is it enough? + //We may need more delicate algorithms when necessary later. + //Right now, let's just leave it along. + static const unsigned MAX_PROG_CACHE_SIZE = 1024; - //The presumed watermark for the cache volume (256MB). Is it enough? - //We may need more delicate algorithms when necessary later. - //Right now, let's just leave it along. 
- static const unsigned MAX_PROG_CACHE_SIZE = 1024; + // acquire both mutexes in this order: 1) mutexFiles 2) mutexCache + static cv::Mutex mutexFiles; + static cv::Mutex mutexCache; }; }//namespace ocl diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index b4d2b70a0d..5e0f54fab5 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -1108,7 +1108,7 @@ namespace cv CV_Assert(Dx.offset == 0 && Dy.offset == 0); } - static void corner_ocl(const char *src_str, string kernelName, int block_size, float k, oclMat &Dx, oclMat &Dy, + static void corner_ocl(const cv::ocl::ProgramEntry* source, string kernelName, int block_size, float k, oclMat &Dx, oclMat &Dy, oclMat &dst, int border_type) { char borderType[30]; @@ -1160,7 +1160,7 @@ namespace cv args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step)); args.push_back( make_pair( sizeof(cl_float) , (void *)&k)); - openCLExecuteKernel(dst.clCxt, &src_str, kernelName, gt, lt, args, -1, -1, build_options); + openCLExecuteKernel(dst.clCxt, source, kernelName, gt, lt, args, -1, -1, build_options); } void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, @@ -1181,7 +1181,7 @@ namespace cv CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT); extractCovData(src, dx, dy, blockSize, ksize, borderType); dst.create(src.size(), CV_32F); - corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast(k), dx, dy, dst, borderType); + corner_ocl(&imgproc_calcHarris, "calcHarris", blockSize, static_cast(k), dx, dy, dst, borderType); } void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType) @@ -1200,7 +1200,7 @@ namespace cv CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == 
cv::BORDER_REFLECT); extractCovData(src, dx, dy, blockSize, ksize, borderType); dst.create(src.size(), CV_32F); - corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType); + corner_ocl(&imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType); } /////////////////////////////////// MeanShiftfiltering /////////////////////////////////////////////// static void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps) @@ -1749,7 +1749,7 @@ namespace cv } //////////////////////////////////convolve//////////////////////////////////////////////////// -static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, string kernelName, const char **kernelString) +static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source) { CV_Assert(src.depth() == CV_32FC1); CV_Assert(temp1.depth() == CV_32F); @@ -1784,7 +1784,7 @@ static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, st args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.cols )); - openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); } void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y) { diff --git a/modules/ocl/src/mcwutil.cpp b/modules/ocl/src/mcwutil.cpp index e4e2e918fb..66aa76560c 100644 --- a/modules/ocl/src/mcwutil.cpp +++ b/modules/ocl/src/mcwutil.cpp @@ -72,7 +72,7 @@ namespace cv namespace ocl { // provide additional methods for the user to interact with the command queue after a task is fired - static void openCLExecuteKernel_2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], + static void openCLExecuteKernel_2(Context *clCxt, const 
cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], size_t localThreads[3], vector< pair > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode) { @@ -118,14 +118,14 @@ namespace cv openCLSafeCall(clReleaseKernel(kernel)); } - void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, + void openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], size_t localThreads[3], vector< pair > &args, int channels, int depth, FLUSH_MODE finish_mode) { openCLExecuteKernel2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, NULL, finish_mode); } - void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, + void openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], size_t localThreads[3], vector< pair > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode) @@ -249,7 +249,7 @@ namespace cv bool support_image2d(Context *clCxt) { - static const char * _kernel_string = "__kernel void test_func(image2d_t img) {}"; + const cv::ocl::ProgramEntry _kernel = {NULL, "__kernel void test_func(image2d_t img) {}", NULL}; static bool _isTested = false; static bool _support = false; if(_isTested) @@ -258,7 +258,7 @@ namespace cv } try { - cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel_string, "test_func"); + cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel, "test_func"); cv::ocl::finish(); _support = true; } diff --git a/modules/ocl/src/moments.cpp b/modules/ocl/src/moments.cpp index 377af28634..a19f7fc516 100644 --- a/modules/ocl/src/moments.cpp +++ b/modules/ocl/src/moments.cpp @@ -229,7 +229,7 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary ) CV_Error( CV_StsBadArg, "The passed sequence is not a valid contour" ); } - if( !moments ) + if( !mom ) CV_Error( CV_StsNullPtr, "" ); memset( mom, 0, 
sizeof(*mom)); diff --git a/modules/ocl/test/main.cpp b/modules/ocl/test/main.cpp index 8071102bad..4061c2579e 100644 --- a/modules/ocl/test/main.cpp +++ b/modules/ocl/test/main.cpp @@ -118,7 +118,6 @@ int main(int argc, char **argv) } cv::ocl::setDevice(devicesInfo[device]); - setBinaryDiskCache(CACHE_UPDATE); cout << "Device type: " << type << endl << "Platform name: " << devicesInfo[device]->platform->platformName << endl From 16adbda4d3c0d788f2924f2fe881c08ee001228d Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 30 Sep 2013 19:07:37 +0400 Subject: [PATCH 27/39] ocl: added OpenCL device selection via OPENCV_OPENCL_DEVICE environment variable --- modules/ocl/perf/main.cpp | 65 ++++---- modules/ocl/src/cl_context.cpp | 275 ++++++++++++++++++++++++++++----- modules/ocl/test/main.cpp | 71 +++++---- 3 files changed, 315 insertions(+), 96 deletions(-) diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp index fc71906293..9f87054e6d 100644 --- a/modules/ocl/perf/main.cpp +++ b/modules/ocl/perf/main.cpp @@ -51,6 +51,8 @@ const char * impls[] = #endif }; +using namespace cv::ocl; + int main(int argc, char ** argv) { const char * keys = @@ -59,42 +61,49 @@ int main(int argc, char ** argv) "{ p | platform | -1 | set platform id }" "{ d | device | 0 | set device id }"; - CommandLineParser cmd(argc, argv, keys); - if (cmd.get("help")) + if (getenv("OPENCV_OPENCL_DEVICE") == NULL) // TODO Remove this after buildbot updates { - cout << "Available options besides google test option:" << endl; - cmd.printParams(); - return 0; - } + CommandLineParser cmd(argc, argv, keys); + if (cmd.get("help")) + { + cout << "Available options besides google test option:" << endl; + cmd.printParams(); + return 0; + } - string type = cmd.get("type"); - int pid = cmd.get("platform"); - int device = cmd.get("device"); + string type = cmd.get("type"); + int pid = cmd.get("platform"); + int device = cmd.get("device"); - int flag = type == "cpu" ? 
cv::ocl::CVCL_DEVICE_TYPE_CPU : - cv::ocl::CVCL_DEVICE_TYPE_GPU; + int flag = type == "cpu" ? cv::ocl::CVCL_DEVICE_TYPE_CPU : + cv::ocl::CVCL_DEVICE_TYPE_GPU; - cv::ocl::PlatformsInfo platformsInfo; - cv::ocl::getOpenCLPlatforms(platformsInfo); - if (pid >= (int)platformsInfo.size()) - { - std::cout << "platform is invalid\n"; - return 1; - } + cv::ocl::PlatformsInfo platformsInfo; + cv::ocl::getOpenCLPlatforms(platformsInfo); + if (pid >= (int)platformsInfo.size()) + { + std::cout << "platform is invalid\n"; + return 1; + } - cv::ocl::DevicesInfo devicesInfo; - int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? NULL : platformsInfo[pid]); - if (device < 0 || device >= devnums) - { - std::cout << "device/platform invalid\n"; - return 1; + cv::ocl::DevicesInfo devicesInfo; + int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? NULL : platformsInfo[pid]); + if (device < 0 || device >= devnums) + { + std::cout << "device/platform invalid\n"; + return 1; + } + + cv::ocl::setDevice(devicesInfo[device]); } - cv::ocl::setDevice(devicesInfo[device]); + const DeviceInfo& deviceInfo = cv::ocl::Context::getContext()->getDeviceInfo(); - cout << "Device type:" << type << endl - << "Platform name:" << devicesInfo[device]->platform->platformName << endl - << "Device name:" << devicesInfo[device]->deviceName << endl; + cout << "Device type: " << (deviceInfo.deviceType == CVCL_DEVICE_TYPE_CPU ? + "CPU" : + (deviceInfo.deviceType == CVCL_DEVICE_TYPE_GPU ? 
"GPU" : "unknown")) << endl + << "Platform name: " << deviceInfo.platform->platformName << endl + << "Device name: " << deviceInfo.deviceName << endl; CV_PERF_TEST_MAIN_INTERNALS(ocl, impls) } diff --git a/modules/ocl/src/cl_context.cpp b/modules/ocl/src/cl_context.cpp index 135110077c..e24cc8b358 100644 --- a/modules/ocl/src/cl_context.cpp +++ b/modules/ocl/src/cl_context.cpp @@ -51,7 +51,7 @@ #include "cl_programcache.hpp" #if defined _MSC_VER && _MSC_VER >= 1200 -# pragma warning( disable: 4100 4101 4127 4244 4267 4510 4512 4610) +#pragma warning( disable: 4100 4101 4127 4244 4267 4510 4512 4610) #endif #undef __CL_ENABLE_EXCEPTIONS #include @@ -118,8 +118,211 @@ static bool parseOpenCLVersion(const std::string& versionStr, int& major, int& m return true; } +static void split(const std::string &s, char delim, std::vector &elems) { + std::stringstream ss(s); + std::string item; + while (std::getline(ss, item, delim)) { + elems.push_back(item); + } +} + +static std::vector split(const std::string &s, char delim) { + std::vector elems; + split(s, delim, elems); + return elems; +} + +// Layout: :: +// Sample: AMD:GPU: +// Sample: AMD:GPU:Tahiti +// Sample: :GPU|CPU: = '' = ':' = '::' +static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr, + std::string& platform, std::vector& deviceTypes, std::string& deviceNameOrID) +{ + std::string deviceTypesStr; + size_t p0 = configurationStr.find(':'); + if (p0 != std::string::npos) + { + size_t p1 = configurationStr.find(':', p0 + 1); + if (p1 != std::string::npos) + { + size_t p2 = configurationStr.find(':', p1 + 1); + if (p2 != std::string::npos) + { + std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl; + return false; + } + else + { + // assume platform + device types + device name/id + platform = configurationStr.substr(0, p0); + deviceTypesStr = configurationStr.substr(p0 + 1, p1 - (p0 + 1)); + deviceNameOrID = configurationStr.substr(p1 + 1, 
configurationStr.length() - (p1 + 1)); + } + } + else + { + // assume platform + device types + platform = configurationStr.substr(0, p0); + deviceTypesStr = configurationStr.substr(p0 + 1, configurationStr.length() - (p0 + 1)); + } + } + else + { + // assume only platform + platform = configurationStr; + } + deviceTypes = split(deviceTypesStr, '|'); + return true; +} + +static bool __deviceSelected = false; +static bool selectOpenCLDevice() +{ + __deviceSelected = true; + + std::string platform; + std::vector deviceTypes; + std::string deviceName; + const char* configuration = getenv("OPENCV_OPENCL_DEVICE"); + if (configuration) + { + if (!parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName)) + return false; + } + + bool isID = false; + int deviceID = -1; + if (deviceName.length() == 1) + // We limit ID range to 0..9, because we want to write: + // - '2500' to mean i5-2500 + // - '8350' to mean AMD FX-8350 + // - '650' to mean GeForce 650 + // To extend ID range change condition to '> 0' + { + isID = true; + for (size_t i = 0; i < deviceName.length(); i++) + { + if (!isdigit(deviceName[i])) + { + isID = false; + break; + } + } + if (isID) + { + deviceID = atoi(deviceName.c_str()); + CV_Assert(deviceID >= 0); + } + } + + const PlatformInfo* platformInfo = NULL; + if (platform.length() > 0) + { + PlatformsInfo platforms; + getOpenCLPlatforms(platforms); + for (size_t i = 0; i < platforms.size(); i++) + { + if (platforms[i]->platformName.find(platform) != std::string::npos) + { + platformInfo = platforms[i]; + break; + } + } + if (platformInfo == NULL) + { + std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl; + goto not_found; + } + } + + if (deviceTypes.size() == 0) + { + if (!isID) + { + deviceTypes.push_back("GPU"); + deviceTypes.push_back("CPU"); + } + else + { + deviceTypes.push_back("ALL"); + } + } + for (size_t t = 0; t < deviceTypes.size(); t++) + { + int deviceType = 0; + if 
(deviceTypes[t] == "GPU") + { + deviceType = CVCL_DEVICE_TYPE_GPU; + } + else if (deviceTypes[t] == "CPU") + { + deviceType = CVCL_DEVICE_TYPE_CPU; + } + else if (deviceTypes[t] == "ACCELERATOR") + { + deviceType = CVCL_DEVICE_TYPE_ACCELERATOR; + } + else if (deviceTypes[t] == "ALL") + { + deviceType = CVCL_DEVICE_TYPE_ALL; + } + else + { + std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl; + goto not_found; + } + + DevicesInfo devices; + getOpenCLDevices(devices, deviceType, platformInfo); + + for (size_t i = (isID ? deviceID : 0); + (isID ? (i == (size_t)deviceID) : true) && (i < devices.size()); + i++) + { + if (isID || devices[i]->deviceName.find(deviceName) != std::string::npos) + { + // check for OpenCL 1.1 + if (devices[i]->deviceVersionMajor < 1 || + (devices[i]->deviceVersionMajor == 1 && devices[i]->deviceVersionMinor < 1)) + { + std::cerr << "Skip unsupported version of OpenCL device: " << devices[i]->deviceName + << "(" << devices[i]->platform->platformName << ")" << std::endl; + continue; // unsupported version of device, skip it + } + try + { + setDevice(devices[i]); + } + catch (...) + { + std::cerr << "ERROR: Can't select OpenCL device: " << devices[i]->deviceName + << "(" << devices[i]->platform->platformName << ")" << std::endl; + goto not_found; + } + return true; + } + } + } +not_found: + std::cerr << "ERROR: Required OpenCL device not found, check configuration: " << (configuration == NULL ? "" : configuration) << std::endl + << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl + << " Device types: "; + for (size_t t = 0; t < deviceTypes.size(); t++) + { + std::cerr << deviceTypes[t] << " "; + } + std::cerr << std::endl << " Device name: " << (deviceName.length() == 0 ? 
"any" : deviceName) << std::endl; + return false; +} + +static cv::Mutex __initializedMutex; +static bool __initialized = false; static int initializeOpenCLDevices() { + assert(!__initialized); + __initialized = true; + assert(global_devices.size() == 0); std::vector platforms; @@ -284,8 +487,6 @@ protected: } ~ContextImpl(); public: - - static ContextImpl* getContext(); static void setContext(const DeviceInfo* deviceInfo); bool supportsFeature(FEATURE_TYPE featureType) const; @@ -298,6 +499,28 @@ static ContextImpl* currentContext = NULL; Context* Context::getContext() { + if (currentContext == NULL) + { + if (!__initialized || !__deviceSelected) + { + cv::AutoLock lock(__initializedMutex); + if (!__initialized) + { + if (initializeOpenCLDevices() == 0) + { + CV_Error(CV_GpuNotSupported, "OpenCL not available"); + } + } + if (!__deviceSelected) + { + if (!selectOpenCLDevice()) + { + CV_Error(CV_GpuNotSupported, "Can't select OpenCL device"); + } + } + } + CV_Assert(currentContext != NULL); + } return currentContext; } @@ -422,13 +645,11 @@ void ContextImpl::setContext(const DeviceInfo* deviceInfo) } } -ContextImpl* ContextImpl::getContext() -{ - return currentContext; -} - int getOpenCLPlatforms(PlatformsInfo& platforms) { + if (!__initialized) + initializeOpenCLDevices(); + platforms.clear(); for (size_t id = 0; id < global_platforms.size(); ++id) @@ -442,6 +663,9 @@ int getOpenCLPlatforms(PlatformsInfo& platforms) int getOpenCLDevices(std::vector &devices, int deviceType, const PlatformInfo* platform) { + if (!__initialized) + initializeOpenCLDevices(); + devices.clear(); switch(deviceType) @@ -461,7 +685,7 @@ int getOpenCLDevices(std::vector &devices, int deviceType, co for (size_t id = 0; id < global_devices.size(); ++id) { DeviceInfoImpl& deviceInfo = global_devices[id]; - if (((int)deviceInfo.info.deviceType & deviceType) == deviceType) + if (((int)deviceInfo.info.deviceType & deviceType) != 0) { devices.push_back(&deviceInfo.info); } @@ -479,35 +703,14 @@ 
int getOpenCLDevices(std::vector &devices, int deviceType, co } } - if (currentContext == NULL) - { - // select default device - const DeviceInfo* selectedDevice = NULL; - for (size_t i = 0; i < devices.size(); i++) - { - const DeviceInfo* dev = devices[i]; - if (dev->deviceType == CL_DEVICE_TYPE_GPU) - { - selectedDevice = dev; - break; - } - else if (dev->deviceType == CL_DEVICE_TYPE_CPU && (selectedDevice == NULL)) - { - selectedDevice = dev; - } - } - - if (selectedDevice) - { - setDevice(selectedDevice); - } - } - return (int)devices.size(); } void setDevice(const DeviceInfo* info) { + if (!__deviceSelected) + __deviceSelected = true; + ContextImpl::setContext(info); } @@ -518,14 +721,14 @@ bool supportsFeature(FEATURE_TYPE featureType) struct __Module { - __Module() { initializeOpenCLDevices(); } + __Module() { /* moved to Context::getContext(): initializeOpenCLDevices(); */ } ~__Module() { ContextImpl::cleanupContext(); } }; static __Module __module; -}//namespace ocl -}//namespace cv +} // namespace ocl +} // namespace cv #if defined(WIN32) && defined(CVAPI_EXPORTS) diff --git a/modules/ocl/test/main.cpp b/modules/ocl/test/main.cpp index 4061c2579e..02df8419ca 100644 --- a/modules/ocl/test/main.cpp +++ b/modules/ocl/test/main.cpp @@ -83,45 +83,52 @@ int main(int argc, char **argv) "{ p | platform | -1 | set platform id }" "{ d | device | 0 | set device id }"; - CommandLineParser cmd(argc, argv, keys); - if (cmd.get("help")) + if (getenv("OPENCV_OPENCL_DEVICE") == NULL) // TODO Remove this after buildbot updates { - cout << "Available options besides google test option:" << endl; - cmd.printParams(); - return 0; - } - string type = cmd.get("type"); - int pid = cmd.get("platform"); - int device = cmd.get("device"); + CommandLineParser cmd(argc, argv, keys); + if (cmd.get("help")) + { + cout << "Available options besides google test option:" << endl; + cmd.printParams(); + return 0; + } + string type = cmd.get("type"); + int pid = cmd.get("platform"); + int 
device = cmd.get("device"); - print_info(); - int flag = CVCL_DEVICE_TYPE_GPU; - if(type == "cpu") - { - flag = CVCL_DEVICE_TYPE_CPU; - } + print_info(); + int flag = CVCL_DEVICE_TYPE_GPU; + if(type == "cpu") + { + flag = CVCL_DEVICE_TYPE_CPU; + } - cv::ocl::PlatformsInfo platformsInfo; - cv::ocl::getOpenCLPlatforms(platformsInfo); - if (pid >= (int)platformsInfo.size()) - { - std::cout << "platform is invalid\n"; - return 1; - } + cv::ocl::PlatformsInfo platformsInfo; + cv::ocl::getOpenCLPlatforms(platformsInfo); + if (pid >= (int)platformsInfo.size()) + { + std::cout << "platform is invalid\n"; + return 1; + } - cv::ocl::DevicesInfo devicesInfo; - int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? NULL : platformsInfo[pid]); - if (device < 0 || device >= devnums) - { - std::cout << "device/platform invalid\n"; - return 1; + cv::ocl::DevicesInfo devicesInfo; + int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? NULL : platformsInfo[pid]); + if (device < 0 || device >= devnums) + { + std::cout << "device/platform invalid\n"; + return 1; + } + + cv::ocl::setDevice(devicesInfo[device]); } - cv::ocl::setDevice(devicesInfo[device]); + const DeviceInfo& deviceInfo = cv::ocl::Context::getContext()->getDeviceInfo(); - cout << "Device type: " << type << endl - << "Platform name: " << devicesInfo[device]->platform->platformName << endl - << "Device name: " << devicesInfo[device]->deviceName << endl; + cout << "Device type: " << (deviceInfo.deviceType == CVCL_DEVICE_TYPE_CPU ? + "CPU" : + (deviceInfo.deviceType == CVCL_DEVICE_TYPE_GPU ? 
"GPU" : "unknown")) << endl + << "Platform name: " << deviceInfo.platform->platformName << endl + << "Device name: " << deviceInfo.deviceName << endl; return RUN_ALL_TESTS(); } From 8beb514ecfacc8ddaa7c53011021a84fb56c8fe0 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 30 Sep 2013 21:32:46 +0400 Subject: [PATCH 28/39] ocl: merge with upstream/2.4 --- modules/ocl/src/arithm.cpp | 6 ++-- modules/ocl/src/svm.cpp | 36 ++++++++++------------ modules/ocl/test/test_matrix_operation.cpp | 12 ++++---- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index 7d97e67545..f34e0f730f 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -614,7 +614,7 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) CV_Assert(!src1.empty()); CV_Assert(src2.empty() || (src1.type() == src2.type() && src1.size() == src2.size())); - if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F) + if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } @@ -1261,7 +1261,7 @@ int cv::ocl::countNonZero(const oclMat &src) CV_Error(CV_GpuNotSupported, "selected device doesn't support double"); } - size_t groupnum = src.clCxt->computeUnits(); + size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits; CV_Assert(groupnum != 0); int dbsize = groupnum; @@ -1708,7 +1708,7 @@ void cv::ocl::pow(const oclMat &x, double p, oclMat &y) void cv::ocl::setIdentity(oclMat& src, const Scalar & scalar) { Context *clCxt = Context::getContext(); - if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); return; diff --git a/modules/ocl/src/svm.cpp b/modules/ocl/src/svm.cpp index c3df581f40..311bd7d98f 100644 
--- a/modules/ocl/src/svm.cpp +++ b/modules/ocl/src/svm.cpp @@ -43,9 +43,13 @@ // //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" + using namespace cv; using namespace ocl; +namespace cv { namespace ocl { + #if 1 typedef float Qfloat; #define QFLOAT_TYPE CV_32F @@ -54,14 +58,6 @@ typedef double Qfloat; #define QFLOAT_TYPE CV_64F #endif -namespace cv -{ -namespace ocl -{ -///////////////////////////OpenCL kernel strings/////////////////////////// -extern const char *svm; -} -} class CvSVMKernel_ocl: public CvSVMKernel { public: @@ -612,7 +608,7 @@ static void matmul_rbf(oclMat& src, oclMat& src_e, oclMat& dst, int src_rows, in args.push_back(make_pair(sizeof(cl_int), (void* )&src2_cols)); args.push_back(make_pair(sizeof(cl_int), (void* )&width)); float gamma = 0.0f; - if(!Context::getContext()->supportsFeature(Context::CL_DOUBLE)) + if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) { gamma = (float)gamma1; args.push_back(make_pair(sizeof(cl_float), (void* )&gamma)); @@ -748,7 +744,7 @@ float CvSVM_OCL::predict(const CvMat* samples, CV_OUT CvMat* results) const if(params.kernel_type == CvSVM::RBF) { sv_.upload(sv_temp); - if(!Context::getContext()->supportsFeature(Context::CL_DOUBLE)) + if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) { dst = oclMat(sample_count, sv_total, CV_32FC1); } @@ -886,7 +882,7 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si ) if(params->kernel_type == CvSVM::RBF) { src_e = src; - if(!Context::getContext()->supportsFeature(Context::CL_DOUBLE)) + if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) { dst = oclMat(sample_count, sample_count, CV_32FC1); } @@ -1053,7 +1049,7 @@ void CvSVMKernel_ocl::calc( int vcount, const int row_idx, Qfloat* results, Mat& //int j; (this->*calc_func_ocl)( vcount, row_idx, results, src); -#if defined HAVE_CLAMDBLAS +// FIXIT #if defined HAVE_CLAMDBLAS const Qfloat max_val = (Qfloat)(FLT_MAX * 1e-3); int j; for( j = 0; j < vcount; j++ ) @@ -1063,7 
+1059,7 @@ void CvSVMKernel_ocl::calc( int vcount, const int row_idx, Qfloat* results, Mat& results[j] = max_val; } } -#endif +// FIXIT #endif } bool CvSVMKernel_ocl::create( const CvSVMParams* _params, Calc_ocl _calc_func, Calc _calc_func1 ) { @@ -1115,7 +1111,7 @@ void CvSVMKernel_ocl::calc_non_rbf_base( int vcount, const int row_idx, Qfloat* } void CvSVMKernel_ocl::calc_rbf( int vcount, const int row_idx, Qfloat* results, Mat& src) { - if(!Context::getContext()->supportsFeature(Context::CL_DOUBLE)) + if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) { for(int m = 0; m < vcount; m++) { @@ -1140,14 +1136,14 @@ void CvSVMKernel_ocl::calc_poly( int vcount, const int row_idx, Qfloat* results, calc_non_rbf_base( vcount, row_idx, results, src); -#if defined HAVE_CLAMDBLAS +//FIXIT #if defined HAVE_CLAMDBLAS CvMat R = cvMat( 1, vcount, QFLOAT_TYPE, results ); if( vcount > 0 ) { cvPow( &R, &R, params->degree ); } -#endif +//FIXIT #endif } @@ -1155,11 +1151,11 @@ void CvSVMKernel_ocl::calc_sigmoid( int vcount, const int row_idx, Qfloat* resul { calc_non_rbf_base( vcount, row_idx, results, src); // TODO: speedup this -#if defined HAVE_CLAMDBLAS +//FIXIT #if defined HAVE_CLAMDBLAS for(int j = 0; j < vcount; j++ ) { Qfloat t = results[j]; - double e = exp(-fabs(t)); + double e = ::exp(-fabs(t)); if( t > 0 ) { results[j] = (Qfloat)((1. - e) / (1. + e)); @@ -1169,7 +1165,7 @@ void CvSVMKernel_ocl::calc_sigmoid( int vcount, const int row_idx, Qfloat* resul results[j] = (Qfloat)((e - 1.) 
/ (e + 1.)); } } -#endif +//FIXIT #endif } CvSVM_OCL::CvSVM_OCL() { @@ -1199,3 +1195,5 @@ void CvSVM_OCL::create_solver( ) { solver = new CvSVMSolver_ocl(¶ms); } + +} } diff --git a/modules/ocl/test/test_matrix_operation.cpp b/modules/ocl/test/test_matrix_operation.cpp index 46e077a6bb..bc8cdf2bb3 100644 --- a/modules/ocl/test/test_matrix_operation.cpp +++ b/modules/ocl/test/test_matrix_operation.cpp @@ -132,7 +132,7 @@ typedef ConvertToTestBase ConvertTo; TEST_P(ConvertTo, Accuracy) { if((src_depth == CV_64F || dst_depth == CV_64F) && - !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::Context::CL_DOUBLE)) + !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE)) { return; // returns silently } @@ -228,7 +228,7 @@ typedef CopyToTestBase CopyTo; TEST_P(CopyTo, Without_mask) { if((src.depth() == CV_64F) && - !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::Context::CL_DOUBLE)) + !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE)) { return; // returns silently } @@ -246,7 +246,7 @@ TEST_P(CopyTo, Without_mask) TEST_P(CopyTo, With_mask) { if(src.depth() == CV_64F && - !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::Context::CL_DOUBLE)) + !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE)) { return; // returns silently } @@ -342,7 +342,7 @@ typedef SetToTestBase SetTo; TEST_P(SetTo, Without_mask) { if(depth == CV_64F && - !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::Context::CL_DOUBLE)) + !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE)) { return; // returns silently } @@ -360,7 +360,7 @@ TEST_P(SetTo, Without_mask) TEST_P(SetTo, With_mask) { if(depth == CV_64F && - !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::Context::CL_DOUBLE)) + !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE)) { return; // returns silently } @@ -430,7 +430,7 @@ PARAM_TEST_CASE(convertC3C4, MatType, bool) 
TEST_P(convertC3C4, Accuracy) { if(depth == CV_64F && - !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::Context::CL_DOUBLE)) + !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE)) { return; // returns silently } From 7f0680fc8be6ae485df2aab0e02ea92616590801 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 3 Oct 2013 17:05:00 +0400 Subject: [PATCH 29/39] ocl: workaround for OpenCL C++ bindings usage: CL/cl.hpp --- .../cl_runtime_opencl11_wrappers.hpp | 231 +++++++++++++++ .../cl_runtime_opencl12_wrappers.hpp | 273 ++++++++++++++++++ modules/ocl/src/cl_context.cpp | 9 + modules/ocl/src/cl_programcache.cpp | 9 + .../ocl/src/cl_runtime/generator/common.py | 23 ++ .../ocl/src/cl_runtime/generator/parser_cl.py | 6 + .../cl_runtime_opencl_wrappers.hpp.in | 6 + 7 files changed, 557 insertions(+) create mode 100644 modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl11_wrappers.hpp create mode 100644 modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl12_wrappers.hpp create mode 100644 modules/ocl/src/cl_runtime/generator/template/cl_runtime_opencl_wrappers.hpp.in diff --git a/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl11_wrappers.hpp b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl11_wrappers.hpp new file mode 100644 index 0000000000..2617272b08 --- /dev/null +++ b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl11_wrappers.hpp @@ -0,0 +1,231 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ +#define __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ + +// generated by parser_cl.py +#undef clGetPlatformIDs +#define clGetPlatformIDs clGetPlatformIDs_fn +inline cl_int clGetPlatformIDs(cl_uint p0, cl_platform_id* p1, cl_uint* p2) { return clGetPlatformIDs_pfn(p0, p1, p2); } +#undef clGetPlatformInfo +#define clGetPlatformInfo clGetPlatformInfo_fn +inline cl_int clGetPlatformInfo(cl_platform_id p0, cl_platform_info p1, size_t p2, 
void* p3, size_t* p4) { return clGetPlatformInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetDeviceIDs +#define clGetDeviceIDs clGetDeviceIDs_fn +inline cl_int clGetDeviceIDs(cl_platform_id p0, cl_device_type p1, cl_uint p2, cl_device_id* p3, cl_uint* p4) { return clGetDeviceIDs_pfn(p0, p1, p2, p3, p4); } +#undef clGetDeviceInfo +#define clGetDeviceInfo clGetDeviceInfo_fn +inline cl_int clGetDeviceInfo(cl_device_id p0, cl_device_info p1, size_t p2, void* p3, size_t* p4) { return clGetDeviceInfo_pfn(p0, p1, p2, p3, p4); } +#undef clCreateContext +#define clCreateContext clCreateContext_fn +inline cl_context clCreateContext(const cl_context_properties* p0, cl_uint p1, const cl_device_id* p2, void (CL_CALLBACK*p3) (const char*, const void*, size_t, void*), void* p4, cl_int* p5) { return clCreateContext_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateContextFromType +#define clCreateContextFromType clCreateContextFromType_fn +inline cl_context clCreateContextFromType(const cl_context_properties* p0, cl_device_type p1, void (CL_CALLBACK*p2) (const char*, const void*, size_t, void*), void* p3, cl_int* p4) { return clCreateContextFromType_pfn(p0, p1, p2, p3, p4); } +#undef clRetainContext +#define clRetainContext clRetainContext_fn +inline cl_int clRetainContext(cl_context p0) { return clRetainContext_pfn(p0); } +#undef clReleaseContext +#define clReleaseContext clReleaseContext_fn +inline cl_int clReleaseContext(cl_context p0) { return clReleaseContext_pfn(p0); } +#undef clGetContextInfo +#define clGetContextInfo clGetContextInfo_fn +inline cl_int clGetContextInfo(cl_context p0, cl_context_info p1, size_t p2, void* p3, size_t* p4) { return clGetContextInfo_pfn(p0, p1, p2, p3, p4); } +#undef clCreateCommandQueue +#define clCreateCommandQueue clCreateCommandQueue_fn +inline cl_command_queue clCreateCommandQueue(cl_context p0, cl_device_id p1, cl_command_queue_properties p2, cl_int* p3) { return clCreateCommandQueue_pfn(p0, p1, p2, p3); } +#undef clRetainCommandQueue +#define 
clRetainCommandQueue clRetainCommandQueue_fn +inline cl_int clRetainCommandQueue(cl_command_queue p0) { return clRetainCommandQueue_pfn(p0); } +#undef clReleaseCommandQueue +#define clReleaseCommandQueue clReleaseCommandQueue_fn +inline cl_int clReleaseCommandQueue(cl_command_queue p0) { return clReleaseCommandQueue_pfn(p0); } +#undef clGetCommandQueueInfo +#define clGetCommandQueueInfo clGetCommandQueueInfo_fn +inline cl_int clGetCommandQueueInfo(cl_command_queue p0, cl_command_queue_info p1, size_t p2, void* p3, size_t* p4) { return clGetCommandQueueInfo_pfn(p0, p1, p2, p3, p4); } +#undef clSetCommandQueueProperty +#define clSetCommandQueueProperty clSetCommandQueueProperty_fn +inline cl_int clSetCommandQueueProperty(cl_command_queue p0, cl_command_queue_properties p1, cl_bool p2, cl_command_queue_properties* p3) { return clSetCommandQueueProperty_pfn(p0, p1, p2, p3); } +#undef clCreateBuffer +#define clCreateBuffer clCreateBuffer_fn +inline cl_mem clCreateBuffer(cl_context p0, cl_mem_flags p1, size_t p2, void* p3, cl_int* p4) { return clCreateBuffer_pfn(p0, p1, p2, p3, p4); } +#undef clCreateSubBuffer +#define clCreateSubBuffer clCreateSubBuffer_fn +inline cl_mem clCreateSubBuffer(cl_mem p0, cl_mem_flags p1, cl_buffer_create_type p2, const void* p3, cl_int* p4) { return clCreateSubBuffer_pfn(p0, p1, p2, p3, p4); } +#undef clCreateImage2D +#define clCreateImage2D clCreateImage2D_fn +inline cl_mem clCreateImage2D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, void* p6, cl_int* p7) { return clCreateImage2D_pfn(p0, p1, p2, p3, p4, p5, p6, p7); } +#undef clCreateImage3D +#define clCreateImage3D clCreateImage3D_fn +inline cl_mem clCreateImage3D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, size_t p6, size_t p7, void* p8, cl_int* p9) { return clCreateImage3D_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clRetainMemObject +#define clRetainMemObject clRetainMemObject_fn 
+inline cl_int clRetainMemObject(cl_mem p0) { return clRetainMemObject_pfn(p0); } +#undef clReleaseMemObject +#define clReleaseMemObject clReleaseMemObject_fn +inline cl_int clReleaseMemObject(cl_mem p0) { return clReleaseMemObject_pfn(p0); } +#undef clGetSupportedImageFormats +#define clGetSupportedImageFormats clGetSupportedImageFormats_fn +inline cl_int clGetSupportedImageFormats(cl_context p0, cl_mem_flags p1, cl_mem_object_type p2, cl_uint p3, cl_image_format* p4, cl_uint* p5) { return clGetSupportedImageFormats_pfn(p0, p1, p2, p3, p4, p5); } +#undef clGetMemObjectInfo +#define clGetMemObjectInfo clGetMemObjectInfo_fn +inline cl_int clGetMemObjectInfo(cl_mem p0, cl_mem_info p1, size_t p2, void* p3, size_t* p4) { return clGetMemObjectInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetImageInfo +#define clGetImageInfo clGetImageInfo_fn +inline cl_int clGetImageInfo(cl_mem p0, cl_image_info p1, size_t p2, void* p3, size_t* p4) { return clGetImageInfo_pfn(p0, p1, p2, p3, p4); } +#undef clSetMemObjectDestructorCallback +#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_fn +inline cl_int clSetMemObjectDestructorCallback(cl_mem p0, void (CL_CALLBACK*p1) (cl_mem, void*), void* p2) { return clSetMemObjectDestructorCallback_pfn(p0, p1, p2); } +#undef clCreateSampler +#define clCreateSampler clCreateSampler_fn +inline cl_sampler clCreateSampler(cl_context p0, cl_bool p1, cl_addressing_mode p2, cl_filter_mode p3, cl_int* p4) { return clCreateSampler_pfn(p0, p1, p2, p3, p4); } +#undef clRetainSampler +#define clRetainSampler clRetainSampler_fn +inline cl_int clRetainSampler(cl_sampler p0) { return clRetainSampler_pfn(p0); } +#undef clReleaseSampler +#define clReleaseSampler clReleaseSampler_fn +inline cl_int clReleaseSampler(cl_sampler p0) { return clReleaseSampler_pfn(p0); } +#undef clGetSamplerInfo +#define clGetSamplerInfo clGetSamplerInfo_fn +inline cl_int clGetSamplerInfo(cl_sampler p0, cl_sampler_info p1, size_t p2, void* p3, size_t* p4) { return 
clGetSamplerInfo_pfn(p0, p1, p2, p3, p4); } +#undef clCreateProgramWithSource +#define clCreateProgramWithSource clCreateProgramWithSource_fn +inline cl_program clCreateProgramWithSource(cl_context p0, cl_uint p1, const char** p2, const size_t* p3, cl_int* p4) { return clCreateProgramWithSource_pfn(p0, p1, p2, p3, p4); } +#undef clCreateProgramWithBinary +#define clCreateProgramWithBinary clCreateProgramWithBinary_fn +inline cl_program clCreateProgramWithBinary(cl_context p0, cl_uint p1, const cl_device_id* p2, const size_t* p3, const unsigned char** p4, cl_int* p5, cl_int* p6) { return clCreateProgramWithBinary_pfn(p0, p1, p2, p3, p4, p5, p6); } +#undef clRetainProgram +#define clRetainProgram clRetainProgram_fn +inline cl_int clRetainProgram(cl_program p0) { return clRetainProgram_pfn(p0); } +#undef clReleaseProgram +#define clReleaseProgram clReleaseProgram_fn +inline cl_int clReleaseProgram(cl_program p0) { return clReleaseProgram_pfn(p0); } +#undef clBuildProgram +#define clBuildProgram clBuildProgram_fn +inline cl_int clBuildProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3, void (CL_CALLBACK*p4) (cl_program, void*), void* p5) { return clBuildProgram_pfn(p0, p1, p2, p3, p4, p5); } +#undef clUnloadCompiler +#define clUnloadCompiler clUnloadCompiler_fn +inline cl_int clUnloadCompiler() { return clUnloadCompiler_pfn(); } +#undef clGetProgramInfo +#define clGetProgramInfo clGetProgramInfo_fn +inline cl_int clGetProgramInfo(cl_program p0, cl_program_info p1, size_t p2, void* p3, size_t* p4) { return clGetProgramInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetProgramBuildInfo +#define clGetProgramBuildInfo clGetProgramBuildInfo_fn +inline cl_int clGetProgramBuildInfo(cl_program p0, cl_device_id p1, cl_program_build_info p2, size_t p3, void* p4, size_t* p5) { return clGetProgramBuildInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateKernel +#define clCreateKernel clCreateKernel_fn +inline cl_kernel clCreateKernel(cl_program p0, const char* p1, 
cl_int* p2) { return clCreateKernel_pfn(p0, p1, p2); } +#undef clCreateKernelsInProgram +#define clCreateKernelsInProgram clCreateKernelsInProgram_fn +inline cl_int clCreateKernelsInProgram(cl_program p0, cl_uint p1, cl_kernel* p2, cl_uint* p3) { return clCreateKernelsInProgram_pfn(p0, p1, p2, p3); } +#undef clRetainKernel +#define clRetainKernel clRetainKernel_fn +inline cl_int clRetainKernel(cl_kernel p0) { return clRetainKernel_pfn(p0); } +#undef clReleaseKernel +#define clReleaseKernel clReleaseKernel_fn +inline cl_int clReleaseKernel(cl_kernel p0) { return clReleaseKernel_pfn(p0); } +#undef clSetKernelArg +#define clSetKernelArg clSetKernelArg_fn +inline cl_int clSetKernelArg(cl_kernel p0, cl_uint p1, size_t p2, const void* p3) { return clSetKernelArg_pfn(p0, p1, p2, p3); } +#undef clGetKernelInfo +#define clGetKernelInfo clGetKernelInfo_fn +inline cl_int clGetKernelInfo(cl_kernel p0, cl_kernel_info p1, size_t p2, void* p3, size_t* p4) { return clGetKernelInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetKernelWorkGroupInfo +#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_fn +inline cl_int clGetKernelWorkGroupInfo(cl_kernel p0, cl_device_id p1, cl_kernel_work_group_info p2, size_t p3, void* p4, size_t* p5) { return clGetKernelWorkGroupInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clWaitForEvents +#define clWaitForEvents clWaitForEvents_fn +inline cl_int clWaitForEvents(cl_uint p0, const cl_event* p1) { return clWaitForEvents_pfn(p0, p1); } +#undef clGetEventInfo +#define clGetEventInfo clGetEventInfo_fn +inline cl_int clGetEventInfo(cl_event p0, cl_event_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventInfo_pfn(p0, p1, p2, p3, p4); } +#undef clCreateUserEvent +#define clCreateUserEvent clCreateUserEvent_fn +inline cl_event clCreateUserEvent(cl_context p0, cl_int* p1) { return clCreateUserEvent_pfn(p0, p1); } +#undef clRetainEvent +#define clRetainEvent clRetainEvent_fn +inline cl_int clRetainEvent(cl_event p0) { return clRetainEvent_pfn(p0); } 
+#undef clReleaseEvent +#define clReleaseEvent clReleaseEvent_fn +inline cl_int clReleaseEvent(cl_event p0) { return clReleaseEvent_pfn(p0); } +#undef clSetUserEventStatus +#define clSetUserEventStatus clSetUserEventStatus_fn +inline cl_int clSetUserEventStatus(cl_event p0, cl_int p1) { return clSetUserEventStatus_pfn(p0, p1); } +#undef clSetEventCallback +#define clSetEventCallback clSetEventCallback_fn +inline cl_int clSetEventCallback(cl_event p0, cl_int p1, void (CL_CALLBACK*p2) (cl_event, cl_int, void*), void* p3) { return clSetEventCallback_pfn(p0, p1, p2, p3); } +#undef clGetEventProfilingInfo +#define clGetEventProfilingInfo clGetEventProfilingInfo_fn +inline cl_int clGetEventProfilingInfo(cl_event p0, cl_profiling_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventProfilingInfo_pfn(p0, p1, p2, p3, p4); } +#undef clFlush +#define clFlush clFlush_fn +inline cl_int clFlush(cl_command_queue p0) { return clFlush_pfn(p0); } +#undef clFinish +#define clFinish clFinish_fn +inline cl_int clFinish(cl_command_queue p0) { return clFinish_pfn(p0); } +#undef clEnqueueReadBuffer +#define clEnqueueReadBuffer clEnqueueReadBuffer_fn +inline cl_int clEnqueueReadBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueReadBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueReadBufferRect +#define clEnqueueReadBufferRect clEnqueueReadBufferRect_fn +inline cl_int clEnqueueReadBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueReadBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } +#undef clEnqueueWriteBuffer +#define clEnqueueWriteBuffer clEnqueueWriteBuffer_fn +inline cl_int clEnqueueWriteBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, 
const void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueWriteBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueWriteBufferRect +#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_fn +inline cl_int clEnqueueWriteBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, const void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueWriteBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } +#undef clEnqueueCopyBuffer +#define clEnqueueCopyBuffer clEnqueueCopyBuffer_fn +inline cl_int clEnqueueCopyBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyBufferRect +#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_fn +inline cl_int clEnqueueCopyBufferRect(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, cl_uint p10, const cl_event* p11, cl_event* p12) { return clEnqueueCopyBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12); } +#undef clEnqueueReadImage +#define clEnqueueReadImage clEnqueueReadImage_fn +inline cl_int clEnqueueReadImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueReadImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } +#undef clEnqueueWriteImage +#define clEnqueueWriteImage clEnqueueWriteImage_fn +inline cl_int clEnqueueWriteImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, const void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueWriteImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, 
p8, p9, p10); } +#undef clEnqueueCopyImage +#define clEnqueueCopyImage clEnqueueCopyImage_fn +inline cl_int clEnqueueCopyImage(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyImageToBuffer +#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_fn +inline cl_int clEnqueueCopyImageToBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImageToBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyBufferToImage +#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_fn +inline cl_int clEnqueueCopyBufferToImage(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBufferToImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueMapBuffer +#define clEnqueueMapBuffer clEnqueueMapBuffer_fn +inline void* clEnqueueMapBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8, cl_int* p9) { return clEnqueueMapBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clEnqueueMapImage +#define clEnqueueMapImage clEnqueueMapImage_fn +inline void* clEnqueueMapImage(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, const size_t* p4, const size_t* p5, size_t* p6, size_t* p7, cl_uint p8, const cl_event* p9, cl_event* p10, cl_int* p11) { return clEnqueueMapImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11); } +#undef clEnqueueUnmapMemObject +#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_fn +inline cl_int clEnqueueUnmapMemObject(cl_command_queue p0, cl_mem p1, void* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return 
clEnqueueUnmapMemObject_pfn(p0, p1, p2, p3, p4, p5); } +#undef clEnqueueNDRangeKernel +#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_fn +inline cl_int clEnqueueNDRangeKernel(cl_command_queue p0, cl_kernel p1, cl_uint p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueNDRangeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueTask +#define clEnqueueTask clEnqueueTask_fn +inline cl_int clEnqueueTask(cl_command_queue p0, cl_kernel p1, cl_uint p2, const cl_event* p3, cl_event* p4) { return clEnqueueTask_pfn(p0, p1, p2, p3, p4); } +#undef clEnqueueNativeKernel +#define clEnqueueNativeKernel clEnqueueNativeKernel_fn +inline cl_int clEnqueueNativeKernel(cl_command_queue p0, void (CL_CALLBACK*p1) (void*), void* p2, size_t p3, cl_uint p4, const cl_mem* p5, const void** p6, cl_uint p7, const cl_event* p8, cl_event* p9) { return clEnqueueNativeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clEnqueueMarker +#define clEnqueueMarker clEnqueueMarker_fn +inline cl_int clEnqueueMarker(cl_command_queue p0, cl_event* p1) { return clEnqueueMarker_pfn(p0, p1); } +#undef clEnqueueWaitForEvents +#define clEnqueueWaitForEvents clEnqueueWaitForEvents_fn +inline cl_int clEnqueueWaitForEvents(cl_command_queue p0, cl_uint p1, const cl_event* p2) { return clEnqueueWaitForEvents_pfn(p0, p1, p2); } +#undef clEnqueueBarrier +#define clEnqueueBarrier clEnqueueBarrier_fn +inline cl_int clEnqueueBarrier(cl_command_queue p0) { return clEnqueueBarrier_pfn(p0); } +#undef clGetExtensionFunctionAddress +#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_fn +inline void* clGetExtensionFunctionAddress(const char* p0) { return clGetExtensionFunctionAddress_pfn(p0); } + +#endif // __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ \ No newline at end of file diff --git a/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl12_wrappers.hpp 
b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl12_wrappers.hpp new file mode 100644 index 0000000000..8716450e27 --- /dev/null +++ b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl12_wrappers.hpp @@ -0,0 +1,273 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ +#define __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ + +// generated by parser_cl.py +#undef clGetPlatformIDs +#define clGetPlatformIDs clGetPlatformIDs_fn +inline cl_int clGetPlatformIDs(cl_uint p0, cl_platform_id* p1, cl_uint* p2) { return clGetPlatformIDs_pfn(p0, p1, p2); } +#undef clGetPlatformInfo +#define clGetPlatformInfo clGetPlatformInfo_fn +inline cl_int clGetPlatformInfo(cl_platform_id p0, cl_platform_info p1, size_t p2, void* p3, size_t* p4) { return clGetPlatformInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetDeviceIDs +#define clGetDeviceIDs clGetDeviceIDs_fn +inline cl_int clGetDeviceIDs(cl_platform_id p0, cl_device_type p1, cl_uint p2, cl_device_id* p3, cl_uint* p4) { return clGetDeviceIDs_pfn(p0, p1, p2, p3, p4); } +#undef clGetDeviceInfo +#define clGetDeviceInfo clGetDeviceInfo_fn +inline cl_int clGetDeviceInfo(cl_device_id p0, cl_device_info p1, size_t p2, void* p3, size_t* p4) { return clGetDeviceInfo_pfn(p0, p1, p2, p3, p4); } +#undef clCreateSubDevices +#define clCreateSubDevices clCreateSubDevices_fn +inline cl_int clCreateSubDevices(cl_device_id p0, const cl_device_partition_property* p1, cl_uint p2, cl_device_id* p3, cl_uint* p4) { return clCreateSubDevices_pfn(p0, p1, p2, p3, p4); } +#undef clRetainDevice +#define clRetainDevice clRetainDevice_fn +inline cl_int clRetainDevice(cl_device_id p0) { return clRetainDevice_pfn(p0); } +#undef clReleaseDevice +#define clReleaseDevice clReleaseDevice_fn +inline cl_int clReleaseDevice(cl_device_id p0) { return clReleaseDevice_pfn(p0); } +#undef clCreateContext +#define clCreateContext clCreateContext_fn +inline cl_context clCreateContext(const cl_context_properties* p0, 
cl_uint p1, const cl_device_id* p2, void (CL_CALLBACK*p3) (const char*, const void*, size_t, void*), void* p4, cl_int* p5) { return clCreateContext_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateContextFromType +#define clCreateContextFromType clCreateContextFromType_fn +inline cl_context clCreateContextFromType(const cl_context_properties* p0, cl_device_type p1, void (CL_CALLBACK*p2) (const char*, const void*, size_t, void*), void* p3, cl_int* p4) { return clCreateContextFromType_pfn(p0, p1, p2, p3, p4); } +#undef clRetainContext +#define clRetainContext clRetainContext_fn +inline cl_int clRetainContext(cl_context p0) { return clRetainContext_pfn(p0); } +#undef clReleaseContext +#define clReleaseContext clReleaseContext_fn +inline cl_int clReleaseContext(cl_context p0) { return clReleaseContext_pfn(p0); } +#undef clGetContextInfo +#define clGetContextInfo clGetContextInfo_fn +inline cl_int clGetContextInfo(cl_context p0, cl_context_info p1, size_t p2, void* p3, size_t* p4) { return clGetContextInfo_pfn(p0, p1, p2, p3, p4); } +#undef clCreateCommandQueue +#define clCreateCommandQueue clCreateCommandQueue_fn +inline cl_command_queue clCreateCommandQueue(cl_context p0, cl_device_id p1, cl_command_queue_properties p2, cl_int* p3) { return clCreateCommandQueue_pfn(p0, p1, p2, p3); } +#undef clRetainCommandQueue +#define clRetainCommandQueue clRetainCommandQueue_fn +inline cl_int clRetainCommandQueue(cl_command_queue p0) { return clRetainCommandQueue_pfn(p0); } +#undef clReleaseCommandQueue +#define clReleaseCommandQueue clReleaseCommandQueue_fn +inline cl_int clReleaseCommandQueue(cl_command_queue p0) { return clReleaseCommandQueue_pfn(p0); } +#undef clGetCommandQueueInfo +#define clGetCommandQueueInfo clGetCommandQueueInfo_fn +inline cl_int clGetCommandQueueInfo(cl_command_queue p0, cl_command_queue_info p1, size_t p2, void* p3, size_t* p4) { return clGetCommandQueueInfo_pfn(p0, p1, p2, p3, p4); } +#undef clCreateBuffer +#define clCreateBuffer clCreateBuffer_fn 
+inline cl_mem clCreateBuffer(cl_context p0, cl_mem_flags p1, size_t p2, void* p3, cl_int* p4) { return clCreateBuffer_pfn(p0, p1, p2, p3, p4); } +#undef clCreateSubBuffer +#define clCreateSubBuffer clCreateSubBuffer_fn +inline cl_mem clCreateSubBuffer(cl_mem p0, cl_mem_flags p1, cl_buffer_create_type p2, const void* p3, cl_int* p4) { return clCreateSubBuffer_pfn(p0, p1, p2, p3, p4); } +#undef clCreateImage +#define clCreateImage clCreateImage_fn +inline cl_mem clCreateImage(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, const cl_image_desc* p3, void* p4, cl_int* p5) { return clCreateImage_pfn(p0, p1, p2, p3, p4, p5); } +#undef clRetainMemObject +#define clRetainMemObject clRetainMemObject_fn +inline cl_int clRetainMemObject(cl_mem p0) { return clRetainMemObject_pfn(p0); } +#undef clReleaseMemObject +#define clReleaseMemObject clReleaseMemObject_fn +inline cl_int clReleaseMemObject(cl_mem p0) { return clReleaseMemObject_pfn(p0); } +#undef clGetSupportedImageFormats +#define clGetSupportedImageFormats clGetSupportedImageFormats_fn +inline cl_int clGetSupportedImageFormats(cl_context p0, cl_mem_flags p1, cl_mem_object_type p2, cl_uint p3, cl_image_format* p4, cl_uint* p5) { return clGetSupportedImageFormats_pfn(p0, p1, p2, p3, p4, p5); } +#undef clGetMemObjectInfo +#define clGetMemObjectInfo clGetMemObjectInfo_fn +inline cl_int clGetMemObjectInfo(cl_mem p0, cl_mem_info p1, size_t p2, void* p3, size_t* p4) { return clGetMemObjectInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetImageInfo +#define clGetImageInfo clGetImageInfo_fn +inline cl_int clGetImageInfo(cl_mem p0, cl_image_info p1, size_t p2, void* p3, size_t* p4) { return clGetImageInfo_pfn(p0, p1, p2, p3, p4); } +#undef clSetMemObjectDestructorCallback +#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_fn +inline cl_int clSetMemObjectDestructorCallback(cl_mem p0, void (CL_CALLBACK*p1) (cl_mem, void*), void* p2) { return clSetMemObjectDestructorCallback_pfn(p0, p1, p2); } +#undef 
clCreateSampler +#define clCreateSampler clCreateSampler_fn +inline cl_sampler clCreateSampler(cl_context p0, cl_bool p1, cl_addressing_mode p2, cl_filter_mode p3, cl_int* p4) { return clCreateSampler_pfn(p0, p1, p2, p3, p4); } +#undef clRetainSampler +#define clRetainSampler clRetainSampler_fn +inline cl_int clRetainSampler(cl_sampler p0) { return clRetainSampler_pfn(p0); } +#undef clReleaseSampler +#define clReleaseSampler clReleaseSampler_fn +inline cl_int clReleaseSampler(cl_sampler p0) { return clReleaseSampler_pfn(p0); } +#undef clGetSamplerInfo +#define clGetSamplerInfo clGetSamplerInfo_fn +inline cl_int clGetSamplerInfo(cl_sampler p0, cl_sampler_info p1, size_t p2, void* p3, size_t* p4) { return clGetSamplerInfo_pfn(p0, p1, p2, p3, p4); } +#undef clCreateProgramWithSource +#define clCreateProgramWithSource clCreateProgramWithSource_fn +inline cl_program clCreateProgramWithSource(cl_context p0, cl_uint p1, const char** p2, const size_t* p3, cl_int* p4) { return clCreateProgramWithSource_pfn(p0, p1, p2, p3, p4); } +#undef clCreateProgramWithBinary +#define clCreateProgramWithBinary clCreateProgramWithBinary_fn +inline cl_program clCreateProgramWithBinary(cl_context p0, cl_uint p1, const cl_device_id* p2, const size_t* p3, const unsigned char** p4, cl_int* p5, cl_int* p6) { return clCreateProgramWithBinary_pfn(p0, p1, p2, p3, p4, p5, p6); } +#undef clCreateProgramWithBuiltInKernels +#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_fn +inline cl_program clCreateProgramWithBuiltInKernels(cl_context p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_int* p4) { return clCreateProgramWithBuiltInKernels_pfn(p0, p1, p2, p3, p4); } +#undef clRetainProgram +#define clRetainProgram clRetainProgram_fn +inline cl_int clRetainProgram(cl_program p0) { return clRetainProgram_pfn(p0); } +#undef clReleaseProgram +#define clReleaseProgram clReleaseProgram_fn +inline cl_int clReleaseProgram(cl_program p0) { return clReleaseProgram_pfn(p0); } 
+#undef clBuildProgram +#define clBuildProgram clBuildProgram_fn +inline cl_int clBuildProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3, void (CL_CALLBACK*p4) (cl_program, void*), void* p5) { return clBuildProgram_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCompileProgram +#define clCompileProgram clCompileProgram_fn +inline cl_int clCompileProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_uint p4, const cl_program* p5, const char** p6, void (CL_CALLBACK*p7) (cl_program, void*), void* p8) { return clCompileProgram_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clLinkProgram +#define clLinkProgram clLinkProgram_fn +inline cl_program clLinkProgram(cl_context p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_uint p4, const cl_program* p5, void (CL_CALLBACK*p6) (cl_program, void*), void* p7, cl_int* p8) { return clLinkProgram_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clUnloadPlatformCompiler +#define clUnloadPlatformCompiler clUnloadPlatformCompiler_fn +inline cl_int clUnloadPlatformCompiler(cl_platform_id p0) { return clUnloadPlatformCompiler_pfn(p0); } +#undef clGetProgramInfo +#define clGetProgramInfo clGetProgramInfo_fn +inline cl_int clGetProgramInfo(cl_program p0, cl_program_info p1, size_t p2, void* p3, size_t* p4) { return clGetProgramInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetProgramBuildInfo +#define clGetProgramBuildInfo clGetProgramBuildInfo_fn +inline cl_int clGetProgramBuildInfo(cl_program p0, cl_device_id p1, cl_program_build_info p2, size_t p3, void* p4, size_t* p5) { return clGetProgramBuildInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateKernel +#define clCreateKernel clCreateKernel_fn +inline cl_kernel clCreateKernel(cl_program p0, const char* p1, cl_int* p2) { return clCreateKernel_pfn(p0, p1, p2); } +#undef clCreateKernelsInProgram +#define clCreateKernelsInProgram clCreateKernelsInProgram_fn +inline cl_int clCreateKernelsInProgram(cl_program p0, cl_uint p1, cl_kernel* p2, 
cl_uint* p3) { return clCreateKernelsInProgram_pfn(p0, p1, p2, p3); } +#undef clRetainKernel +#define clRetainKernel clRetainKernel_fn +inline cl_int clRetainKernel(cl_kernel p0) { return clRetainKernel_pfn(p0); } +#undef clReleaseKernel +#define clReleaseKernel clReleaseKernel_fn +inline cl_int clReleaseKernel(cl_kernel p0) { return clReleaseKernel_pfn(p0); } +#undef clSetKernelArg +#define clSetKernelArg clSetKernelArg_fn +inline cl_int clSetKernelArg(cl_kernel p0, cl_uint p1, size_t p2, const void* p3) { return clSetKernelArg_pfn(p0, p1, p2, p3); } +#undef clGetKernelInfo +#define clGetKernelInfo clGetKernelInfo_fn +inline cl_int clGetKernelInfo(cl_kernel p0, cl_kernel_info p1, size_t p2, void* p3, size_t* p4) { return clGetKernelInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetKernelArgInfo +#define clGetKernelArgInfo clGetKernelArgInfo_fn +inline cl_int clGetKernelArgInfo(cl_kernel p0, cl_uint p1, cl_kernel_arg_info p2, size_t p3, void* p4, size_t* p5) { return clGetKernelArgInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clGetKernelWorkGroupInfo +#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_fn +inline cl_int clGetKernelWorkGroupInfo(cl_kernel p0, cl_device_id p1, cl_kernel_work_group_info p2, size_t p3, void* p4, size_t* p5) { return clGetKernelWorkGroupInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clWaitForEvents +#define clWaitForEvents clWaitForEvents_fn +inline cl_int clWaitForEvents(cl_uint p0, const cl_event* p1) { return clWaitForEvents_pfn(p0, p1); } +#undef clGetEventInfo +#define clGetEventInfo clGetEventInfo_fn +inline cl_int clGetEventInfo(cl_event p0, cl_event_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventInfo_pfn(p0, p1, p2, p3, p4); } +#undef clCreateUserEvent +#define clCreateUserEvent clCreateUserEvent_fn +inline cl_event clCreateUserEvent(cl_context p0, cl_int* p1) { return clCreateUserEvent_pfn(p0, p1); } +#undef clRetainEvent +#define clRetainEvent clRetainEvent_fn +inline cl_int clRetainEvent(cl_event p0) { return 
clRetainEvent_pfn(p0); } +#undef clReleaseEvent +#define clReleaseEvent clReleaseEvent_fn +inline cl_int clReleaseEvent(cl_event p0) { return clReleaseEvent_pfn(p0); } +#undef clSetUserEventStatus +#define clSetUserEventStatus clSetUserEventStatus_fn +inline cl_int clSetUserEventStatus(cl_event p0, cl_int p1) { return clSetUserEventStatus_pfn(p0, p1); } +#undef clSetEventCallback +#define clSetEventCallback clSetEventCallback_fn +inline cl_int clSetEventCallback(cl_event p0, cl_int p1, void (CL_CALLBACK*p2) (cl_event, cl_int, void*), void* p3) { return clSetEventCallback_pfn(p0, p1, p2, p3); } +#undef clGetEventProfilingInfo +#define clGetEventProfilingInfo clGetEventProfilingInfo_fn +inline cl_int clGetEventProfilingInfo(cl_event p0, cl_profiling_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventProfilingInfo_pfn(p0, p1, p2, p3, p4); } +#undef clFlush +#define clFlush clFlush_fn +inline cl_int clFlush(cl_command_queue p0) { return clFlush_pfn(p0); } +#undef clFinish +#define clFinish clFinish_fn +inline cl_int clFinish(cl_command_queue p0) { return clFinish_pfn(p0); } +#undef clEnqueueReadBuffer +#define clEnqueueReadBuffer clEnqueueReadBuffer_fn +inline cl_int clEnqueueReadBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueReadBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueReadBufferRect +#define clEnqueueReadBufferRect clEnqueueReadBufferRect_fn +inline cl_int clEnqueueReadBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueReadBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } +#undef clEnqueueWriteBuffer +#define clEnqueueWriteBuffer clEnqueueWriteBuffer_fn +inline cl_int clEnqueueWriteBuffer(cl_command_queue p0, cl_mem p1, cl_bool 
p2, size_t p3, size_t p4, const void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueWriteBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueWriteBufferRect +#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_fn +inline cl_int clEnqueueWriteBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, const void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueWriteBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } +#undef clEnqueueFillBuffer +#define clEnqueueFillBuffer clEnqueueFillBuffer_fn +inline cl_int clEnqueueFillBuffer(cl_command_queue p0, cl_mem p1, const void* p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueFillBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyBuffer +#define clEnqueueCopyBuffer clEnqueueCopyBuffer_fn +inline cl_int clEnqueueCopyBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyBufferRect +#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_fn +inline cl_int clEnqueueCopyBufferRect(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, cl_uint p10, const cl_event* p11, cl_event* p12) { return clEnqueueCopyBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12); } +#undef clEnqueueReadImage +#define clEnqueueReadImage clEnqueueReadImage_fn +inline cl_int clEnqueueReadImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueReadImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } 
+#undef clEnqueueWriteImage +#define clEnqueueWriteImage clEnqueueWriteImage_fn +inline cl_int clEnqueueWriteImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, const void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueWriteImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } +#undef clEnqueueFillImage +#define clEnqueueFillImage clEnqueueFillImage_fn +inline cl_int clEnqueueFillImage(cl_command_queue p0, cl_mem p1, const void* p2, const size_t* p3, const size_t* p4, cl_uint p5, const cl_event* p6, cl_event* p7) { return clEnqueueFillImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7); } +#undef clEnqueueCopyImage +#define clEnqueueCopyImage clEnqueueCopyImage_fn +inline cl_int clEnqueueCopyImage(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyImageToBuffer +#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_fn +inline cl_int clEnqueueCopyImageToBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImageToBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyBufferToImage +#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_fn +inline cl_int clEnqueueCopyBufferToImage(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBufferToImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueMapBuffer +#define clEnqueueMapBuffer clEnqueueMapBuffer_fn +inline void* clEnqueueMapBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8, cl_int* p9) { return clEnqueueMapBuffer_pfn(p0, 
p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clEnqueueMapImage +#define clEnqueueMapImage clEnqueueMapImage_fn +inline void* clEnqueueMapImage(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, const size_t* p4, const size_t* p5, size_t* p6, size_t* p7, cl_uint p8, const cl_event* p9, cl_event* p10, cl_int* p11) { return clEnqueueMapImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11); } +#undef clEnqueueUnmapMemObject +#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_fn +inline cl_int clEnqueueUnmapMemObject(cl_command_queue p0, cl_mem p1, void* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueUnmapMemObject_pfn(p0, p1, p2, p3, p4, p5); } +#undef clEnqueueMigrateMemObjects +#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_fn +inline cl_int clEnqueueMigrateMemObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_mem_migration_flags p3, cl_uint p4, const cl_event* p5, cl_event* p6) { return clEnqueueMigrateMemObjects_pfn(p0, p1, p2, p3, p4, p5, p6); } +#undef clEnqueueNDRangeKernel +#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_fn +inline cl_int clEnqueueNDRangeKernel(cl_command_queue p0, cl_kernel p1, cl_uint p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueNDRangeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueTask +#define clEnqueueTask clEnqueueTask_fn +inline cl_int clEnqueueTask(cl_command_queue p0, cl_kernel p1, cl_uint p2, const cl_event* p3, cl_event* p4) { return clEnqueueTask_pfn(p0, p1, p2, p3, p4); } +#undef clEnqueueNativeKernel +#define clEnqueueNativeKernel clEnqueueNativeKernel_fn +inline cl_int clEnqueueNativeKernel(cl_command_queue p0, void (CL_CALLBACK*p1) (void*), void* p2, size_t p3, cl_uint p4, const cl_mem* p5, const void** p6, cl_uint p7, const cl_event* p8, cl_event* p9) { return clEnqueueNativeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef 
clEnqueueMarkerWithWaitList +#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_fn +inline cl_int clEnqueueMarkerWithWaitList(cl_command_queue p0, cl_uint p1, const cl_event* p2, cl_event* p3) { return clEnqueueMarkerWithWaitList_pfn(p0, p1, p2, p3); } +#undef clEnqueueBarrierWithWaitList +#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_fn +inline cl_int clEnqueueBarrierWithWaitList(cl_command_queue p0, cl_uint p1, const cl_event* p2, cl_event* p3) { return clEnqueueBarrierWithWaitList_pfn(p0, p1, p2, p3); } +#undef clGetExtensionFunctionAddressForPlatform +#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_fn +inline void* clGetExtensionFunctionAddressForPlatform(cl_platform_id p0, const char* p1) { return clGetExtensionFunctionAddressForPlatform_pfn(p0, p1); } +#undef clCreateImage2D +#define clCreateImage2D clCreateImage2D_fn +inline cl_mem clCreateImage2D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, void* p6, cl_int* p7) { return clCreateImage2D_pfn(p0, p1, p2, p3, p4, p5, p6, p7); } +#undef clCreateImage3D +#define clCreateImage3D clCreateImage3D_fn +inline cl_mem clCreateImage3D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, size_t p6, size_t p7, void* p8, cl_int* p9) { return clCreateImage3D_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clEnqueueMarker +#define clEnqueueMarker clEnqueueMarker_fn +inline cl_int clEnqueueMarker(cl_command_queue p0, cl_event* p1) { return clEnqueueMarker_pfn(p0, p1); } +#undef clEnqueueWaitForEvents +#define clEnqueueWaitForEvents clEnqueueWaitForEvents_fn +inline cl_int clEnqueueWaitForEvents(cl_command_queue p0, cl_uint p1, const cl_event* p2) { return clEnqueueWaitForEvents_pfn(p0, p1, p2); } +#undef clEnqueueBarrier +#define clEnqueueBarrier clEnqueueBarrier_fn +inline cl_int clEnqueueBarrier(cl_command_queue p0) { return clEnqueueBarrier_pfn(p0); } 
+#undef clUnloadCompiler +#define clUnloadCompiler clUnloadCompiler_fn +inline cl_int clUnloadCompiler() { return clUnloadCompiler_pfn(); } +#undef clGetExtensionFunctionAddress +#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_fn +inline void* clGetExtensionFunctionAddress(const char* p0) { return clGetExtensionFunctionAddress_pfn(p0); } + +#endif // __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ \ No newline at end of file diff --git a/modules/ocl/src/cl_context.cpp b/modules/ocl/src/cl_context.cpp index e24cc8b358..01785eaa26 100644 --- a/modules/ocl/src/cl_context.cpp +++ b/modules/ocl/src/cl_context.cpp @@ -50,6 +50,15 @@ #include #include "cl_programcache.hpp" +// workaround for OpenCL C++ bindings +#if defined(HAVE_OPENCL12) +#include "opencv2/ocl/cl_runtime/cl_runtime_opencl12_wrappers.hpp" +#elif defined(HAVE_OPENCL11) +#include "opencv2/ocl/cl_runtime/cl_runtime_opencl11_wrappers.hpp" +#else +#error Invalid OpenCL configuration +#endif + #if defined _MSC_VER && _MSC_VER >= 1200 #pragma warning( disable: 4100 4101 4127 4244 4267 4510 4512 4610) #endif diff --git a/modules/ocl/src/cl_programcache.cpp b/modules/ocl/src/cl_programcache.cpp index 7c58e7c489..a34f828557 100644 --- a/modules/ocl/src/cl_programcache.cpp +++ b/modules/ocl/src/cl_programcache.cpp @@ -50,6 +50,15 @@ #include #include "cl_programcache.hpp" +// workaround for OpenCL C++ bindings +#if defined(HAVE_OPENCL12) +#include "opencv2/ocl/cl_runtime/cl_runtime_opencl12_wrappers.hpp" +#elif defined(HAVE_OPENCL11) +#include "opencv2/ocl/cl_runtime/cl_runtime_opencl11_wrappers.hpp" +#else +#error Invalid OpenCL configuration +#endif + #if defined _MSC_VER && _MSC_VER >= 1200 # pragma warning( disable: 4100 4244 4267 4510 4512 4610) #endif diff --git a/modules/ocl/src/cl_runtime/generator/common.py b/modules/ocl/src/cl_runtime/generator/common.py index 99a56096b0..19b21454c8 100644 --- a/modules/ocl/src/cl_runtime/generator/common.py +++ 
b/modules/ocl/src/cl_runtime/generator/common.py @@ -182,6 +182,29 @@ def generateTemplates(sz, lprefix, switch_name, calling_convention=''): print '};' print '' +@outputToString +def generateInlineWrappers(fns): + print '// generated by %s' % os.path.basename(sys.argv[0]) + for fn in fns: + print '#undef %s' % (fn['name']) + print '#define %s %s_fn' % (fn['name'], fn['name']) + params = [] + call_params = [] + for i in range(0, len(fn['params'])): + t = fn['params'][i] + if t.find('*)') >= 0: + p = re.sub(r'\*\)', (' *p%d)' % i), t, 1) + params.append(p) + else: + params.append('%s p%d' % (t, i)) + call_params.append('p%d' % (i)) + + if len(fn['ret']) == 1 and fn['ret'][0] == 'void': + print 'inline void %s(%s) { %s_pfn(%s); }' \ + % (fn['name'], ', '.join(params), fn['name'], ', '.join(call_params)) + else: + print 'inline %s %s(%s) { return %s_pfn(%s); }' \ + % (' '.join(fn['ret']), fn['name'], ', '.join(params), fn['name'], ', '.join(call_params)) def ProcessTemplate(inputFile, ctx, noteLine='//\n// AUTOGENERATED, DO NOT EDIT\n//'): f = open(inputFile, "r") diff --git a/modules/ocl/src/cl_runtime/generator/parser_cl.py b/modules/ocl/src/cl_runtime/generator/parser_cl.py index e711e4cf09..608b826183 100644 --- a/modules/ocl/src/cl_runtime/generator/parser_cl.py +++ b/modules/ocl/src/cl_runtime/generator/parser_cl.py @@ -10,6 +10,7 @@ try: if len(sys.argv) > 1: outfile = open('../../../include/opencv2/ocl/cl_runtime/' + sys.argv[1] + '.hpp', "w") outfile_impl = open('../' + sys.argv[1] + '_impl.hpp', "w") + outfile_wrappers = open('../../../include/opencv2/ocl/cl_runtime/' + sys.argv[1] + '_wrappers.hpp', "w") if len(sys.argv) > 2: f = open(sys.argv[2], "r") else: @@ -102,6 +103,11 @@ ctx['CL_FN_DECLARATIONS'] = generateFnDeclaration(fns) sys.stdout = outfile ProcessTemplate('template/cl_runtime_opencl.hpp.in', ctx) +ctx['CL_FN_INLINE_WRAPPERS'] = generateInlineWrappers(fns) + +sys.stdout = outfile_wrappers 
+ProcessTemplate('template/cl_runtime_opencl_wrappers.hpp.in', ctx) + ctx['CL_FN_ENUMS'] = generateEnums(fns) ctx['CL_FN_NAMES'] = generateNames(fns) ctx['CL_FN_DEFINITIONS'] = generateFnDefinition(fns) diff --git a/modules/ocl/src/cl_runtime/generator/template/cl_runtime_opencl_wrappers.hpp.in b/modules/ocl/src/cl_runtime/generator/template/cl_runtime_opencl_wrappers.hpp.in new file mode 100644 index 0000000000..d02d4c5ff2 --- /dev/null +++ b/modules/ocl/src/cl_runtime/generator/template/cl_runtime_opencl_wrappers.hpp.in @@ -0,0 +1,6 @@ +#ifndef __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ +#define __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ + +@CL_FN_INLINE_WRAPPERS@ + +#endif // __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ \ No newline at end of file From 69c2ef5ed21255c2e67b143b3adc9500a87a1119 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 3 Oct 2013 19:35:01 +0400 Subject: [PATCH 30/39] ocl: update ocl samples --- samples/gpu/super_resolution.cpp | 8 -------- samples/ocl/adaptive_bilateral_filter.cpp | 5 +---- samples/ocl/bgfg_segm.cpp | 5 +---- samples/ocl/clahe.cpp | 3 --- samples/ocl/facedetect.cpp | 12 +----------- samples/ocl/hog.cpp | 2 -- samples/ocl/pyrlk_optical_flow.cpp | 9 +-------- samples/ocl/squares.cpp | 4 +--- samples/ocl/stereo_match.cpp | 14 ++------------ samples/ocl/surf_matcher.cpp | 12 ++---------- samples/ocl/tvl1_optical_flow.cpp | 9 +-------- 11 files changed, 10 insertions(+), 73 deletions(-) diff --git a/samples/gpu/super_resolution.cpp b/samples/gpu/super_resolution.cpp index 07dda775b4..435e711a1a 100644 --- a/samples/gpu/super_resolution.cpp +++ b/samples/gpu/super_resolution.cpp @@ -132,17 +132,9 @@ int main(int argc, const char* argv[]) } #endif #if defined(HAVE_OPENCV_OCL) - std::vectorinfo; if(useCuda) { CV_Assert(!useOcl); - info.clear(); - } - - if(useOcl) - { - CV_Assert(!useCuda); - cv::ocl::getDevice(info); } #endif Ptr superRes; diff --git a/samples/ocl/adaptive_bilateral_filter.cpp 
b/samples/ocl/adaptive_bilateral_filter.cpp index df226b195d..d3d2521df9 100644 --- a/samples/ocl/adaptive_bilateral_filter.cpp +++ b/samples/ocl/adaptive_bilateral_filter.cpp @@ -25,9 +25,6 @@ int main( int argc, const char** argv ) return -1; } - std::vector infos; - ocl::getDevice(infos); - ocl::oclMat dsrc(src), dABFilter, dBFilter; Size ksize(ks, ks); @@ -48,4 +45,4 @@ int main( int argc, const char** argv ) waitKey(); return 0; -} \ No newline at end of file +} diff --git a/samples/ocl/bgfg_segm.cpp b/samples/ocl/bgfg_segm.cpp index 410f346936..589a34914f 100644 --- a/samples/ocl/bgfg_segm.cpp +++ b/samples/ocl/bgfg_segm.cpp @@ -24,7 +24,7 @@ int main(int argc, const char** argv) if (cmd.get("help")) { cout << "Usage : bgfg_segm [options]" << endl; - cout << "Avaible options:" << endl; + cout << "Available options:" << endl; cmd.printParams(); return 0; } @@ -54,9 +54,6 @@ int main(int argc, const char** argv) return -1; } - std::vectorinfo; - cv::ocl::getDevice(info); - Mat frame; cap >> frame; diff --git a/samples/ocl/clahe.cpp b/samples/ocl/clahe.cpp index 1fbf49fac2..5dc20756b4 100644 --- a/samples/ocl/clahe.cpp +++ b/samples/ocl/clahe.cpp @@ -45,9 +45,6 @@ int main(int argc, char** argv) createTrackbar("Tile Size", "CLAHE", &tilesize, 32, (TrackbarCallback)TSize_Callback); createTrackbar("Clip Limit", "CLAHE", &cliplimit, 20, (TrackbarCallback)Clip_Callback); - vector info; - CV_Assert(ocl::getDevice(info)); - Mat frame, outframe; ocl::oclMat d_outframe; diff --git a/samples/ocl/facedetect.cpp b/samples/ocl/facedetect.cpp index 711e257e78..be61b79e44 100644 --- a/samples/ocl/facedetect.cpp +++ b/samples/ocl/facedetect.cpp @@ -72,7 +72,7 @@ int main( int argc, const char** argv ) CommandLineParser cmd(argc, argv, keys); if (cmd.get("help")) { - cout << "Avaible options:" << endl; + cout << "Available options:" << endl; cmd.printParams(); return 0; } @@ -120,16 +120,6 @@ int main( int argc, const char** argv ) cvNamedWindow( "result", 1 ); - vector 
oclinfo; - int devnums = ocl::getDevice(oclinfo); - if( devnums < 1 ) - { - std::cout << "no device found\n"; - return -1; - } - //if you want to use undefault device, set it here - //setDevice(oclinfo[0]); - ocl::setBinpath("./"); if( capture ) { cout << "In capture ..." << endl; diff --git a/samples/ocl/hog.cpp b/samples/ocl/hog.cpp index ec88c14d23..89c8dff828 100644 --- a/samples/ocl/hog.cpp +++ b/samples/ocl/hog.cpp @@ -135,8 +135,6 @@ App::App(CommandLineParser& cmd) void App::run() { - vector oclinfo; - ocl::getDevice(oclinfo); running = true; VideoWriter video_writer; diff --git a/samples/ocl/pyrlk_optical_flow.cpp b/samples/ocl/pyrlk_optical_flow.cpp index cefa928670..5a59803798 100644 --- a/samples/ocl/pyrlk_optical_flow.cpp +++ b/samples/ocl/pyrlk_optical_flow.cpp @@ -86,13 +86,6 @@ static void drawArrows(Mat& frame, const vector& prevPts, const vector< int main(int argc, const char* argv[]) { - static std::vector ocl_info; - ocl::getDevice(ocl_info); - //if you want to use undefault device, set it here - setDevice(ocl_info[0]); - - //set this to save kernel compile time from second time you run - ocl::setBinpath("./"); const char* keys = "{ h | help | false | print help message }" "{ l | left | | specify left image }" @@ -109,7 +102,7 @@ int main(int argc, const char* argv[]) if (cmd.get("help")) { cout << "Usage: pyrlk_optical_flow [options]" << endl; - cout << "Avaible options:" << endl; + cout << "Available options:" << endl; cmd.printParams(); return 0; } diff --git a/samples/ocl/squares.cpp b/samples/ocl/squares.cpp index 48964ffb2e..9e709245da 100644 --- a/samples/ocl/squares.cpp +++ b/samples/ocl/squares.cpp @@ -284,13 +284,11 @@ int main(int argc, char** argv) string outfile = cmd.get("o"); if(inputName.empty()) { - cout << "Avaible options:" << endl; + cout << "Available options:" << endl; cmd.printParams(); return 0; } - vector info; - CV_Assert(ocl::getDevice(info)); int iterations = 10; namedWindow( wndname, 1 ); vector > squares_cpu, 
squares_ocl; diff --git a/samples/ocl/stereo_match.cpp b/samples/ocl/stereo_match.cpp index 38dda8a94b..86d60d49be 100644 --- a/samples/ocl/stereo_match.cpp +++ b/samples/ocl/stereo_match.cpp @@ -77,28 +77,18 @@ int main(int argc, char** argv) "{ r | right | | specify right image }" "{ m | method | BM | specify match method(BM/BP/CSBP) }" "{ n | ndisp | 64 | specify number of disparity levels }" - "{ s | cpu_ocl | false | use cpu or gpu as ocl device to process the image }" "{ o | output | stereo_match_output.jpg | specify output path when input is images}"; CommandLineParser cmd(argc, argv, keys); if (cmd.get("help")) { - cout << "Avaible options:" << endl; + cout << "Available options:" << endl; cmd.printParams(); return 0; } try { App app(cmd); - int flag = CVCL_DEVICE_TYPE_GPU; - if(cmd.get("s") == true) - flag = CVCL_DEVICE_TYPE_CPU; - - vector info; - if(getDevice(info, flag) == 0) - { - throw runtime_error("Error: Did not find a valid OpenCL device!"); - } - cout << "Device name:" << info[0].DeviceName[0] << endl; + cout << "Device name:" << cv::ocl::Context::getContext()->getDeviceInfo().deviceName << endl; app.run(); } diff --git a/samples/ocl/surf_matcher.cpp b/samples/ocl/surf_matcher.cpp index bee517fbca..4d73323059 100644 --- a/samples/ocl/surf_matcher.cpp +++ b/samples/ocl/surf_matcher.cpp @@ -145,19 +145,11 @@ int main(int argc, char* argv[]) CommandLineParser cmd(argc, argv, keys); if (cmd.get("help")) { - std::cout << "Avaible options:" << std::endl; + std::cout << "Available options:" << std::endl; cmd.printParams(); return 0; } - vector info; - if(cv::ocl::getDevice(info) == 0) - { - std::cout << "Error: Did not find a valid OpenCL device!" 
<< std::endl; - return -1; - } - ocl::setDevice(info[0]); - Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey; oclMat img1, img2; bool useCPU = cmd.get("c"); @@ -190,7 +182,7 @@ int main(int argc, char* argv[]) { std::cout << "Device name:" - << info[0].DeviceName[0] + << cv::ocl::Context::getContext()->getDeviceInfo().deviceName << std::endl; } double surf_time = 0.; diff --git a/samples/ocl/tvl1_optical_flow.cpp b/samples/ocl/tvl1_optical_flow.cpp index 315970b7a3..296dc69338 100644 --- a/samples/ocl/tvl1_optical_flow.cpp +++ b/samples/ocl/tvl1_optical_flow.cpp @@ -80,13 +80,6 @@ static void getFlowField(const Mat& u, const Mat& v, Mat& flowField) int main(int argc, const char* argv[]) { - static std::vector ocl_info; - ocl::getDevice(ocl_info); - //if you want to use undefault device, set it here - setDevice(ocl_info[0]); - - //set this to save kernel compile time from second time you run - ocl::setBinpath("./"); const char* keys = "{ h | help | false | print help message }" "{ l | left | | specify left image }" @@ -101,7 +94,7 @@ int main(int argc, const char* argv[]) if (cmd.get("help")) { cout << "Usage: pyrlk_optical_flow [options]" << endl; - cout << "Avaible options:" << endl; + cout << "Available options:" << endl; cmd.printParams(); return 0; } From d74ca7b39d6a8f60eeb4a71d34d7c5906a07de3e Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 3 Oct 2013 20:00:41 +0400 Subject: [PATCH 31/39] ocl: update module documentation --- .../doc/structures_and_utility_functions.rst | 50 +++++++------------ 1 file changed, 17 insertions(+), 33 deletions(-) diff --git a/modules/ocl/doc/structures_and_utility_functions.rst b/modules/ocl/doc/structures_and_utility_functions.rst index c3c93ecbfe..aec3f70bf4 100644 --- a/modules/ocl/doc/structures_and_utility_functions.rst +++ b/modules/ocl/doc/structures_and_utility_functions.rst @@ -3,56 +3,40 @@ Data Structures and Utility Functions .. highlight:: cpp -ocl::Info -------------- -.. 
ocv:class:: ocl::Info +ocl::getOpenCLPlatforms +----------------------- +Returns the list of OpenCL platforms -this class should be maintained by the user and be passed to getDevice +.. ocv:function:: int ocl::getOpenCLPlatforms( PlatformsInfo& platforms ) -ocl::getDevice ------------------- + :param platforms: Output variable + +ocl::getOpenCLDevices +--------------------- Returns the list of devices -.. ocv:function:: int ocl::getDevice( std::vector & oclinfo, int devicetype=CVCL_DEVICE_TYPE_GPU ) +.. ocv:function:: int ocl::getOpenCLDevices( DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU, const PlatformInfo* platform = NULL ) - :param oclinfo: Output vector of ``ocl::Info`` structures + :param devices: Output variable - :param devicetype: One of ``CVCL_DEVICE_TYPE_GPU``, ``CVCL_DEVICE_TYPE_CPU`` or ``CVCL_DEVICE_TYPE_DEFAULT``. + :param deviceType: Bitmask of ``CVCL_DEVICE_TYPE_GPU``, ``CVCL_DEVICE_TYPE_CPU`` or ``CVCL_DEVICE_TYPE_DEFAULT``. -the function must be called before any other ``cv::ocl`` functions; it initializes ocl runtime. + :param platform: Specifies the preferred platform ocl::setDevice ------------------- +-------------- Returns void -.. ocv:function:: void ocl::setDevice( Info &oclinfo, int devnum = 0 ) +.. ocv:function:: void ocl::setDevice( const DeviceInfo* info ) - :param oclinfo: Output vector of ``ocl::Info`` structures + :param info: device info - :param devnum: the selected OpenCL device under this platform. - -ocl::setBinpath +ocl::setBinaryPath ------------------ Returns void -.. ocv:function:: void ocl::setBinpath(const char *path) +.. ocv:function:: void ocl::setBinaryPath(const char *path) :param path: the path of OpenCL kernel binaries If you call this function and set a valid path, the OCL module will save the compiled kernel to the address in the first time and reload the binary since that. It can save compilation time at the runtime. 
- -ocl::getoclContext ----------------------- -Returns the pointer to the opencl context - -.. ocv:function:: void* ocl::getoclContext() - -Thefunction are used to get opencl context so that opencv can interactive with other opencl program. - -ocl::getoclCommandQueue --------------------------- -Returns the pointer to the opencl command queue - -.. ocv:function:: void* ocl::getoclCommandQueue() - -Thefunction are used to get opencl command queue so that opencv can interactive with other opencl program. From 5f81988699035ea16d1cdc7eb6367aaec520d0ee Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 3 Oct 2013 20:04:04 +0400 Subject: [PATCH 32/39] refactored arithm binary operations in order to make them more scalable --- modules/ocl/src/arithm.cpp | 16 +-- modules/ocl/src/matrix_operations.cpp | 34 +++--- modules/ocl/src/opencl/arithm_add.cl | 115 +++++++----------- modules/ocl/src/opencl/arithm_add_mask.cl | 20 ++- modules/ocl/src/opencl/arithm_add_scalar.cl | 63 ++++------ .../ocl/src/opencl/arithm_add_scalar_mask.cl | 20 ++- modules/ocl/test/test_arithm.cpp | 2 +- 7 files changed, 135 insertions(+), 135 deletions(-) diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index f34e0f730f..09d250ae3d 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -89,11 +89,11 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const size_t localThreads[3] = { 16, 16, 1 }; size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; - std::string kernelName = op_type == ABS_DIFF ? 
"arithm_absdiff" : "arithm_binary_op"; + std::string kernelName = "arithm_binary_op"; const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; const char * const WTypeMap[] = { "short", "short", "int", "int", "int", "float", "double" }; - const char operationsMap[] = { '+', '-', '*', '/', '-' }; + const char * const funcMap[] = { "FUNC_ADD", "FUNC_SUB", "FUNC_MUL", "FUNC_DIV", "FUNC_ABS_DIFF" }; const char * const channelMap[] = { "", "", "2", "4", "4" }; bool haveScalar = use_scalar || src2.empty(); @@ -105,12 +105,12 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const else if (op_type == MUL) WDepth = hasDouble && (depth == CV_32S || depth == CV_64F) ? CV_64F : CV_32F; - std::string buildOptions = format("-D T=%s%s -D WT=%s%s -D convertToT=convert_%s%s%s -D Operation=%c" - " -D convertToWT=convert_%s%s", + std::string buildOptions = format("-D T=%s%s -D WT=%s%s -D convertToT=convert_%s%s%s -D %s " + "-D convertToWT=convert_%s%s", typeMap[depth], channelMap[oclChannels], WTypeMap[WDepth], channelMap[oclChannels], typeMap[depth], channelMap[oclChannels], (depth >= CV_32F ? "" : (depth == CV_32S ? 
"_rte" : "_sat_rte")), - operationsMap[op_type], WTypeMap[WDepth], channelMap[oclChannels]); + funcMap[op_type], WTypeMap[WDepth], channelMap[oclChannels]); vector > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); @@ -124,6 +124,9 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 )); kernelName += "_mat"; + + if (haveScalar) + buildOptions += " -D HAVE_SCALAR"; } if (haveScalar) @@ -146,9 +149,6 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const kernelName += "_mask"; } - if (op_type == DIV) - kernelName += "_div"; - args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 )); args.push_back( make_pair( sizeof(cl_int), (void *)&dstoffset1 )); diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index 80b2f7d81c..3b0e41731a 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -366,23 +366,23 @@ static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, stri #ifdef CL_VERSION_1_2 // this enables backwards portability to // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support - if (Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2) && - dst.offset == 0 && dst.cols == dst.wholecols) - { - const int sizeofMap[][7] = - { - { sizeof(cl_uchar) , sizeof(cl_char) , sizeof(cl_ushort) , sizeof(cl_short) , sizeof(cl_int) , sizeof(cl_float) , sizeof(cl_double) }, - { sizeof(cl_uchar2), sizeof(cl_char2), sizeof(cl_ushort2), sizeof(cl_short2), sizeof(cl_int2), sizeof(cl_float2), sizeof(cl_double2) }, - { 0 , 0 , 0 , 0 , 0 , 0 , 0 }, - { sizeof(cl_uchar4), sizeof(cl_char4), sizeof(cl_ushort4), sizeof(cl_short4), sizeof(cl_int4), sizeof(cl_float4), sizeof(cl_double4) }, - }; - int sizeofGeneric = sizeofMap[dst.oclchannels() - 
1][dst.depth()]; - - clEnqueueFillBuffer(getClCommandQueue(dst.clCxt), - (cl_mem)dst.data, (void*)mat.data, sizeofGeneric, - 0, dst.step * dst.rows, 0, NULL, NULL); - } - else +// if (Context::getContext()->supportsFeature(Context::CL_VER_1_2) && +// dst.offset == 0 && dst.cols == dst.wholecols) +// { +// const int sizeofMap[][7] = +// { +// { sizeof(cl_uchar) , sizeof(cl_char) , sizeof(cl_ushort) , sizeof(cl_short) , sizeof(cl_int) , sizeof(cl_float) , sizeof(cl_double) }, +// { sizeof(cl_uchar2), sizeof(cl_char2), sizeof(cl_ushort2), sizeof(cl_short2), sizeof(cl_int2), sizeof(cl_float2), sizeof(cl_double2) }, +// { 0 , 0 , 0 , 0 , 0 , 0 , 0 }, +// { sizeof(cl_uchar4), sizeof(cl_char4), sizeof(cl_ushort4), sizeof(cl_short4), sizeof(cl_int4), sizeof(cl_float4), sizeof(cl_double4) }, +// }; +// int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()]; + +// clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), +// (cl_mem)dst.data, (void*)mat.data, sizeofGeneric, +// 0, dst.step * dst.rows, 0, NULL, NULL); +// } +// else #endif { oclMat m(mat); diff --git a/modules/ocl/src/opencl/arithm_add.cl b/modules/ocl/src/opencl/arithm_add.cl index 38834e7660..40caba5a9e 100644 --- a/modules/ocl/src/opencl/arithm_add.cl +++ b/modules/ocl/src/opencl/arithm_add.cl @@ -52,51 +52,50 @@ #endif #endif -////////////////////////////////////////////////////////////////////////////////////////////////////// -///////////////////////////////////////////// ADD //////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined (FUNC_ADD) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + convertToWT(src2[src2_index])); +#endif -__kernel void arithm_binary_op_mat(__global T *src1, int src1_step, int src1_offset, - __global T *src2, int src2_step, int src2_offset, - __global T *dst, int dst_step, int dst_offset, - int cols, int rows) -{ - 
int x = get_global_id(0); - int y = get_global_id(1); +#if defined (FUNC_SUB) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - convertToWT(src2[src2_index])); +#endif - if (x < cols && y < rows) - { - int src1_index = mad24(y, src1_step, x + src1_offset); - int src2_index = mad24(y, src2_step, x + src2_offset); - int dst_index = mad24(y, dst_step, x + dst_offset); +#if defined (FUNC_MUL) +#if defined (HAVE_SCALAR) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar[0] * convertToWT(src2[src2_index])); +#else +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * convertToWT(src2[src2_index])); +#endif +#endif - dst[dst_index] = convertToT(convertToWT(src1[src1_index]) Operation convertToWT(src2[src2_index])); - } -} +#if defined (FUNC_DIV) +#if defined (HAVE_SCALAR) +#define EXPRESSION T zero = (T)(0); \ + dst[dst_index] = src2[src2_index] == zero ? zero : \ + convertToT(convertToWT(src1[src1_index]) * scalar[0] / convertToWT(src2[src2_index])); +#else +#define EXPRESSION T zero = (T)(0); \ + dst[dst_index] = src2[src2_index] == zero ? zero : \ + convertToT(convertToWT(src1[src1_index]) / convertToWT(src2[src2_index])); +#endif +#endif -__kernel void arithm_binary_op_mat_div(__global T *src1, int src1_step, int src1_offset, - __global T *src2, int src2_step, int src2_offset, - __global T *dst, int dst_step, int dst_offset, - int cols, int rows) -{ - int x = get_global_id(0); - int y = get_global_id(1); +#if defined (FUNC_ABS_DIFF) +#define EXPRESSION WT value = convertToWT(src1[src1_index]) - convertToWT(src2[src2_index]); \ + value = value > (WT)(0) ? 
value : -value; \ + dst[dst_index] = convertToT(value); +#endif - if (x < cols && y < rows) - { - int src1_index = mad24(y, src1_step, x + src1_offset); - int src2_index = mad24(y, src2_step, x + src2_offset); - int dst_index = mad24(y, dst_step, x + dst_offset); +////////////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////// ADD //////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////////////////// - T zero = (T)(0); - dst[dst_index] = src2[src2_index] == zero ? zero : convertToT(convertToWT(src1[src1_index]) / convertToWT(src2[src2_index])); - } -} +#ifndef HAVE_SCALAR -__kernel void arithm_absdiff_mat(__global T *src1, int src1_step, int src1_offset, - __global T *src2, int src2_step, int src2_offset, - __global T *dst, int dst_step, int dst_offset, - int cols, int rows) +__kernel void arithm_binary_op_mat(__global T *src1, int src1_step, int src1_offset, + __global T *src2, int src2_step, int src2_offset, + __global T *dst, int dst_step, int dst_offset, + int cols, int rows) { int x = get_global_id(0); int y = get_global_id(1); @@ -107,18 +106,18 @@ __kernel void arithm_absdiff_mat(__global T *src1, int src1_step, int src1_offse int src2_index = mad24(y, src2_step, x + src2_offset); int dst_index = mad24(y, dst_step, x + dst_offset); - WT value = convertToWT(src1[src1_index]) - convertToWT(src2[src2_index]); - value = value > (WT)(0) ? 
value : -value; - dst[dst_index] = convertToT(value); + EXPRESSION } } -// add mat with scale for multiply +#else + +// add mat with scale __kernel void arithm_binary_op_mat_scalar(__global T *src1, int src1_step, int src1_offset, - __global T *src2, int src2_step, int src2_offset, - __global WT *scalar, - __global T *dst, int dst_step, int dst_offset, - int cols, int rows) + __global T *src2, int src2_step, int src2_offset, + __global WT *scalar, + __global T *dst, int dst_step, int dst_offset, + int cols, int rows) { int x = get_global_id(0); int y = get_global_id(1); @@ -129,28 +128,8 @@ __kernel void arithm_binary_op_mat_scalar(__global T *src1, int src1_step, int s int src2_index = mad24(y, src2_step, x + src2_offset); int dst_index = mad24(y, dst_step, x + dst_offset); - dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar[0] * convertToWT(src2[src2_index])); + EXPRESSION } } -// add mat with scale for divide -__kernel void arithm_binary_op_mat_scalar_div(__global T *src1, int src1_step, int src1_offset, - __global T *src2, int src2_step, int src2_offset, - __global WT *scalar, - __global T *dst, int dst_step, int dst_offset, - int cols, int rows) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - int src1_index = mad24(y, src1_step, x + src1_offset); - int src2_index = mad24(y, src2_step, x + src2_offset); - int dst_index = mad24(y, dst_step, x + dst_offset); - - T zero = (T)(0); - dst[dst_index] = src2[src2_index] == zero ? 
zero : - convertToT(convertToWT(src1[src1_index]) * scalar[0] / convertToWT(src2[src2_index])); - } -} +#endif diff --git a/modules/ocl/src/opencl/arithm_add_mask.cl b/modules/ocl/src/opencl/arithm_add_mask.cl index 52dbfc455c..c3958bf1fb 100644 --- a/modules/ocl/src/opencl/arithm_add_mask.cl +++ b/modules/ocl/src/opencl/arithm_add_mask.cl @@ -51,6 +51,24 @@ #endif #endif +#if defined (FUNC_ADD) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + convertToWT(src2[src2_index])); +#endif + +#if defined (FUNC_SUB) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - convertToWT(src2[src2_index])); +#endif + +#if defined (FUNC_MUL) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * convertToWT(src2[src2_index])); +#endif + +#if defined (FUNC_DIV) +#define EXPRESSION T zero = (T)(0); \ + dst[dst_index] = src2[src2_index] == zero ? zero : \ + convertToT(convertToWT(src1[src1_index]) / convertToWT(src2[src2_index])); +#endif + ////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////// add with mask ////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////// @@ -73,7 +91,7 @@ __kernel void arithm_binary_op_mat_mask(__global T * src1, int src1_step, int sr int src2_index = mad24(y, src2_step, x + src2_offset); int dst_index = mad24(y, dst_step, dst_offset + x); - dst[dst_index] = convertToT(convertToWT(src1[src1_index]) Operation convertToWT(src2[src2_index])); + EXPRESSION } } } diff --git a/modules/ocl/src/opencl/arithm_add_scalar.cl b/modules/ocl/src/opencl/arithm_add_scalar.cl index 4e0c7fc5fa..4a0167fd55 100644 --- a/modules/ocl/src/opencl/arithm_add_scalar.cl +++ b/modules/ocl/src/opencl/arithm_add_scalar.cl @@ -51,6 +51,29 @@ #endif #endif +#if defined (FUNC_ADD) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + scalar[0]); +#endif + +#if 
defined (FUNC_SUB) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - scalar[0]); +#endif + +#if defined (FUNC_MUL) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar[0]); +#endif + +#if defined (FUNC_DIV) +#define EXPRESSION T zero = (T)(0); \ + dst[dst_index] = src1[src1_index] == zero ? zero : convertToT(scalar[0] / convertToWT(src1[src1_index])); +#endif + +#if defined (FUNC_ABS_DIFF) +#define EXPRESSION WT value = convertToWT(src1[src1_index]) - scalar[0]; \ + value = value > (WT)(0) ? value : -value; \ + dst[dst_index] = convertToT(value); +#endif + /////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////// Add with scalar ///////////////////////////////// /////////////////////////////////////////////////////////////////////////////////// @@ -68,44 +91,6 @@ __kernel void arithm_binary_op_scalar (__global T *src1, int src1_step, int src1 int src1_index = mad24(y, src1_step, x + src1_offset); int dst_index = mad24(y, dst_step, x + dst_offset); - dst[dst_index] = convertToT(convertToWT(src1[src1_index]) Operation scalar[0]); - } -} - -__kernel void arithm_absdiff_scalar(__global T *src1, int src1_step, int src1_offset, - __global WT *src2, - __global T *dst, int dst_step, int dst_offset, - int cols, int rows) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - int src1_index = mad24(y, src1_step, x + src1_offset); - int dst_index = mad24(y, dst_step, x + dst_offset); - - WT value = convertToWT(src1[src1_index]) - src2[0]; - value = value > (WT)(0) ? 
value : -value; - dst[dst_index] = convertToT(value); - } -} - -// scalar divide to matrix -__kernel void arithm_binary_op_scalar_div(__global T *src1, int src1_step, int src1_offset, - __global WT *scalar, - __global T *dst, int dst_step, int dst_offset, - int cols, int rows) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - int src1_index = mad24(y, src1_step, x + src1_offset); - int dst_index = mad24(y, dst_step, x + dst_offset); - - T zero = (T)(0); - dst[dst_index] = src1[src1_index] == zero ? zero : convertToT(scalar[0] / convertToWT(src1[src1_index])); + EXPRESSION } } diff --git a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl index 5c34080346..d472b3cbfb 100644 --- a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl +++ b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl @@ -51,6 +51,24 @@ #endif #endif +#if defined (FUNC_ADD) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + scalar[0]); +#endif + +#if defined (FUNC_SUB) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - scalar[0]); +#endif + +#if defined (FUNC_MUL) +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar[0]); +#endif + +#if defined (FUNC_DIV) +#define EXPRESSION T zero = (T)(0); \ + dst[dst_index] = src2[src2_index] == zero ? 
zero : \ + convertToT(convertToWT(src1[src1_index]) / scalar[0]); +#endif + /////////////////////////////////////////////////////////////////////////////////// //////////////////////////// Add with scalar with mask //////////////////////////// /////////////////////////////////////////////////////////////////////////////////// @@ -72,7 +90,7 @@ __kernel void arithm_binary_op_scalar_mask(__global T *src1, int src1_step, int int src1_index = mad24(y, src1_step, x + src1_offset); int dst_index = mad24(y, dst_step, dst_offset + x); - dst[dst_index] = convertToT(convertToWT(src1[src1_index]) Operation scalar[0]); + EXPRESSION } } } diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp index db01d95036..f2f13ec41e 100644 --- a/modules/ocl/test/test_arithm.cpp +++ b/modules/ocl/test/test_arithm.cpp @@ -535,7 +535,7 @@ TEST_P(Absdiff, Mat) } } -TEST_P(Absdiff, Mat_Scalar) +TEST_P(Absdiff, Scalar) { for (int j = 0; j < LOOP_TIMES; j++) { From 10d60f99dc29935287c5f3755bcf7bf2348976d2 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 3 Oct 2013 19:17:54 +0400 Subject: [PATCH 33/39] fixed ocl::integral and enabled perf test for it --- modules/ocl/perf/perf_imgproc.cpp | 2 +- modules/ocl/src/haar.cpp | 1 - modules/ocl/src/imgproc.cpp | 149 ++++++++++++++---------------- modules/ocl/test/test_imgproc.cpp | 14 ++- 4 files changed, 85 insertions(+), 81 deletions(-) diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp index a6859d09d4..5eb32b46c9 100644 --- a/modules/ocl/perf/perf_imgproc.cpp +++ b/modules/ocl/perf/perf_imgproc.cpp @@ -198,7 +198,7 @@ PERF_TEST_P(cornerHarrisFixture, cornerHarris, typedef TestBaseWithParam integralFixture; -PERF_TEST_P(integralFixture, DISABLED_integral, OCL_TYPICAL_MAT_SIZES) // TODO does not work properly +PERF_TEST_P(integralFixture, integral, OCL_TYPICAL_MAT_SIZES) { const Size srcSize = GetParam(); diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp index 
aac3785e79..05a76aa84d 100644 --- a/modules/ocl/src/haar.cpp +++ b/modules/ocl/src/haar.cpp @@ -1141,7 +1141,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std CvSize sz; cv::Rect roi, roi2; - cv::Mat imgroi, imgroisq; cv::ocl::oclMat resizeroi, gimgroi, gimgroisq; for( int i = 0; i < m_loopcount; i++ ) diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index 5e0f54fab5..ff3d95fdae 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -975,10 +975,12 @@ namespace cv void integral(const oclMat &src, oclMat &sum, oclMat &sqsum) { CV_Assert(src.type() == CV_8UC1); - if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) + if(!src.clCxt->supportsFeature(ocl::FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); + return; } + int vlen = 4; int offset = src.offset / vlen; int pre_invalid = src.offset % vlen; @@ -986,50 +988,45 @@ namespace cv oclMat t_sum , t_sqsum; int w = src.cols + 1, h = src.rows + 1; - int depth; - if( src.cols * src.rows <= 2901 * 2901 ) //2901 is the maximum size for int when all values are 255 - { - t_sum.create(src.cols, src.rows, CV_32SC1); - sum.create(h, w, CV_32SC1); - } - else - { - //Use float to prevent overflow - t_sum.create(src.cols, src.rows, CV_32FC1); - sum.create(h, w, CV_32FC1); - } - t_sqsum.create(src.cols, src.rows, CV_32FC1); - sqsum.create(h, w, CV_32FC1); - depth = sum.depth(); - int sum_offset = sum.offset / vlen; - int sqsum_offset = sqsum.offset / vlen; - - vector > args; - args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&offset )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid )); - args.push_back( make_pair( sizeof(cl_int) , (void 
*)&src.rows )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step)); - size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1}; - openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, depth); - args.clear(); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&sqsum.data )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum.step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum_offset)); - size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1}; - openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, depth); + int depth = src.depth() == CV_8U ? 
CV_32S : CV_64F; + int type = CV_MAKE_TYPE(depth, 1); + + t_sum.create(src.cols, src.rows, type); + sum.create(h, w, type); + + t_sqsum.create(src.cols, src.rows, CV_32FC1); + sqsum.create(h, w, CV_32FC1); + + int sum_offset = sum.offset / vlen; + int sqsum_offset = sqsum.offset / vlen; + + vector > args; + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&offset )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step)); + size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1}; + openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, depth); + + args.clear(); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&sqsum.data )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum_offset)); + size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1}; + openCLExecuteKernel(src.clCxt, &imgproc_integral, 
"integral_rows", gt2, lt2, args, -1, depth); } void integral(const oclMat &src, oclMat &sum) @@ -1042,39 +1039,35 @@ namespace cv oclMat t_sum; int w = src.cols + 1, h = src.rows + 1; - int depth; - if(src.cols * src.rows <= 2901 * 2901) - { - t_sum.create(src.cols, src.rows, CV_32SC1); - sum.create(h, w, CV_32SC1); - }else - { - t_sum.create(src.cols, src.rows, CV_32FC1); - sum.create(h, w, CV_32FC1); - } - depth = sum.depth(); - int sum_offset = sum.offset / vlen; - vector > args; - args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&offset )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step)); - size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1}; - openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, depth); - args.clear(); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset)); - size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1}; - openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, depth); + int depth = src.depth() == CV_8U ? 
CV_32S : CV_32F; + int type = CV_MAKE_TYPE(depth, 1); + + t_sum.create(src.cols, src.rows, type); + sum.create(h, w, type); + + int sum_offset = sum.offset / vlen; + vector > args; + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&offset )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step)); + size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1}; + openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, depth); + + args.clear(); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset)); + size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1}; + openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, depth); } /////////////////////// corner ////////////////////////////// diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp index 4d297a7a4f..86c750f371 100644 --- a/modules/ocl/test/test_imgproc.cpp +++ b/modules/ocl/test/test_imgproc.cpp @@ -579,7 +579,19 @@ TEST_P(cornerHarris, Mat) struct integral : ImgprocTestBase {}; -TEST_P(integral, Mat) +TEST_P(integral, Mat1) +{ + for(int j = 0; j < 
LOOP_TIMES; j++) + { + random_roi(); + + cv::ocl::integral(clmat1_roi, cldst_roi); + cv::integral(mat1_roi, dst_roi); + Near(0); + } +} + +TEST_P(integral, Mat2) { for(int j = 0; j < LOOP_TIMES; j++) { From 2fdfa37ea5c7b4e85ece394f82d2d179edf34213 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 3 Oct 2013 22:57:05 +0400 Subject: [PATCH 34/39] ocl: runtime, fix 32-bit builds --- modules/ocl/src/cl_runtime/generator/parser_cl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ocl/src/cl_runtime/generator/parser_cl.py b/modules/ocl/src/cl_runtime/generator/parser_cl.py index 608b826183..0ffbe75794 100644 --- a/modules/ocl/src/cl_runtime/generator/parser_cl.py +++ b/modules/ocl/src/cl_runtime/generator/parser_cl.py @@ -67,7 +67,7 @@ while True: fn['modifiers'] = [] # modifiers fn['ret'] = ret - fn['calling'] = [] # calling + fn['calling'] = calling # print 'modifiers='+' '.join(modifiers) # print 'ret='+' '.join(type) @@ -112,7 +112,7 @@ ctx['CL_FN_ENUMS'] = generateEnums(fns) ctx['CL_FN_NAMES'] = generateNames(fns) ctx['CL_FN_DEFINITIONS'] = generateFnDefinition(fns) ctx['CL_FN_PTRS'] = generatePtrs(fns) -ctx['CL_FN_SWITCH'] = generateTemplates(15, 'opencl_fn', 'opencl_check_fn') +ctx['CL_FN_SWITCH'] = generateTemplates(15, 'opencl_fn', 'opencl_check_fn', 'CL_API_CALL') sys.stdout = outfile_impl ProcessTemplate('template/cl_runtime_impl_opencl.hpp.in', ctx) From 2d1a6687f960de31a6a781e9bd1a856a0f164256 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 3 Oct 2013 22:57:39 +0400 Subject: [PATCH 35/39] autogenerated code --- .../ocl/cl_runtime/cl_runtime_opencl11.hpp | 148 +++++------ .../ocl/cl_runtime/cl_runtime_opencl12.hpp | 176 ++++++------- .../cl_runtime/cl_runtime_opencl11_impl.hpp | 208 +++++++-------- .../cl_runtime/cl_runtime_opencl12_impl.hpp | 236 +++++++++--------- 4 files changed, 384 insertions(+), 384 deletions(-) diff --git a/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl11.hpp 
b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl11.hpp index 4155dce963..519cfd63ec 100644 --- a/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl11.hpp +++ b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl11.hpp @@ -251,80 +251,80 @@ #endif // generated by parser_cl.py -extern CL_RUNTIME_EXPORT cl_int (*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*); -extern CL_RUNTIME_EXPORT cl_int (*clGetPlatformInfo)(cl_platform_id, cl_platform_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_int (*clGetDeviceIDs)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*); -extern CL_RUNTIME_EXPORT cl_int (*clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_context (*clCreateContext)(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*); -extern CL_RUNTIME_EXPORT cl_context (*clCreateContextFromType)(const cl_context_properties*, cl_device_type, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*); -extern CL_RUNTIME_EXPORT cl_int (*clRetainContext)(cl_context); -extern CL_RUNTIME_EXPORT cl_int (*clReleaseContext)(cl_context); -extern CL_RUNTIME_EXPORT cl_int (*clGetContextInfo)(cl_context, cl_context_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_command_queue (*clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*); -extern CL_RUNTIME_EXPORT cl_int (*clRetainCommandQueue)(cl_command_queue); -extern CL_RUNTIME_EXPORT cl_int (*clReleaseCommandQueue)(cl_command_queue); -extern CL_RUNTIME_EXPORT cl_int (*clGetCommandQueueInfo)(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_int (*clSetCommandQueueProperty)(cl_command_queue, cl_command_queue_properties, cl_bool, cl_command_queue_properties*); -extern CL_RUNTIME_EXPORT cl_mem (*clCreateBuffer)(cl_context, cl_mem_flags, 
size_t, void*, cl_int*); -extern CL_RUNTIME_EXPORT cl_mem (*clCreateSubBuffer)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void*, cl_int*); -extern CL_RUNTIME_EXPORT cl_mem (*clCreateImage2D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, void*, cl_int*); -extern CL_RUNTIME_EXPORT cl_mem (*clCreateImage3D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, size_t, size_t, void*, cl_int*); -extern CL_RUNTIME_EXPORT cl_int (*clRetainMemObject)(cl_mem); -extern CL_RUNTIME_EXPORT cl_int (*clReleaseMemObject)(cl_mem); -extern CL_RUNTIME_EXPORT cl_int (*clGetSupportedImageFormats)(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format*, cl_uint*); -extern CL_RUNTIME_EXPORT cl_int (*clGetMemObjectInfo)(cl_mem, cl_mem_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_int (*clGetImageInfo)(cl_mem, cl_image_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_int (*clSetMemObjectDestructorCallback)(cl_mem, void (CL_CALLBACK*) (cl_mem, void*), void*); -extern CL_RUNTIME_EXPORT cl_sampler (*clCreateSampler)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int*); -extern CL_RUNTIME_EXPORT cl_int (*clRetainSampler)(cl_sampler); -extern CL_RUNTIME_EXPORT cl_int (*clReleaseSampler)(cl_sampler); -extern CL_RUNTIME_EXPORT cl_int (*clGetSamplerInfo)(cl_sampler, cl_sampler_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_program (*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*); -extern CL_RUNTIME_EXPORT cl_program (*clCreateProgramWithBinary)(cl_context, cl_uint, const cl_device_id*, const size_t*, const unsigned char**, cl_int*, cl_int*); -extern CL_RUNTIME_EXPORT cl_int (*clRetainProgram)(cl_program); -extern CL_RUNTIME_EXPORT cl_int (*clReleaseProgram)(cl_program); -extern CL_RUNTIME_EXPORT cl_int (*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void (CL_CALLBACK*) (cl_program, void*), void*); -extern 
CL_RUNTIME_EXPORT cl_int (*clUnloadCompiler)(); -extern CL_RUNTIME_EXPORT cl_int (*clGetProgramInfo)(cl_program, cl_program_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_int (*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_kernel (*clCreateKernel)(cl_program, const char*, cl_int*); -extern CL_RUNTIME_EXPORT cl_int (*clCreateKernelsInProgram)(cl_program, cl_uint, cl_kernel*, cl_uint*); -extern CL_RUNTIME_EXPORT cl_int (*clRetainKernel)(cl_kernel); -extern CL_RUNTIME_EXPORT cl_int (*clReleaseKernel)(cl_kernel); -extern CL_RUNTIME_EXPORT cl_int (*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*); -extern CL_RUNTIME_EXPORT cl_int (*clGetKernelInfo)(cl_kernel, cl_kernel_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_int (*clGetKernelWorkGroupInfo)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_int (*clWaitForEvents)(cl_uint, const cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clGetEventInfo)(cl_event, cl_event_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_event (*clCreateUserEvent)(cl_context, cl_int*); -extern CL_RUNTIME_EXPORT cl_int (*clRetainEvent)(cl_event); -extern CL_RUNTIME_EXPORT cl_int (*clReleaseEvent)(cl_event); -extern CL_RUNTIME_EXPORT cl_int (*clSetUserEventStatus)(cl_event, cl_int); -extern CL_RUNTIME_EXPORT cl_int (*clSetEventCallback)(cl_event, cl_int, void (CL_CALLBACK*) (cl_event, cl_int, void*), void*); -extern CL_RUNTIME_EXPORT cl_int (*clGetEventProfilingInfo)(cl_event, cl_profiling_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_int (*clFlush)(cl_command_queue); -extern CL_RUNTIME_EXPORT cl_int (*clFinish)(cl_command_queue); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueReadBufferRect)(cl_command_queue, 
cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueWriteBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyBuffer)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyBufferRect)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueReadImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueWriteImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyImage)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyImageToBuffer)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, size_t, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyBufferToImage)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT void* (*clEnqueueMapBuffer)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event*, cl_event*, cl_int*); -extern CL_RUNTIME_EXPORT void* 
(*clEnqueueMapImage)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t*, const size_t*, size_t*, size_t*, cl_uint, const cl_event*, cl_event*, cl_int*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueUnmapMemObject)(cl_command_queue, cl_mem, void*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueTask)(cl_command_queue, cl_kernel, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK* user_func) (void*), void*, size_t, cl_uint, const cl_mem*, const void**, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueMarker)(cl_command_queue, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueBarrier)(cl_command_queue); -extern CL_RUNTIME_EXPORT void* (*clGetExtensionFunctionAddress)(const char*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetPlatformInfo)(cl_platform_id, cl_platform_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetDeviceIDs)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_context (CL_API_CALL*clCreateContext)(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_context (CL_API_CALL*clCreateContextFromType)(const cl_context_properties*, cl_device_type, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), 
void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainContext)(cl_context); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseContext)(cl_context); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetContextInfo)(cl_context, cl_context_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_command_queue (CL_API_CALL*clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainCommandQueue)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseCommandQueue)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetCommandQueueInfo)(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetCommandQueueProperty)(cl_command_queue, cl_command_queue_properties, cl_bool, cl_command_queue_properties*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateBuffer)(cl_context, cl_mem_flags, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateSubBuffer)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage2D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage3D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, size_t, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainMemObject)(cl_mem); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseMemObject)(cl_mem); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetSupportedImageFormats)(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetMemObjectInfo)(cl_mem, cl_mem_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetImageInfo)(cl_mem, cl_image_info, size_t, void*, size_t*); 
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetMemObjectDestructorCallback)(cl_mem, void (CL_CALLBACK*) (cl_mem, void*), void*); +extern CL_RUNTIME_EXPORT cl_sampler (CL_API_CALL*clCreateSampler)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainSampler)(cl_sampler); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseSampler)(cl_sampler); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetSamplerInfo)(cl_sampler, cl_sampler_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*); +extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithBinary)(cl_context, cl_uint, const cl_device_id*, const size_t*, const unsigned char**, cl_int*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainProgram)(cl_program); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseProgram)(cl_program); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void (CL_CALLBACK*) (cl_program, void*), void*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clUnloadCompiler)(); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetProgramInfo)(cl_program, cl_program_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_kernel (CL_API_CALL*clCreateKernel)(cl_program, const char*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCreateKernelsInProgram)(cl_program, cl_uint, cl_kernel*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainKernel)(cl_kernel); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseKernel)(cl_kernel); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*); +extern CL_RUNTIME_EXPORT cl_int 
(CL_API_CALL*clGetKernelInfo)(cl_kernel, cl_kernel_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelWorkGroupInfo)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clWaitForEvents)(cl_uint, const cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetEventInfo)(cl_event, cl_event_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_event (CL_API_CALL*clCreateUserEvent)(cl_context, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainEvent)(cl_event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseEvent)(cl_event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetUserEventStatus)(cl_event, cl_int); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetEventCallback)(cl_event, cl_int, void (CL_CALLBACK*) (cl_event, cl_int, void*), void*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetEventProfilingInfo)(cl_event, cl_profiling_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clFlush)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clFinish)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int 
(CL_API_CALL*clEnqueueCopyBuffer)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBufferRect)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyImage)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyImageToBuffer)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBufferToImage)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clEnqueueMapBuffer)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event*, cl_event*, cl_int*); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clEnqueueMapImage)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t*, const size_t*, size_t*, size_t*, cl_uint, const cl_event*, cl_event*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueUnmapMemObject)(cl_command_queue, cl_mem, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, 
cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueTask)(cl_command_queue, cl_kernel, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK* user_func) (void*), void*, size_t, cl_uint, const cl_mem*, const void**, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMarker)(cl_command_queue, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueBarrier)(cl_command_queue); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clGetExtensionFunctionAddress)(const char*); #endif diff --git a/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl12.hpp b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl12.hpp index 8d03fbff6a..6df0d5f608 100644 --- a/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl12.hpp +++ b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl12.hpp @@ -293,94 +293,94 @@ #endif // generated by parser_cl.py -extern CL_RUNTIME_EXPORT cl_int (*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*); -extern CL_RUNTIME_EXPORT cl_int (*clGetPlatformInfo)(cl_platform_id, cl_platform_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_int (*clGetDeviceIDs)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*); -extern CL_RUNTIME_EXPORT cl_int (*clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_int (*clCreateSubDevices)(cl_device_id, const cl_device_partition_property*, cl_uint, cl_device_id*, cl_uint*); -extern CL_RUNTIME_EXPORT cl_int (*clRetainDevice)(cl_device_id); -extern CL_RUNTIME_EXPORT cl_int (*clReleaseDevice)(cl_device_id); -extern CL_RUNTIME_EXPORT cl_context (*clCreateContext)(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK*) (const char*, const void*, 
size_t, void*), void*, cl_int*); -extern CL_RUNTIME_EXPORT cl_context (*clCreateContextFromType)(const cl_context_properties*, cl_device_type, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*); -extern CL_RUNTIME_EXPORT cl_int (*clRetainContext)(cl_context); -extern CL_RUNTIME_EXPORT cl_int (*clReleaseContext)(cl_context); -extern CL_RUNTIME_EXPORT cl_int (*clGetContextInfo)(cl_context, cl_context_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_command_queue (*clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*); -extern CL_RUNTIME_EXPORT cl_int (*clRetainCommandQueue)(cl_command_queue); -extern CL_RUNTIME_EXPORT cl_int (*clReleaseCommandQueue)(cl_command_queue); -extern CL_RUNTIME_EXPORT cl_int (*clGetCommandQueueInfo)(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_mem (*clCreateBuffer)(cl_context, cl_mem_flags, size_t, void*, cl_int*); -extern CL_RUNTIME_EXPORT cl_mem (*clCreateSubBuffer)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void*, cl_int*); -extern CL_RUNTIME_EXPORT cl_mem (*clCreateImage)(cl_context, cl_mem_flags, const cl_image_format*, const cl_image_desc*, void*, cl_int*); -extern CL_RUNTIME_EXPORT cl_int (*clRetainMemObject)(cl_mem); -extern CL_RUNTIME_EXPORT cl_int (*clReleaseMemObject)(cl_mem); -extern CL_RUNTIME_EXPORT cl_int (*clGetSupportedImageFormats)(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format*, cl_uint*); -extern CL_RUNTIME_EXPORT cl_int (*clGetMemObjectInfo)(cl_mem, cl_mem_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_int (*clGetImageInfo)(cl_mem, cl_image_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_int (*clSetMemObjectDestructorCallback)(cl_mem, void (CL_CALLBACK*) (cl_mem, void*), void*); -extern CL_RUNTIME_EXPORT cl_sampler (*clCreateSampler)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int*); -extern CL_RUNTIME_EXPORT cl_int 
(*clRetainSampler)(cl_sampler); -extern CL_RUNTIME_EXPORT cl_int (*clReleaseSampler)(cl_sampler); -extern CL_RUNTIME_EXPORT cl_int (*clGetSamplerInfo)(cl_sampler, cl_sampler_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_program (*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*); -extern CL_RUNTIME_EXPORT cl_program (*clCreateProgramWithBinary)(cl_context, cl_uint, const cl_device_id*, const size_t*, const unsigned char**, cl_int*, cl_int*); -extern CL_RUNTIME_EXPORT cl_program (*clCreateProgramWithBuiltInKernels)(cl_context, cl_uint, const cl_device_id*, const char*, cl_int*); -extern CL_RUNTIME_EXPORT cl_int (*clRetainProgram)(cl_program); -extern CL_RUNTIME_EXPORT cl_int (*clReleaseProgram)(cl_program); -extern CL_RUNTIME_EXPORT cl_int (*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void (CL_CALLBACK*) (cl_program, void*), void*); -extern CL_RUNTIME_EXPORT cl_int (*clCompileProgram)(cl_program, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, const char**, void (CL_CALLBACK*) (cl_program, void*), void*); -extern CL_RUNTIME_EXPORT cl_program (*clLinkProgram)(cl_context, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, void (CL_CALLBACK*) (cl_program, void*), void*, cl_int*); -extern CL_RUNTIME_EXPORT cl_int (*clUnloadPlatformCompiler)(cl_platform_id); -extern CL_RUNTIME_EXPORT cl_int (*clGetProgramInfo)(cl_program, cl_program_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_int (*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_kernel (*clCreateKernel)(cl_program, const char*, cl_int*); -extern CL_RUNTIME_EXPORT cl_int (*clCreateKernelsInProgram)(cl_program, cl_uint, cl_kernel*, cl_uint*); -extern CL_RUNTIME_EXPORT cl_int (*clRetainKernel)(cl_kernel); -extern CL_RUNTIME_EXPORT cl_int (*clReleaseKernel)(cl_kernel); -extern CL_RUNTIME_EXPORT cl_int 
(*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*); -extern CL_RUNTIME_EXPORT cl_int (*clGetKernelInfo)(cl_kernel, cl_kernel_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_int (*clGetKernelArgInfo)(cl_kernel, cl_uint, cl_kernel_arg_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_int (*clGetKernelWorkGroupInfo)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_int (*clWaitForEvents)(cl_uint, const cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clGetEventInfo)(cl_event, cl_event_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_event (*clCreateUserEvent)(cl_context, cl_int*); -extern CL_RUNTIME_EXPORT cl_int (*clRetainEvent)(cl_event); -extern CL_RUNTIME_EXPORT cl_int (*clReleaseEvent)(cl_event); -extern CL_RUNTIME_EXPORT cl_int (*clSetUserEventStatus)(cl_event, cl_int); -extern CL_RUNTIME_EXPORT cl_int (*clSetEventCallback)(cl_event, cl_int, void (CL_CALLBACK*) (cl_event, cl_int, void*), void*); -extern CL_RUNTIME_EXPORT cl_int (*clGetEventProfilingInfo)(cl_event, cl_profiling_info, size_t, void*, size_t*); -extern CL_RUNTIME_EXPORT cl_int (*clFlush)(cl_command_queue); -extern CL_RUNTIME_EXPORT cl_int (*clFinish)(cl_command_queue); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueReadBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueWriteBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); 
-extern CL_RUNTIME_EXPORT cl_int (*clEnqueueFillBuffer)(cl_command_queue, cl_mem, const void*, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyBuffer)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyBufferRect)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueReadImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueWriteImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueFillImage)(cl_command_queue, cl_mem, const void*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyImage)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyImageToBuffer)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, size_t, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueCopyBufferToImage)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT void* (*clEnqueueMapBuffer)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event*, cl_event*, cl_int*); -extern CL_RUNTIME_EXPORT void* (*clEnqueueMapImage)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t*, const size_t*, size_t*, size_t*, cl_uint, const cl_event*, cl_event*, cl_int*); -extern CL_RUNTIME_EXPORT cl_int 
(*clEnqueueUnmapMemObject)(cl_command_queue, cl_mem, void*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueMigrateMemObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_mem_migration_flags, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueTask)(cl_command_queue, cl_kernel, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK*) (void*), void*, size_t, cl_uint, const cl_mem*, const void**, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueMarkerWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueBarrierWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*); -extern CL_RUNTIME_EXPORT void* (*clGetExtensionFunctionAddressForPlatform)(cl_platform_id, const char*); -extern CL_RUNTIME_EXPORT cl_mem (*clCreateImage2D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, void*, cl_int*); -extern CL_RUNTIME_EXPORT cl_mem (*clCreateImage3D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, size_t, size_t, void*, cl_int*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueMarker)(cl_command_queue, cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event*); -extern CL_RUNTIME_EXPORT cl_int (*clEnqueueBarrier)(cl_command_queue); -extern CL_RUNTIME_EXPORT cl_int (*clUnloadCompiler)(); -extern CL_RUNTIME_EXPORT void* (*clGetExtensionFunctionAddress)(const char*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetPlatformInfo)(cl_platform_id, cl_platform_info, 
size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetDeviceIDs)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCreateSubDevices)(cl_device_id, const cl_device_partition_property*, cl_uint, cl_device_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainDevice)(cl_device_id); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseDevice)(cl_device_id); +extern CL_RUNTIME_EXPORT cl_context (CL_API_CALL*clCreateContext)(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_context (CL_API_CALL*clCreateContextFromType)(const cl_context_properties*, cl_device_type, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainContext)(cl_context); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseContext)(cl_context); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetContextInfo)(cl_context, cl_context_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_command_queue (CL_API_CALL*clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainCommandQueue)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseCommandQueue)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetCommandQueueInfo)(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateBuffer)(cl_context, cl_mem_flags, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateSubBuffer)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem 
(CL_API_CALL*clCreateImage)(cl_context, cl_mem_flags, const cl_image_format*, const cl_image_desc*, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainMemObject)(cl_mem); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseMemObject)(cl_mem); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetSupportedImageFormats)(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetMemObjectInfo)(cl_mem, cl_mem_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetImageInfo)(cl_mem, cl_image_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetMemObjectDestructorCallback)(cl_mem, void (CL_CALLBACK*) (cl_mem, void*), void*); +extern CL_RUNTIME_EXPORT cl_sampler (CL_API_CALL*clCreateSampler)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainSampler)(cl_sampler); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseSampler)(cl_sampler); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetSamplerInfo)(cl_sampler, cl_sampler_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*); +extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithBinary)(cl_context, cl_uint, const cl_device_id*, const size_t*, const unsigned char**, cl_int*, cl_int*); +extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithBuiltInKernels)(cl_context, cl_uint, const cl_device_id*, const char*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainProgram)(cl_program); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseProgram)(cl_program); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void (CL_CALLBACK*) (cl_program, void*), void*); +extern CL_RUNTIME_EXPORT cl_int 
(CL_API_CALL*clCompileProgram)(cl_program, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, const char**, void (CL_CALLBACK*) (cl_program, void*), void*); +extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clLinkProgram)(cl_context, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, void (CL_CALLBACK*) (cl_program, void*), void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clUnloadPlatformCompiler)(cl_platform_id); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetProgramInfo)(cl_program, cl_program_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_kernel (CL_API_CALL*clCreateKernel)(cl_program, const char*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCreateKernelsInProgram)(cl_program, cl_uint, cl_kernel*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainKernel)(cl_kernel); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseKernel)(cl_kernel); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelInfo)(cl_kernel, cl_kernel_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelArgInfo)(cl_kernel, cl_uint, cl_kernel_arg_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelWorkGroupInfo)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clWaitForEvents)(cl_uint, const cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetEventInfo)(cl_event, cl_event_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_event (CL_API_CALL*clCreateUserEvent)(cl_context, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainEvent)(cl_event); +extern CL_RUNTIME_EXPORT cl_int 
(CL_API_CALL*clReleaseEvent)(cl_event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetUserEventStatus)(cl_event, cl_int); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetEventCallback)(cl_event, cl_int, void (CL_CALLBACK*) (cl_event, cl_int, void*), void*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetEventProfilingInfo)(cl_event, cl_profiling_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clFlush)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clFinish)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueFillBuffer)(cl_command_queue, cl_mem, const void*, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBuffer)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBufferRect)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadImage)(cl_command_queue, cl_mem, cl_bool, const 
size_t*, const size_t*, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueFillImage)(cl_command_queue, cl_mem, const void*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyImage)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyImageToBuffer)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBufferToImage)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clEnqueueMapBuffer)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event*, cl_event*, cl_int*); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clEnqueueMapImage)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t*, const size_t*, size_t*, size_t*, cl_uint, const cl_event*, cl_event*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueUnmapMemObject)(cl_command_queue, cl_mem, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMigrateMemObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_mem_migration_flags, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueTask)(cl_command_queue, cl_kernel, 
cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK*) (void*), void*, size_t, cl_uint, const cl_mem*, const void**, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMarkerWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueBarrierWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clGetExtensionFunctionAddressForPlatform)(cl_platform_id, const char*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage2D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage3D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, size_t, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMarker)(cl_command_queue, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueBarrier)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clUnloadCompiler)(); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clGetExtensionFunctionAddress)(const char*); #endif diff --git a/modules/ocl/src/cl_runtime/cl_runtime_opencl11_impl.hpp b/modules/ocl/src/cl_runtime/cl_runtime_opencl11_impl.hpp index ef14696996..e594c16dae 100644 --- a/modules/ocl/src/cl_runtime/cl_runtime_opencl11_impl.hpp +++ b/modules/ocl/src/cl_runtime/cl_runtime_opencl11_impl.hpp @@ -161,200 +161,200 @@ namespace { template struct opencl_fn0 { - typedef _R (*FN)(); - static _R switch_fn() + typedef _R (CL_API_CALL*FN)(); + static _R CL_API_CALL switch_fn() { return ((FN)opencl_check_fn(ID))(); } }; template struct opencl_fn1 { - typedef _R (*FN)(_T1); - static _R switch_fn(_T1 p1) + 
typedef _R (CL_API_CALL*FN)(_T1); + static _R CL_API_CALL switch_fn(_T1 p1) { return ((FN)opencl_check_fn(ID))(p1); } }; template struct opencl_fn2 { - typedef _R (*FN)(_T1, _T2); - static _R switch_fn(_T1 p1, _T2 p2) + typedef _R (CL_API_CALL*FN)(_T1, _T2); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2) { return ((FN)opencl_check_fn(ID))(p1, p2); } }; template struct opencl_fn3 { - typedef _R (*FN)(_T1, _T2, _T3); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3) { return ((FN)opencl_check_fn(ID))(p1, p2, p3); } }; template struct opencl_fn4 { - typedef _R (*FN)(_T1, _T2, _T3, _T4); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4); } }; template struct opencl_fn5 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5); } }; template struct opencl_fn6 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5, _T6); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6); } }; template struct opencl_fn7 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7); } }; 
template struct opencl_fn8 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8); } }; template struct opencl_fn9 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9); } }; template struct opencl_fn10 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } }; template struct opencl_fn11 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11); } }; template struct opencl_fn12 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12); - static _R 
switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12); } }; template struct opencl_fn13 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } }; template struct opencl_fn14 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14); } }; } // generated by parser_cl.py -cl_int (*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*) = opencl_fn3::switch_fn; -cl_int (*clGetPlatformInfo)(cl_platform_id, cl_platform_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_int (*clGetDeviceIDs)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*) = 
opencl_fn5::switch_fn; -cl_int (*clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_context (*clCreateContext)(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*) = opencl_fn6::switch_fn; -cl_context (*clCreateContextFromType)(const cl_context_properties*, cl_device_type, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*) = opencl_fn5::switch_fn; -cl_int (*clRetainContext)(cl_context) = opencl_fn1::switch_fn; -cl_int (*clReleaseContext)(cl_context) = opencl_fn1::switch_fn; -cl_int (*clGetContextInfo)(cl_context, cl_context_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_command_queue (*clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*) = opencl_fn4::switch_fn; -cl_int (*clRetainCommandQueue)(cl_command_queue) = opencl_fn1::switch_fn; -cl_int (*clReleaseCommandQueue)(cl_command_queue) = opencl_fn1::switch_fn; -cl_int (*clGetCommandQueueInfo)(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_int (*clSetCommandQueueProperty)(cl_command_queue, cl_command_queue_properties, cl_bool, cl_command_queue_properties*) = opencl_fn4::switch_fn; -cl_mem (*clCreateBuffer)(cl_context, cl_mem_flags, size_t, void*, cl_int*) = opencl_fn5::switch_fn; -cl_mem (*clCreateSubBuffer)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void*, cl_int*) = opencl_fn5::switch_fn; -cl_mem (*clCreateImage2D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, void*, cl_int*) = opencl_fn8::switch_fn; -cl_mem (*clCreateImage3D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, size_t, size_t, void*, cl_int*) = opencl_fn10::switch_fn; -cl_int (*clRetainMemObject)(cl_mem) = opencl_fn1::switch_fn; -cl_int (*clReleaseMemObject)(cl_mem) = opencl_fn1::switch_fn; -cl_int (*clGetSupportedImageFormats)(cl_context, 
cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format*, cl_uint*) = opencl_fn6::switch_fn; -cl_int (*clGetMemObjectInfo)(cl_mem, cl_mem_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_int (*clGetImageInfo)(cl_mem, cl_image_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_int (*clSetMemObjectDestructorCallback)(cl_mem, void (CL_CALLBACK*) (cl_mem, void*), void*) = opencl_fn3::switch_fn; -cl_sampler (*clCreateSampler)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int*) = opencl_fn5::switch_fn; -cl_int (*clRetainSampler)(cl_sampler) = opencl_fn1::switch_fn; -cl_int (*clReleaseSampler)(cl_sampler) = opencl_fn1::switch_fn; -cl_int (*clGetSamplerInfo)(cl_sampler, cl_sampler_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_program (*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*) = opencl_fn5::switch_fn; -cl_program (*clCreateProgramWithBinary)(cl_context, cl_uint, const cl_device_id*, const size_t*, const unsigned char**, cl_int*, cl_int*) = opencl_fn7::switch_fn; -cl_int (*clRetainProgram)(cl_program) = opencl_fn1::switch_fn; -cl_int (*clReleaseProgram)(cl_program) = opencl_fn1::switch_fn; -cl_int (*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void (CL_CALLBACK*) (cl_program, void*), void*) = opencl_fn6::switch_fn; -cl_int (*clUnloadCompiler)() = opencl_fn0::switch_fn; -cl_int (*clGetProgramInfo)(cl_program, cl_program_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_int (*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*) = opencl_fn6::switch_fn; -cl_kernel (*clCreateKernel)(cl_program, const char*, cl_int*) = opencl_fn3::switch_fn; -cl_int (*clCreateKernelsInProgram)(cl_program, cl_uint, cl_kernel*, cl_uint*) = opencl_fn4::switch_fn; -cl_int (*clRetainKernel)(cl_kernel) = opencl_fn1::switch_fn; -cl_int (*clReleaseKernel)(cl_kernel) = opencl_fn1::switch_fn; -cl_int (*clSetKernelArg)(cl_kernel, cl_uint, 
size_t, const void*) = opencl_fn4::switch_fn; -cl_int (*clGetKernelInfo)(cl_kernel, cl_kernel_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_int (*clGetKernelWorkGroupInfo)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*) = opencl_fn6::switch_fn; -cl_int (*clWaitForEvents)(cl_uint, const cl_event*) = opencl_fn2::switch_fn; -cl_int (*clGetEventInfo)(cl_event, cl_event_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_event (*clCreateUserEvent)(cl_context, cl_int*) = opencl_fn2::switch_fn; -cl_int (*clRetainEvent)(cl_event) = opencl_fn1::switch_fn; -cl_int (*clReleaseEvent)(cl_event) = opencl_fn1::switch_fn; -cl_int (*clSetUserEventStatus)(cl_event, cl_int) = opencl_fn2::switch_fn; -cl_int (*clSetEventCallback)(cl_event, cl_int, void (CL_CALLBACK*) (cl_event, cl_int, void*), void*) = opencl_fn4::switch_fn; -cl_int (*clGetEventProfilingInfo)(cl_event, cl_profiling_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_int (*clFlush)(cl_command_queue) = opencl_fn1::switch_fn; -cl_int (*clFinish)(cl_command_queue) = opencl_fn1::switch_fn; -cl_int (*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; -cl_int (*clEnqueueReadBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn14::switch_fn; -cl_int (*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; -cl_int (*clEnqueueWriteBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*) = opencl_fn14::switch_fn; -cl_int (*clEnqueueCopyBuffer)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*) = 
opencl_fn9::switch_fn; -cl_int (*clEnqueueCopyBufferRect)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn13::switch_fn; -cl_int (*clEnqueueReadImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn11::switch_fn; -cl_int (*clEnqueueWriteImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*) = opencl_fn11::switch_fn; -cl_int (*clEnqueueCopyImage)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; -cl_int (*clEnqueueCopyImageToBuffer)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; -cl_int (*clEnqueueCopyBufferToImage)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; -void* (*clEnqueueMapBuffer)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event*, cl_event*, cl_int*) = opencl_fn10::switch_fn; -void* (*clEnqueueMapImage)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t*, const size_t*, size_t*, size_t*, cl_uint, const cl_event*, cl_event*, cl_int*) = opencl_fn12::switch_fn; -cl_int (*clEnqueueUnmapMemObject)(cl_command_queue, cl_mem, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn6::switch_fn; -cl_int (*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; -cl_int (*clEnqueueTask)(cl_command_queue, cl_kernel, cl_uint, const cl_event*, cl_event*) = opencl_fn5::switch_fn; -cl_int (*clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK*) (void*), void*, size_t, 
cl_uint, const cl_mem*, const void**, cl_uint, const cl_event*, cl_event*) = opencl_fn10::switch_fn; -cl_int (*clEnqueueMarker)(cl_command_queue, cl_event*) = opencl_fn2::switch_fn; -cl_int (*clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event*) = opencl_fn3::switch_fn; -cl_int (*clEnqueueBarrier)(cl_command_queue) = opencl_fn1::switch_fn; -void* (*clGetExtensionFunctionAddress)(const char*) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*) = opencl_fn3::switch_fn; +cl_int (CL_API_CALL*clGetPlatformInfo)(cl_platform_id, cl_platform_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clGetDeviceIDs)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_context (CL_API_CALL*clCreateContext)(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*) = opencl_fn6::switch_fn; +cl_context (CL_API_CALL*clCreateContextFromType)(const cl_context_properties*, cl_device_type, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clRetainContext)(cl_context) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clReleaseContext)(cl_context) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clGetContextInfo)(cl_context, cl_context_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_command_queue (CL_API_CALL*clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*) = opencl_fn4::switch_fn; +cl_int (CL_API_CALL*clRetainCommandQueue)(cl_command_queue) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clReleaseCommandQueue)(cl_command_queue) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clGetCommandQueueInfo)(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*) 
= opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clSetCommandQueueProperty)(cl_command_queue, cl_command_queue_properties, cl_bool, cl_command_queue_properties*) = opencl_fn4::switch_fn; +cl_mem (CL_API_CALL*clCreateBuffer)(cl_context, cl_mem_flags, size_t, void*, cl_int*) = opencl_fn5::switch_fn; +cl_mem (CL_API_CALL*clCreateSubBuffer)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void*, cl_int*) = opencl_fn5::switch_fn; +cl_mem (CL_API_CALL*clCreateImage2D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, void*, cl_int*) = opencl_fn8::switch_fn; +cl_mem (CL_API_CALL*clCreateImage3D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, size_t, size_t, void*, cl_int*) = opencl_fn10::switch_fn; +cl_int (CL_API_CALL*clRetainMemObject)(cl_mem) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clReleaseMemObject)(cl_mem) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clGetSupportedImageFormats)(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format*, cl_uint*) = opencl_fn6::switch_fn; +cl_int (CL_API_CALL*clGetMemObjectInfo)(cl_mem, cl_mem_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clGetImageInfo)(cl_mem, cl_image_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clSetMemObjectDestructorCallback)(cl_mem, void (CL_CALLBACK*) (cl_mem, void*), void*) = opencl_fn3::switch_fn; +cl_sampler (CL_API_CALL*clCreateSampler)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clRetainSampler)(cl_sampler) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clReleaseSampler)(cl_sampler) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clGetSamplerInfo)(cl_sampler, cl_sampler_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_program (CL_API_CALL*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*) = opencl_fn5::switch_fn; +cl_program 
(CL_API_CALL*clCreateProgramWithBinary)(cl_context, cl_uint, const cl_device_id*, const size_t*, const unsigned char**, cl_int*, cl_int*) = opencl_fn7::switch_fn; +cl_int (CL_API_CALL*clRetainProgram)(cl_program) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clReleaseProgram)(cl_program) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void (CL_CALLBACK*) (cl_program, void*), void*) = opencl_fn6::switch_fn; +cl_int (CL_API_CALL*clUnloadCompiler)() = opencl_fn0::switch_fn; +cl_int (CL_API_CALL*clGetProgramInfo)(cl_program, cl_program_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*) = opencl_fn6::switch_fn; +cl_kernel (CL_API_CALL*clCreateKernel)(cl_program, const char*, cl_int*) = opencl_fn3::switch_fn; +cl_int (CL_API_CALL*clCreateKernelsInProgram)(cl_program, cl_uint, cl_kernel*, cl_uint*) = opencl_fn4::switch_fn; +cl_int (CL_API_CALL*clRetainKernel)(cl_kernel) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clReleaseKernel)(cl_kernel) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*) = opencl_fn4::switch_fn; +cl_int (CL_API_CALL*clGetKernelInfo)(cl_kernel, cl_kernel_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clGetKernelWorkGroupInfo)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*) = opencl_fn6::switch_fn; +cl_int (CL_API_CALL*clWaitForEvents)(cl_uint, const cl_event*) = opencl_fn2::switch_fn; +cl_int (CL_API_CALL*clGetEventInfo)(cl_event, cl_event_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_event (CL_API_CALL*clCreateUserEvent)(cl_context, cl_int*) = opencl_fn2::switch_fn; +cl_int (CL_API_CALL*clRetainEvent)(cl_event) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clReleaseEvent)(cl_event) = opencl_fn1::switch_fn; +cl_int 
(CL_API_CALL*clSetUserEventStatus)(cl_event, cl_int) = opencl_fn2::switch_fn; +cl_int (CL_API_CALL*clSetEventCallback)(cl_event, cl_int, void (CL_CALLBACK*) (cl_event, cl_int, void*), void*) = opencl_fn4::switch_fn; +cl_int (CL_API_CALL*clGetEventProfilingInfo)(cl_event, cl_profiling_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clFlush)(cl_command_queue) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clFinish)(cl_command_queue) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (CL_API_CALL*clEnqueueReadBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn14::switch_fn; +cl_int (CL_API_CALL*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (CL_API_CALL*clEnqueueWriteBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*) = opencl_fn14::switch_fn; +cl_int (CL_API_CALL*clEnqueueCopyBuffer)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (CL_API_CALL*clEnqueueCopyBufferRect)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn13::switch_fn; +cl_int (CL_API_CALL*clEnqueueReadImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn11::switch_fn; +cl_int (CL_API_CALL*clEnqueueWriteImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, const void*, cl_uint, 
const cl_event*, cl_event*) = opencl_fn11::switch_fn; +cl_int (CL_API_CALL*clEnqueueCopyImage)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (CL_API_CALL*clEnqueueCopyImageToBuffer)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (CL_API_CALL*clEnqueueCopyBufferToImage)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +void* (CL_API_CALL*clEnqueueMapBuffer)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event*, cl_event*, cl_int*) = opencl_fn10::switch_fn; +void* (CL_API_CALL*clEnqueueMapImage)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t*, const size_t*, size_t*, size_t*, cl_uint, const cl_event*, cl_event*, cl_int*) = opencl_fn12::switch_fn; +cl_int (CL_API_CALL*clEnqueueUnmapMemObject)(cl_command_queue, cl_mem, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn6::switch_fn; +cl_int (CL_API_CALL*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (CL_API_CALL*clEnqueueTask)(cl_command_queue, cl_kernel, cl_uint, const cl_event*, cl_event*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK*) (void*), void*, size_t, cl_uint, const cl_mem*, const void**, cl_uint, const cl_event*, cl_event*) = opencl_fn10::switch_fn; +cl_int (CL_API_CALL*clEnqueueMarker)(cl_command_queue, cl_event*) = opencl_fn2::switch_fn; +cl_int (CL_API_CALL*clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event*) = opencl_fn3::switch_fn; +cl_int (CL_API_CALL*clEnqueueBarrier)(cl_command_queue) = opencl_fn1::switch_fn; +void* (CL_API_CALL*clGetExtensionFunctionAddress)(const 
char*) = opencl_fn1::switch_fn; // generated by parser_cl.py void* opencl_fn_ptrs[] = { diff --git a/modules/ocl/src/cl_runtime/cl_runtime_opencl12_impl.hpp b/modules/ocl/src/cl_runtime/cl_runtime_opencl12_impl.hpp index f6f3e957a1..2282dc5f33 100644 --- a/modules/ocl/src/cl_runtime/cl_runtime_opencl12_impl.hpp +++ b/modules/ocl/src/cl_runtime/cl_runtime_opencl12_impl.hpp @@ -189,214 +189,214 @@ namespace { template struct opencl_fn0 { - typedef _R (*FN)(); - static _R switch_fn() + typedef _R (CL_API_CALL*FN)(); + static _R CL_API_CALL switch_fn() { return ((FN)opencl_check_fn(ID))(); } }; template struct opencl_fn1 { - typedef _R (*FN)(_T1); - static _R switch_fn(_T1 p1) + typedef _R (CL_API_CALL*FN)(_T1); + static _R CL_API_CALL switch_fn(_T1 p1) { return ((FN)opencl_check_fn(ID))(p1); } }; template struct opencl_fn2 { - typedef _R (*FN)(_T1, _T2); - static _R switch_fn(_T1 p1, _T2 p2) + typedef _R (CL_API_CALL*FN)(_T1, _T2); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2) { return ((FN)opencl_check_fn(ID))(p1, p2); } }; template struct opencl_fn3 { - typedef _R (*FN)(_T1, _T2, _T3); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3) { return ((FN)opencl_check_fn(ID))(p1, p2, p3); } }; template struct opencl_fn4 { - typedef _R (*FN)(_T1, _T2, _T3, _T4); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4); } }; template struct opencl_fn5 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5); } }; template struct opencl_fn6 { - typedef _R (*FN)(_T1, _T2, _T3, 
_T4, _T5, _T6); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5, _T6); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6); } }; template struct opencl_fn7 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7); } }; template struct opencl_fn8 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8); } }; template struct opencl_fn9 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9); } }; template struct opencl_fn10 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, 
p5, p6, p7, p8, p9, p10); } }; template struct opencl_fn11 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11); } }; template struct opencl_fn12 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12); } }; template struct opencl_fn13 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13) + typedef _R (CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } }; template struct opencl_fn14 { - typedef _R (*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14); - static _R switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14) + typedef _R 
(CL_API_CALL*FN)(_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10, _T11, _T12, _T13, _T14); + static _R CL_API_CALL switch_fn(_T1 p1, _T2 p2, _T3 p3, _T4 p4, _T5 p5, _T6 p6, _T7 p7, _T8 p8, _T9 p9, _T10 p10, _T11 p11, _T12 p12, _T13 p13, _T14 p14) { return ((FN)opencl_check_fn(ID))(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14); } }; } // generated by parser_cl.py -cl_int (*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*) = opencl_fn3::switch_fn; -cl_int (*clGetPlatformInfo)(cl_platform_id, cl_platform_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_int (*clGetDeviceIDs)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*) = opencl_fn5::switch_fn; -cl_int (*clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_int (*clCreateSubDevices)(cl_device_id, const cl_device_partition_property*, cl_uint, cl_device_id*, cl_uint*) = opencl_fn5::switch_fn; -cl_int (*clRetainDevice)(cl_device_id) = opencl_fn1::switch_fn; -cl_int (*clReleaseDevice)(cl_device_id) = opencl_fn1::switch_fn; -cl_context (*clCreateContext)(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*) = opencl_fn6::switch_fn; -cl_context (*clCreateContextFromType)(const cl_context_properties*, cl_device_type, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*) = opencl_fn5::switch_fn; -cl_int (*clRetainContext)(cl_context) = opencl_fn1::switch_fn; -cl_int (*clReleaseContext)(cl_context) = opencl_fn1::switch_fn; -cl_int (*clGetContextInfo)(cl_context, cl_context_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_command_queue (*clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*) = opencl_fn4::switch_fn; -cl_int (*clRetainCommandQueue)(cl_command_queue) = opencl_fn1::switch_fn; -cl_int (*clReleaseCommandQueue)(cl_command_queue) = opencl_fn1::switch_fn; -cl_int 
(*clGetCommandQueueInfo)(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_mem (*clCreateBuffer)(cl_context, cl_mem_flags, size_t, void*, cl_int*) = opencl_fn5::switch_fn; -cl_mem (*clCreateSubBuffer)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void*, cl_int*) = opencl_fn5::switch_fn; -cl_mem (*clCreateImage)(cl_context, cl_mem_flags, const cl_image_format*, const cl_image_desc*, void*, cl_int*) = opencl_fn6::switch_fn; -cl_int (*clRetainMemObject)(cl_mem) = opencl_fn1::switch_fn; -cl_int (*clReleaseMemObject)(cl_mem) = opencl_fn1::switch_fn; -cl_int (*clGetSupportedImageFormats)(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format*, cl_uint*) = opencl_fn6::switch_fn; -cl_int (*clGetMemObjectInfo)(cl_mem, cl_mem_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_int (*clGetImageInfo)(cl_mem, cl_image_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_int (*clSetMemObjectDestructorCallback)(cl_mem, void (CL_CALLBACK*) (cl_mem, void*), void*) = opencl_fn3::switch_fn; -cl_sampler (*clCreateSampler)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int*) = opencl_fn5::switch_fn; -cl_int (*clRetainSampler)(cl_sampler) = opencl_fn1::switch_fn; -cl_int (*clReleaseSampler)(cl_sampler) = opencl_fn1::switch_fn; -cl_int (*clGetSamplerInfo)(cl_sampler, cl_sampler_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_program (*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*) = opencl_fn5::switch_fn; -cl_program (*clCreateProgramWithBinary)(cl_context, cl_uint, const cl_device_id*, const size_t*, const unsigned char**, cl_int*, cl_int*) = opencl_fn7::switch_fn; -cl_program (*clCreateProgramWithBuiltInKernels)(cl_context, cl_uint, const cl_device_id*, const char*, cl_int*) = opencl_fn5::switch_fn; -cl_int (*clRetainProgram)(cl_program) = opencl_fn1::switch_fn; -cl_int (*clReleaseProgram)(cl_program) = opencl_fn1::switch_fn; -cl_int 
(*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void (CL_CALLBACK*) (cl_program, void*), void*) = opencl_fn6::switch_fn; -cl_int (*clCompileProgram)(cl_program, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, const char**, void (CL_CALLBACK*) (cl_program, void*), void*) = opencl_fn9::switch_fn; -cl_program (*clLinkProgram)(cl_context, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, void (CL_CALLBACK*) (cl_program, void*), void*, cl_int*) = opencl_fn9::switch_fn; -cl_int (*clUnloadPlatformCompiler)(cl_platform_id) = opencl_fn1::switch_fn; -cl_int (*clGetProgramInfo)(cl_program, cl_program_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_int (*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*) = opencl_fn6::switch_fn; -cl_kernel (*clCreateKernel)(cl_program, const char*, cl_int*) = opencl_fn3::switch_fn; -cl_int (*clCreateKernelsInProgram)(cl_program, cl_uint, cl_kernel*, cl_uint*) = opencl_fn4::switch_fn; -cl_int (*clRetainKernel)(cl_kernel) = opencl_fn1::switch_fn; -cl_int (*clReleaseKernel)(cl_kernel) = opencl_fn1::switch_fn; -cl_int (*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*) = opencl_fn4::switch_fn; -cl_int (*clGetKernelInfo)(cl_kernel, cl_kernel_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_int (*clGetKernelArgInfo)(cl_kernel, cl_uint, cl_kernel_arg_info, size_t, void*, size_t*) = opencl_fn6::switch_fn; -cl_int (*clGetKernelWorkGroupInfo)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*) = opencl_fn6::switch_fn; -cl_int (*clWaitForEvents)(cl_uint, const cl_event*) = opencl_fn2::switch_fn; -cl_int (*clGetEventInfo)(cl_event, cl_event_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_event (*clCreateUserEvent)(cl_context, cl_int*) = opencl_fn2::switch_fn; -cl_int (*clRetainEvent)(cl_event) = opencl_fn1::switch_fn; -cl_int (*clReleaseEvent)(cl_event) = opencl_fn1::switch_fn; 
-cl_int (*clSetUserEventStatus)(cl_event, cl_int) = opencl_fn2::switch_fn; -cl_int (*clSetEventCallback)(cl_event, cl_int, void (CL_CALLBACK*) (cl_event, cl_int, void*), void*) = opencl_fn4::switch_fn; -cl_int (*clGetEventProfilingInfo)(cl_event, cl_profiling_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; -cl_int (*clFlush)(cl_command_queue) = opencl_fn1::switch_fn; -cl_int (*clFinish)(cl_command_queue) = opencl_fn1::switch_fn; -cl_int (*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; -cl_int (*clEnqueueReadBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn14::switch_fn; -cl_int (*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; -cl_int (*clEnqueueWriteBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*) = opencl_fn14::switch_fn; -cl_int (*clEnqueueFillBuffer)(cl_command_queue, cl_mem, const void*, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; -cl_int (*clEnqueueCopyBuffer)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; -cl_int (*clEnqueueCopyBufferRect)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn13::switch_fn; -cl_int (*clEnqueueReadImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn11::switch_fn; -cl_int (*clEnqueueWriteImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, 
const void*, cl_uint, const cl_event*, cl_event*) = opencl_fn11::switch_fn; -cl_int (*clEnqueueFillImage)(cl_command_queue, cl_mem, const void*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn8::switch_fn; -cl_int (*clEnqueueCopyImage)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; -cl_int (*clEnqueueCopyImageToBuffer)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; -cl_int (*clEnqueueCopyBufferToImage)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; -void* (*clEnqueueMapBuffer)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event*, cl_event*, cl_int*) = opencl_fn10::switch_fn; -void* (*clEnqueueMapImage)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t*, const size_t*, size_t*, size_t*, cl_uint, const cl_event*, cl_event*, cl_int*) = opencl_fn12::switch_fn; -cl_int (*clEnqueueUnmapMemObject)(cl_command_queue, cl_mem, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn6::switch_fn; -cl_int (*clEnqueueMigrateMemObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_mem_migration_flags, cl_uint, const cl_event*, cl_event*) = opencl_fn7::switch_fn; -cl_int (*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; -cl_int (*clEnqueueTask)(cl_command_queue, cl_kernel, cl_uint, const cl_event*, cl_event*) = opencl_fn5::switch_fn; -cl_int (*clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK*) (void*), void*, size_t, cl_uint, const cl_mem*, const void**, cl_uint, const cl_event*, cl_event*) = opencl_fn10::switch_fn; -cl_int (*clEnqueueMarkerWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*) = 
opencl_fn4::switch_fn; -cl_int (*clEnqueueBarrierWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*) = opencl_fn4::switch_fn; -void* (*clGetExtensionFunctionAddressForPlatform)(cl_platform_id, const char*) = opencl_fn2::switch_fn; -cl_mem (*clCreateImage2D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, void*, cl_int*) = opencl_fn8::switch_fn; -cl_mem (*clCreateImage3D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, size_t, size_t, void*, cl_int*) = opencl_fn10::switch_fn; -cl_int (*clEnqueueMarker)(cl_command_queue, cl_event*) = opencl_fn2::switch_fn; -cl_int (*clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event*) = opencl_fn3::switch_fn; -cl_int (*clEnqueueBarrier)(cl_command_queue) = opencl_fn1::switch_fn; -cl_int (*clUnloadCompiler)() = opencl_fn0::switch_fn; -void* (*clGetExtensionFunctionAddress)(const char*) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*) = opencl_fn3::switch_fn; +cl_int (CL_API_CALL*clGetPlatformInfo)(cl_platform_id, cl_platform_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clGetDeviceIDs)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clCreateSubDevices)(cl_device_id, const cl_device_partition_property*, cl_uint, cl_device_id*, cl_uint*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clRetainDevice)(cl_device_id) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clReleaseDevice)(cl_device_id) = opencl_fn1::switch_fn; +cl_context (CL_API_CALL*clCreateContext)(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*) = opencl_fn6::switch_fn; +cl_context (CL_API_CALL*clCreateContextFromType)(const cl_context_properties*, 
cl_device_type, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clRetainContext)(cl_context) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clReleaseContext)(cl_context) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clGetContextInfo)(cl_context, cl_context_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_command_queue (CL_API_CALL*clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*) = opencl_fn4::switch_fn; +cl_int (CL_API_CALL*clRetainCommandQueue)(cl_command_queue) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clReleaseCommandQueue)(cl_command_queue) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clGetCommandQueueInfo)(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_mem (CL_API_CALL*clCreateBuffer)(cl_context, cl_mem_flags, size_t, void*, cl_int*) = opencl_fn5::switch_fn; +cl_mem (CL_API_CALL*clCreateSubBuffer)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void*, cl_int*) = opencl_fn5::switch_fn; +cl_mem (CL_API_CALL*clCreateImage)(cl_context, cl_mem_flags, const cl_image_format*, const cl_image_desc*, void*, cl_int*) = opencl_fn6::switch_fn; +cl_int (CL_API_CALL*clRetainMemObject)(cl_mem) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clReleaseMemObject)(cl_mem) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clGetSupportedImageFormats)(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format*, cl_uint*) = opencl_fn6::switch_fn; +cl_int (CL_API_CALL*clGetMemObjectInfo)(cl_mem, cl_mem_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clGetImageInfo)(cl_mem, cl_image_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clSetMemObjectDestructorCallback)(cl_mem, void (CL_CALLBACK*) (cl_mem, void*), void*) = opencl_fn3::switch_fn; +cl_sampler (CL_API_CALL*clCreateSampler)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int*) = 
opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clRetainSampler)(cl_sampler) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clReleaseSampler)(cl_sampler) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clGetSamplerInfo)(cl_sampler, cl_sampler_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_program (CL_API_CALL*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*) = opencl_fn5::switch_fn; +cl_program (CL_API_CALL*clCreateProgramWithBinary)(cl_context, cl_uint, const cl_device_id*, const size_t*, const unsigned char**, cl_int*, cl_int*) = opencl_fn7::switch_fn; +cl_program (CL_API_CALL*clCreateProgramWithBuiltInKernels)(cl_context, cl_uint, const cl_device_id*, const char*, cl_int*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clRetainProgram)(cl_program) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clReleaseProgram)(cl_program) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void (CL_CALLBACK*) (cl_program, void*), void*) = opencl_fn6::switch_fn; +cl_int (CL_API_CALL*clCompileProgram)(cl_program, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, const char**, void (CL_CALLBACK*) (cl_program, void*), void*) = opencl_fn9::switch_fn; +cl_program (CL_API_CALL*clLinkProgram)(cl_context, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, void (CL_CALLBACK*) (cl_program, void*), void*, cl_int*) = opencl_fn9::switch_fn; +cl_int (CL_API_CALL*clUnloadPlatformCompiler)(cl_platform_id) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clGetProgramInfo)(cl_program, cl_program_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*) = opencl_fn6::switch_fn; +cl_kernel (CL_API_CALL*clCreateKernel)(cl_program, const char*, cl_int*) = opencl_fn3::switch_fn; +cl_int (CL_API_CALL*clCreateKernelsInProgram)(cl_program, cl_uint, 
cl_kernel*, cl_uint*) = opencl_fn4::switch_fn; +cl_int (CL_API_CALL*clRetainKernel)(cl_kernel) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clReleaseKernel)(cl_kernel) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*) = opencl_fn4::switch_fn; +cl_int (CL_API_CALL*clGetKernelInfo)(cl_kernel, cl_kernel_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clGetKernelArgInfo)(cl_kernel, cl_uint, cl_kernel_arg_info, size_t, void*, size_t*) = opencl_fn6::switch_fn; +cl_int (CL_API_CALL*clGetKernelWorkGroupInfo)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*) = opencl_fn6::switch_fn; +cl_int (CL_API_CALL*clWaitForEvents)(cl_uint, const cl_event*) = opencl_fn2::switch_fn; +cl_int (CL_API_CALL*clGetEventInfo)(cl_event, cl_event_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_event (CL_API_CALL*clCreateUserEvent)(cl_context, cl_int*) = opencl_fn2::switch_fn; +cl_int (CL_API_CALL*clRetainEvent)(cl_event) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clReleaseEvent)(cl_event) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clSetUserEventStatus)(cl_event, cl_int) = opencl_fn2::switch_fn; +cl_int (CL_API_CALL*clSetEventCallback)(cl_event, cl_int, void (CL_CALLBACK*) (cl_event, cl_int, void*), void*) = opencl_fn4::switch_fn; +cl_int (CL_API_CALL*clGetEventProfilingInfo)(cl_event, cl_profiling_info, size_t, void*, size_t*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clFlush)(cl_command_queue) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clFinish)(cl_command_queue) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (CL_API_CALL*clEnqueueReadBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*) = 
opencl_fn14::switch_fn; +cl_int (CL_API_CALL*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (CL_API_CALL*clEnqueueWriteBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*) = opencl_fn14::switch_fn; +cl_int (CL_API_CALL*clEnqueueFillBuffer)(cl_command_queue, cl_mem, const void*, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (CL_API_CALL*clEnqueueCopyBuffer)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (CL_API_CALL*clEnqueueCopyBufferRect)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn13::switch_fn; +cl_int (CL_API_CALL*clEnqueueReadImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn11::switch_fn; +cl_int (CL_API_CALL*clEnqueueWriteImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*) = opencl_fn11::switch_fn; +cl_int (CL_API_CALL*clEnqueueFillImage)(cl_command_queue, cl_mem, const void*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn8::switch_fn; +cl_int (CL_API_CALL*clEnqueueCopyImage)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (CL_API_CALL*clEnqueueCopyImageToBuffer)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, size_t, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (CL_API_CALL*clEnqueueCopyBufferToImage)(cl_command_queue, cl_mem, cl_mem, size_t, 
const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +void* (CL_API_CALL*clEnqueueMapBuffer)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event*, cl_event*, cl_int*) = opencl_fn10::switch_fn; +void* (CL_API_CALL*clEnqueueMapImage)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t*, const size_t*, size_t*, size_t*, cl_uint, const cl_event*, cl_event*, cl_int*) = opencl_fn12::switch_fn; +cl_int (CL_API_CALL*clEnqueueUnmapMemObject)(cl_command_queue, cl_mem, void*, cl_uint, const cl_event*, cl_event*) = opencl_fn6::switch_fn; +cl_int (CL_API_CALL*clEnqueueMigrateMemObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_mem_migration_flags, cl_uint, const cl_event*, cl_event*) = opencl_fn7::switch_fn; +cl_int (CL_API_CALL*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) = opencl_fn9::switch_fn; +cl_int (CL_API_CALL*clEnqueueTask)(cl_command_queue, cl_kernel, cl_uint, const cl_event*, cl_event*) = opencl_fn5::switch_fn; +cl_int (CL_API_CALL*clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK*) (void*), void*, size_t, cl_uint, const cl_mem*, const void**, cl_uint, const cl_event*, cl_event*) = opencl_fn10::switch_fn; +cl_int (CL_API_CALL*clEnqueueMarkerWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*) = opencl_fn4::switch_fn; +cl_int (CL_API_CALL*clEnqueueBarrierWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*) = opencl_fn4::switch_fn; +void* (CL_API_CALL*clGetExtensionFunctionAddressForPlatform)(cl_platform_id, const char*) = opencl_fn2::switch_fn; +cl_mem (CL_API_CALL*clCreateImage2D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, void*, cl_int*) = opencl_fn8::switch_fn; +cl_mem (CL_API_CALL*clCreateImage3D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, size_t, size_t, void*, cl_int*) = 
opencl_fn10::switch_fn; +cl_int (CL_API_CALL*clEnqueueMarker)(cl_command_queue, cl_event*) = opencl_fn2::switch_fn; +cl_int (CL_API_CALL*clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event*) = opencl_fn3::switch_fn; +cl_int (CL_API_CALL*clEnqueueBarrier)(cl_command_queue) = opencl_fn1::switch_fn; +cl_int (CL_API_CALL*clUnloadCompiler)() = opencl_fn0::switch_fn; +void* (CL_API_CALL*clGetExtensionFunctionAddress)(const char*) = opencl_fn1::switch_fn; // generated by parser_cl.py void* opencl_fn_ptrs[] = { From 90df620debfbfa4ba2a4fa08f6646ecf04fe8f80 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 30 Sep 2013 20:33:56 +0400 Subject: [PATCH 36/39] ocl: stereo_csbp: divUp -> roundUp --- modules/ocl/src/stereo_csbp.cpp | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/modules/ocl/src/stereo_csbp.cpp b/modules/ocl/src/stereo_csbp.cpp index f9e86442bb..13aaf78dec 100644 --- a/modules/ocl/src/stereo_csbp.cpp +++ b/modules/ocl/src/stereo_csbp.cpp @@ -179,10 +179,7 @@ namespace cv //size_t blockSize = 256; size_t localThreads[] = {32, 8 ,1}; - size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0], - divUp(h, localThreads[1]) *localThreads[1], - 1 - }; + size_t globalThreads[] = { roundUp(w, localThreads[0]), roundUp(h, localThreads[1]), 1 }; int disp_step = msg_step * h; openCLVerifyKernel(clCxt, kernel, localThreads); @@ -274,11 +271,8 @@ namespace cv cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName); - size_t localThreads[] = {32, 8, 1}; - size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0], - divUp(h, localThreads[1]) *localThreads[1], - 1 - }; + size_t localThreads[] = { 32, 8, 1 }; + size_t globalThreads[] = { roundUp(w, localThreads[0]), roundUp(h, localThreads[1]), 1 }; int disp_step1 = msg_step1 * h; int disp_step2 = msg_step2 * h2; @@ -387,10 +381,7 @@ namespace cv //size_t blockSize = 256; size_t localThreads[] = {32, 8, 1}; - size_t 
globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0], - divUp(h, localThreads[1]) *localThreads[1], - 1 - }; + size_t globalThreads[] = { roundUp(w, localThreads[0]), roundUp(h, localThreads[1]), 1 }; int disp_step1 = msg_step1 * h; int disp_step2 = msg_step2 * h2; @@ -490,11 +481,8 @@ namespace cv cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName); //size_t blockSize = 256; - size_t localThreads[] = {32, 8, 1}; - size_t globalThreads[] = {divUp(disp.cols, localThreads[0]) *localThreads[0], - divUp(disp.rows, localThreads[1]) *localThreads[1], - 1 - }; + size_t localThreads[] = { 32, 8, 1 }; + size_t globalThreads[] = { roundUp(disp.cols, localThreads[0]), roundUp(disp.rows, localThreads[1]), 1 }; int step_size = disp.step / disp.elemSize(); int disp_step = disp.rows * msg_step; From 3cfad385ec2687c71b462cba91e0fba5a2349b7e Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Fri, 4 Oct 2013 14:23:06 +0400 Subject: [PATCH 37/39] Fix a -Wshadow warning in cap_openni.cpp. 
--- modules/highgui/src/cap_openni.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/highgui/src/cap_openni.cpp b/modules/highgui/src/cap_openni.cpp index 59e19d14a3..51877128a3 100644 --- a/modules/highgui/src/cap_openni.cpp +++ b/modules/highgui/src/cap_openni.cpp @@ -310,15 +310,15 @@ private: class TBBApproximateSynchronizer: public ApproximateSynchronizerBase { public: - TBBApproximateSynchronizer( ApproximateSyncGrabber& approxSyncGrabber ) : - ApproximateSynchronizerBase(approxSyncGrabber) + TBBApproximateSynchronizer( ApproximateSyncGrabber& _approxSyncGrabber ) : + ApproximateSynchronizerBase(_approxSyncGrabber) { setMaxBufferSize(); } void setMaxBufferSize() { - int maxBufferSize = ApproximateSynchronizerBase::approxSyncGrabber.getMaxBufferSize(); + int maxBufferSize = approxSyncGrabber.getMaxBufferSize(); if( maxBufferSize >= 0 ) { depthQueue.set_capacity( maxBufferSize ); From adc516becc804f5af05437ea7c15d46d295860ef Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 4 Oct 2013 15:14:21 +0400 Subject: [PATCH 38/39] fixed OpenCVConfig.cmake.in file for CUDA 5.5 toolkit --- cmake/templates/OpenCVConfig.cmake.in | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cmake/templates/OpenCVConfig.cmake.in b/cmake/templates/OpenCVConfig.cmake.in index 7441b59d59..45cd5d3299 100644 --- a/cmake/templates/OpenCVConfig.cmake.in +++ b/cmake/templates/OpenCVConfig.cmake.in @@ -231,7 +231,16 @@ foreach(__opttype OPT DBG) endif() endif() - list(APPEND OpenCV_EXTRA_LIBS_${__opttype} ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY}) + list(APPEND OpenCV_EXTRA_LIBS_${__opttype} ${CUDA_LIBRARIES}) + + if(${CUDA_VERSION} VERSION_LESS "5.5") + list(APPEND OpenCV_EXTRA_LIBS_${__opttype} ${CUDA_npp_LIBRARY}) + else() + find_cuda_helper_libs(nppc) + find_cuda_helper_libs(nppi) + find_cuda_helper_libs(npps) + list(APPEND OpenCV_EXTRA_LIBS_${__opttype} ${CUDA_nppc_LIBRARY} ${CUDA_nppi_LIBRARY} ${CUDA_npps_LIBRARY}) + 
endif() if(OpenCV_USE_CUBLAS) list(APPEND OpenCV_EXTRA_LIBS_${__opttype} ${CUDA_CUBLAS_LIBRARIES}) From 076eeffd4d63264301735bc813d27cc82fe419d8 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 8 Oct 2013 19:13:10 +0400 Subject: [PATCH 39/39] ocl: fix CL_RUNTIME_EXPORT for master branch --- modules/ocl/src/cl_runtime/cl_runtime.cpp | 7 ------- modules/ocl/src/match_template.cpp | 2 +- modules/ocl/src/precomp.hpp | 8 +++++++- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/modules/ocl/src/cl_runtime/cl_runtime.cpp b/modules/ocl/src/cl_runtime/cl_runtime.cpp index c237afe74c..6752467cb9 100644 --- a/modules/ocl/src/cl_runtime/cl_runtime.cpp +++ b/modules/ocl/src/cl_runtime/cl_runtime.cpp @@ -2,13 +2,6 @@ #if defined(HAVE_OPENCL) && (!defined(__APPLE__) || defined(IOS)) -#if defined(BUILD_SHARED_LIBS) && (defined WIN32 || defined _WIN32 || defined WINCE) -#define CL_RUNTIME_EXPORT __declspec(dllexport) -#else -#define CL_RUNTIME_EXPORT -#endif - - #include "opencv2/ocl/cl_runtime/cl_runtime.hpp" #if defined(__APPLE__) diff --git a/modules/ocl/src/match_template.cpp b/modules/ocl/src/match_template.cpp index cbcd6b3803..9720c74214 100644 --- a/modules/ocl/src/match_template.cpp +++ b/modules/ocl/src/match_template.cpp @@ -91,7 +91,7 @@ namespace cv static bool useNaive(int method, int depth, Size size) { #ifdef HAVE_CLAMDFFT - if (method == TM_SQDIFF && (depth == CV_32F || !Context::getContext()->supportsFeature(Context::CL_DOUBLE))) + if (method == TM_SQDIFF && (depth == CV_32F || !Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))) { return true; } diff --git a/modules/ocl/src/precomp.hpp b/modules/ocl/src/precomp.hpp index 140dcdb9d7..b7ef58b1f8 100644 --- a/modules/ocl/src/precomp.hpp +++ b/modules/ocl/src/precomp.hpp @@ -58,8 +58,14 @@ #include "cvconfig.h" -#if defined(BUILD_SHARED_LIBS) && (defined WIN32 || defined _WIN32 || defined WINCE) +#if defined(BUILD_SHARED_LIBS) +#if defined WIN32 || defined _WIN32 || defined WINCE 
#define CL_RUNTIME_EXPORT __declspec(dllexport) +#elif defined __GNUC__ && __GNUC__ >= 4 +#define CL_RUNTIME_EXPORT __attribute__ ((visibility ("default"))) +#else +#define CL_RUNTIME_EXPORT +#endif #else #define CL_RUNTIME_EXPORT #endif