diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index bd31c170d3..fcaff4e549 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -125,6 +125,24 @@ namespace cv Impl *impl; }; + //! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. + CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt , + const char **source, string kernelName, + size_t globalThreads[3], size_t localThreads[3], + std::vector< std::pair > &args, + int channels, int depth, const char *build_options, + bool finish = true, bool measureKernelTime = false, + bool cleanUp = true); + + //! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. + CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt , + const char **fileName, const int numFiles, string kernelName, + size_t globalThreads[3], size_t localThreads[3], + std::vector< std::pair > &args, + int channels, int depth, const char *build_options, + bool finish = true, bool measureKernelTime = false, + bool cleanUp = true); + class CV_EXPORTS oclMatExpr; //////////////////////////////// oclMat //////////////////////////////// class CV_EXPORTS oclMat diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index 643626bce1..8f8c19b945 100644 --- a/modules/ocl/src/initialization.cpp +++ b/modules/ocl/src/initialization.cpp @@ -47,6 +47,7 @@ #include "precomp.hpp" #include +#include #include "binarycaching.hpp" using namespace cv; @@ -729,8 +730,139 @@ namespace cv cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl; #endif } + + double openCLExecuteKernelInterop(Context *clCxt , const char **source, string kernelName, + size_t globalThreads[3], size_t localThreads[3], + vector< pair > &args, int channels, int depth, const char *build_options, + bool finish, bool measureKernelTime, bool cleanUp) - cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value, + { + //construct kernel name + //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number + //for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char) + stringstream idxStr; + if(channels != -1) + idxStr << "_C" << channels; + if(depth != -1) + idxStr << "_D" << depth; + kernelName += idxStr.str(); + + cl_kernel kernel; + kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options); + + double kernelTime = 0.0; + + if( globalThreads != NULL) + { + if ( localThreads != NULL) + { + globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0]; + globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1]; + globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2]; + + //size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2]; + cv::ocl::openCLVerifyKernel(clCxt, kernel, localThreads); + } + for(size_t i = 0; i < args.size(); i ++) + openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); + + if(measureKernelTime == false) + { + openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads, + localThreads, 0, NULL, NULL)); + } + else + { + cl_event event = NULL; + openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads, + localThreads, 0, NULL, &event)); + + cl_ulong end_time, queue_time; + + openCLSafeCall(clWaitForEvents(1, &event)); + + openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, + sizeof(cl_ulong), &end_time, 0)); + + openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, + sizeof(cl_ulong), &queue_time, 0)); + + kernelTime = (double)(end_time - queue_time) / (1000 * 1000); + + clReleaseEvent(event); + } + } + + if(finish) + { + clFinish(clCxt->impl->clCmdQueue); + } + + if(cleanUp) + { + openCLSafeCall(clReleaseKernel(kernel)); + } + + return kernelTime; + } + + // Converts the contents of a file into a string + static int convertToString(const char *filename, std::string& s) + { + size_t size; + char* str; + + std::fstream f(filename, (std::fstream::in | std::fstream::binary)); + if(f.is_open()) + { + size_t fileSize; + f.seekg(0, std::fstream::end); + size = fileSize = (size_t)f.tellg(); + f.seekg(0, std::fstream::beg); + + str = new char[size+1]; + if(!str) + { + f.close(); + return -1; + } + + f.read(str, fileSize); + f.close(); + str[size] = '\0'; + + s = str; + delete[] str; + return 0; + } + printf("Error: Failed to open file %s\n", filename); + return -1; + } + + double openCLExecuteKernelInterop(Context *clCxt , const char **fileName, const int numFiles, string kernelName, + size_t globalThreads[3], size_t localThreads[3], + vector< pair > &args, int channels, int depth, const char *build_options, + bool finish, bool measureKernelTime, bool cleanUp) + + { + std::vector fsource; + for (int i = 0 ; i < numFiles ; i++) + { + std::string str; + if (convertToString(fileName[i], str) >= 0) + fsource.push_back(str); + } + const char **source = new const char *[numFiles]; + for (int i = 0 ; i < numFiles ; i++) + source[i] = fsource[i].c_str(); + double kernelTime = openCLExecuteKernelInterop(clCxt ,source, kernelName, globalThreads, localThreads, + args, channels, depth, build_options, finish, measureKernelTime, cleanUp); + fsource.clear(); + delete []source; + return kernelTime; + } + + cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value, const size_t size) { int status;