mirror of https://github.com/opencv/opencv.git
Merge pull request #1561 from alalek:ocl_refactoring
commit
8224f9843e
69 changed files with 2999 additions and 2171 deletions
@ -0,0 +1,231 @@ |
||||
//
|
||||
// AUTOGENERATED, DO NOT EDIT
|
||||
//
|
||||
#ifndef __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ |
||||
#define __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ |
||||
|
||||
// generated by parser_cl.py
|
||||
#undef clGetPlatformIDs |
||||
#define clGetPlatformIDs clGetPlatformIDs_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetPlatformIDs_pfn and returns its result.
inline cl_int clGetPlatformIDs(cl_uint p0, cl_platform_id* p1, cl_uint* p2)
{
    return clGetPlatformIDs_pfn(p0, p1, p2);
}
||||
#undef clGetPlatformInfo |
||||
#define clGetPlatformInfo clGetPlatformInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetPlatformInfo_pfn and returns its result.
inline cl_int clGetPlatformInfo(cl_platform_id p0, cl_platform_info p1, size_t p2, void* p3, size_t* p4)
{
    return clGetPlatformInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clGetDeviceIDs |
||||
#define clGetDeviceIDs clGetDeviceIDs_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetDeviceIDs_pfn and returns its result.
inline cl_int clGetDeviceIDs(cl_platform_id p0, cl_device_type p1, cl_uint p2, cl_device_id* p3, cl_uint* p4)
{
    return clGetDeviceIDs_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clGetDeviceInfo |
||||
#define clGetDeviceInfo clGetDeviceInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetDeviceInfo_pfn and returns its result.
inline cl_int clGetDeviceInfo(cl_device_id p0, cl_device_info p1, size_t p2, void* p3, size_t* p4)
{
    return clGetDeviceInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clCreateContext |
||||
#define clCreateContext clCreateContext_fn |
||||
// Thin forwarder: hands every argument (including the callback pointer p3), unchanged,
// to clCreateContext_pfn and returns its result.
inline cl_context clCreateContext(const cl_context_properties* p0, cl_uint p1, const cl_device_id* p2,
                                  void (CL_CALLBACK*p3) (const char*, const void*, size_t, void*),
                                  void* p4, cl_int* p5)
{
    return clCreateContext_pfn(p0, p1, p2, p3, p4, p5);
}
||||
#undef clCreateContextFromType |
||||
#define clCreateContextFromType clCreateContextFromType_fn |
||||
// Thin forwarder: hands every argument (including the callback pointer p2), unchanged,
// to clCreateContextFromType_pfn and returns its result.
inline cl_context clCreateContextFromType(const cl_context_properties* p0, cl_device_type p1,
                                          void (CL_CALLBACK*p2) (const char*, const void*, size_t, void*),
                                          void* p3, cl_int* p4)
{
    return clCreateContextFromType_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clRetainContext |
||||
#define clRetainContext clRetainContext_fn |
||||
// Thin forwarder: passes p0 to clRetainContext_pfn and returns its result.
inline cl_int clRetainContext(cl_context p0)
{
    return clRetainContext_pfn(p0);
}
||||
#undef clReleaseContext |
||||
#define clReleaseContext clReleaseContext_fn |
||||
// Thin forwarder: passes p0 to clReleaseContext_pfn and returns its result.
inline cl_int clReleaseContext(cl_context p0)
{
    return clReleaseContext_pfn(p0);
}
||||
#undef clGetContextInfo |
||||
#define clGetContextInfo clGetContextInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetContextInfo_pfn and returns its result.
inline cl_int clGetContextInfo(cl_context p0, cl_context_info p1, size_t p2, void* p3, size_t* p4)
{
    return clGetContextInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clCreateCommandQueue |
||||
#define clCreateCommandQueue clCreateCommandQueue_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateCommandQueue_pfn and returns its result.
inline cl_command_queue clCreateCommandQueue(cl_context p0, cl_device_id p1,
                                             cl_command_queue_properties p2, cl_int* p3)
{
    return clCreateCommandQueue_pfn(p0, p1, p2, p3);
}
||||
#undef clRetainCommandQueue |
||||
#define clRetainCommandQueue clRetainCommandQueue_fn |
||||
// Thin forwarder: passes p0 to clRetainCommandQueue_pfn and returns its result.
inline cl_int clRetainCommandQueue(cl_command_queue p0)
{
    return clRetainCommandQueue_pfn(p0);
}
||||
#undef clReleaseCommandQueue |
||||
#define clReleaseCommandQueue clReleaseCommandQueue_fn |
||||
// Thin forwarder: passes p0 to clReleaseCommandQueue_pfn and returns its result.
inline cl_int clReleaseCommandQueue(cl_command_queue p0)
{
    return clReleaseCommandQueue_pfn(p0);
}
||||
#undef clGetCommandQueueInfo |
||||
#define clGetCommandQueueInfo clGetCommandQueueInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetCommandQueueInfo_pfn and returns its result.
inline cl_int clGetCommandQueueInfo(cl_command_queue p0, cl_command_queue_info p1, size_t p2,
                                    void* p3, size_t* p4)
{
    return clGetCommandQueueInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clSetCommandQueueProperty |
||||
#define clSetCommandQueueProperty clSetCommandQueueProperty_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clSetCommandQueueProperty_pfn and returns its result.
inline cl_int clSetCommandQueueProperty(cl_command_queue p0, cl_command_queue_properties p1,
                                        cl_bool p2, cl_command_queue_properties* p3)
{
    return clSetCommandQueueProperty_pfn(p0, p1, p2, p3);
}
||||
#undef clCreateBuffer |
||||
#define clCreateBuffer clCreateBuffer_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateBuffer_pfn and returns its result.
inline cl_mem clCreateBuffer(cl_context p0, cl_mem_flags p1, size_t p2, void* p3, cl_int* p4)
{
    return clCreateBuffer_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clCreateSubBuffer |
||||
#define clCreateSubBuffer clCreateSubBuffer_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateSubBuffer_pfn and returns its result.
inline cl_mem clCreateSubBuffer(cl_mem p0, cl_mem_flags p1, cl_buffer_create_type p2,
                                const void* p3, cl_int* p4)
{
    return clCreateSubBuffer_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clCreateImage2D |
||||
#define clCreateImage2D clCreateImage2D_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateImage2D_pfn and returns its result.
inline cl_mem clCreateImage2D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2,
                              size_t p3, size_t p4, size_t p5, void* p6, cl_int* p7)
{
    return clCreateImage2D_pfn(p0, p1, p2, p3, p4, p5, p6, p7);
}
||||
#undef clCreateImage3D |
||||
#define clCreateImage3D clCreateImage3D_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateImage3D_pfn and returns its result.
inline cl_mem clCreateImage3D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2,
                              size_t p3, size_t p4, size_t p5, size_t p6, size_t p7,
                              void* p8, cl_int* p9)
{
    return clCreateImage3D_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9);
}
||||
#undef clRetainMemObject |
||||
#define clRetainMemObject clRetainMemObject_fn |
||||
// Thin forwarder: passes p0 to clRetainMemObject_pfn and returns its result.
inline cl_int clRetainMemObject(cl_mem p0)
{
    return clRetainMemObject_pfn(p0);
}
||||
#undef clReleaseMemObject |
||||
#define clReleaseMemObject clReleaseMemObject_fn |
||||
// Thin forwarder: passes p0 to clReleaseMemObject_pfn and returns its result.
inline cl_int clReleaseMemObject(cl_mem p0)
{
    return clReleaseMemObject_pfn(p0);
}
||||
#undef clGetSupportedImageFormats |
||||
#define clGetSupportedImageFormats clGetSupportedImageFormats_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetSupportedImageFormats_pfn and returns its result.
inline cl_int clGetSupportedImageFormats(cl_context p0, cl_mem_flags p1, cl_mem_object_type p2,
                                         cl_uint p3, cl_image_format* p4, cl_uint* p5)
{
    return clGetSupportedImageFormats_pfn(p0, p1, p2, p3, p4, p5);
}
||||
#undef clGetMemObjectInfo |
||||
#define clGetMemObjectInfo clGetMemObjectInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetMemObjectInfo_pfn and returns its result.
inline cl_int clGetMemObjectInfo(cl_mem p0, cl_mem_info p1, size_t p2, void* p3, size_t* p4)
{
    return clGetMemObjectInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clGetImageInfo |
||||
#define clGetImageInfo clGetImageInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetImageInfo_pfn and returns its result.
inline cl_int clGetImageInfo(cl_mem p0, cl_image_info p1, size_t p2, void* p3, size_t* p4)
{
    return clGetImageInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clSetMemObjectDestructorCallback |
||||
#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_fn |
||||
// Thin forwarder: hands every argument (including the callback pointer p1), unchanged,
// to clSetMemObjectDestructorCallback_pfn and returns its result.
inline cl_int clSetMemObjectDestructorCallback(cl_mem p0, void (CL_CALLBACK*p1) (cl_mem, void*), void* p2)
{
    return clSetMemObjectDestructorCallback_pfn(p0, p1, p2);
}
||||
#undef clCreateSampler |
||||
#define clCreateSampler clCreateSampler_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateSampler_pfn and returns its result.
inline cl_sampler clCreateSampler(cl_context p0, cl_bool p1, cl_addressing_mode p2,
                                  cl_filter_mode p3, cl_int* p4)
{
    return clCreateSampler_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clRetainSampler |
||||
#define clRetainSampler clRetainSampler_fn |
||||
// Thin forwarder: passes p0 to clRetainSampler_pfn and returns its result.
inline cl_int clRetainSampler(cl_sampler p0)
{
    return clRetainSampler_pfn(p0);
}
||||
#undef clReleaseSampler |
||||
#define clReleaseSampler clReleaseSampler_fn |
||||
// Thin forwarder: passes p0 to clReleaseSampler_pfn and returns its result.
inline cl_int clReleaseSampler(cl_sampler p0)
{
    return clReleaseSampler_pfn(p0);
}
||||
#undef clGetSamplerInfo |
||||
#define clGetSamplerInfo clGetSamplerInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetSamplerInfo_pfn and returns its result.
inline cl_int clGetSamplerInfo(cl_sampler p0, cl_sampler_info p1, size_t p2, void* p3, size_t* p4)
{
    return clGetSamplerInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clCreateProgramWithSource |
||||
#define clCreateProgramWithSource clCreateProgramWithSource_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateProgramWithSource_pfn and returns its result.
inline cl_program clCreateProgramWithSource(cl_context p0, cl_uint p1, const char** p2,
                                            const size_t* p3, cl_int* p4)
{
    return clCreateProgramWithSource_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clCreateProgramWithBinary |
||||
#define clCreateProgramWithBinary clCreateProgramWithBinary_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateProgramWithBinary_pfn and returns its result.
inline cl_program clCreateProgramWithBinary(cl_context p0, cl_uint p1, const cl_device_id* p2,
                                            const size_t* p3, const unsigned char** p4,
                                            cl_int* p5, cl_int* p6)
{
    return clCreateProgramWithBinary_pfn(p0, p1, p2, p3, p4, p5, p6);
}
||||
#undef clRetainProgram |
||||
#define clRetainProgram clRetainProgram_fn |
||||
// Thin forwarder: passes p0 to clRetainProgram_pfn and returns its result.
inline cl_int clRetainProgram(cl_program p0)
{
    return clRetainProgram_pfn(p0);
}
||||
#undef clReleaseProgram |
||||
#define clReleaseProgram clReleaseProgram_fn |
||||
// Thin forwarder: passes p0 to clReleaseProgram_pfn and returns its result.
inline cl_int clReleaseProgram(cl_program p0)
{
    return clReleaseProgram_pfn(p0);
}
||||
#undef clBuildProgram |
||||
#define clBuildProgram clBuildProgram_fn |
||||
// Thin forwarder: hands every argument (including the callback pointer p4), unchanged,
// to clBuildProgram_pfn and returns its result.
inline cl_int clBuildProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3,
                             void (CL_CALLBACK*p4) (cl_program, void*), void* p5)
{
    return clBuildProgram_pfn(p0, p1, p2, p3, p4, p5);
}
||||
#undef clUnloadCompiler |
||||
#define clUnloadCompiler clUnloadCompiler_fn |
||||
// Thin forwarder: invokes clUnloadCompiler_pfn with no arguments and returns its result.
inline cl_int clUnloadCompiler()
{
    return clUnloadCompiler_pfn();
}
||||
#undef clGetProgramInfo |
||||
#define clGetProgramInfo clGetProgramInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetProgramInfo_pfn and returns its result.
inline cl_int clGetProgramInfo(cl_program p0, cl_program_info p1, size_t p2, void* p3, size_t* p4)
{
    return clGetProgramInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clGetProgramBuildInfo |
||||
#define clGetProgramBuildInfo clGetProgramBuildInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetProgramBuildInfo_pfn and returns its result.
inline cl_int clGetProgramBuildInfo(cl_program p0, cl_device_id p1, cl_program_build_info p2,
                                    size_t p3, void* p4, size_t* p5)
{
    return clGetProgramBuildInfo_pfn(p0, p1, p2, p3, p4, p5);
}
||||
#undef clCreateKernel |
||||
#define clCreateKernel clCreateKernel_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateKernel_pfn and returns its result.
inline cl_kernel clCreateKernel(cl_program p0, const char* p1, cl_int* p2)
{
    return clCreateKernel_pfn(p0, p1, p2);
}
||||
#undef clCreateKernelsInProgram |
||||
#define clCreateKernelsInProgram clCreateKernelsInProgram_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateKernelsInProgram_pfn and returns its result.
inline cl_int clCreateKernelsInProgram(cl_program p0, cl_uint p1, cl_kernel* p2, cl_uint* p3)
{
    return clCreateKernelsInProgram_pfn(p0, p1, p2, p3);
}
||||
#undef clRetainKernel |
||||
#define clRetainKernel clRetainKernel_fn |
||||
// Thin forwarder: passes p0 to clRetainKernel_pfn and returns its result.
inline cl_int clRetainKernel(cl_kernel p0)
{
    return clRetainKernel_pfn(p0);
}
||||
#undef clReleaseKernel |
||||
#define clReleaseKernel clReleaseKernel_fn |
||||
// Thin forwarder: passes p0 to clReleaseKernel_pfn and returns its result.
inline cl_int clReleaseKernel(cl_kernel p0)
{
    return clReleaseKernel_pfn(p0);
}
||||
#undef clSetKernelArg |
||||
#define clSetKernelArg clSetKernelArg_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clSetKernelArg_pfn and returns its result.
inline cl_int clSetKernelArg(cl_kernel p0, cl_uint p1, size_t p2, const void* p3)
{
    return clSetKernelArg_pfn(p0, p1, p2, p3);
}
||||
#undef clGetKernelInfo |
||||
#define clGetKernelInfo clGetKernelInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetKernelInfo_pfn and returns its result.
inline cl_int clGetKernelInfo(cl_kernel p0, cl_kernel_info p1, size_t p2, void* p3, size_t* p4)
{
    return clGetKernelInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clGetKernelWorkGroupInfo |
||||
#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetKernelWorkGroupInfo_pfn and returns its result.
inline cl_int clGetKernelWorkGroupInfo(cl_kernel p0, cl_device_id p1, cl_kernel_work_group_info p2,
                                       size_t p3, void* p4, size_t* p5)
{
    return clGetKernelWorkGroupInfo_pfn(p0, p1, p2, p3, p4, p5);
}
||||
#undef clWaitForEvents |
||||
#define clWaitForEvents clWaitForEvents_fn |
||||
// Thin forwarder: hands both arguments, unchanged, to clWaitForEvents_pfn and returns its result.
inline cl_int clWaitForEvents(cl_uint p0, const cl_event* p1)
{
    return clWaitForEvents_pfn(p0, p1);
}
||||
#undef clGetEventInfo |
||||
#define clGetEventInfo clGetEventInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetEventInfo_pfn and returns its result.
inline cl_int clGetEventInfo(cl_event p0, cl_event_info p1, size_t p2, void* p3, size_t* p4)
{
    return clGetEventInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clCreateUserEvent |
||||
#define clCreateUserEvent clCreateUserEvent_fn |
||||
// Thin forwarder: hands both arguments, unchanged, to clCreateUserEvent_pfn and returns its result.
inline cl_event clCreateUserEvent(cl_context p0, cl_int* p1)
{
    return clCreateUserEvent_pfn(p0, p1);
}
||||
#undef clRetainEvent |
||||
#define clRetainEvent clRetainEvent_fn |
||||
// Thin forwarder: passes p0 to clRetainEvent_pfn and returns its result.
inline cl_int clRetainEvent(cl_event p0)
{
    return clRetainEvent_pfn(p0);
}
||||
#undef clReleaseEvent |
||||
#define clReleaseEvent clReleaseEvent_fn |
||||
// Thin forwarder: passes p0 to clReleaseEvent_pfn and returns its result.
inline cl_int clReleaseEvent(cl_event p0)
{
    return clReleaseEvent_pfn(p0);
}
||||
#undef clSetUserEventStatus |
||||
#define clSetUserEventStatus clSetUserEventStatus_fn |
||||
// Thin forwarder: hands both arguments, unchanged, to clSetUserEventStatus_pfn and returns its result.
inline cl_int clSetUserEventStatus(cl_event p0, cl_int p1)
{
    return clSetUserEventStatus_pfn(p0, p1);
}
||||
#undef clSetEventCallback |
||||
#define clSetEventCallback clSetEventCallback_fn |
||||
// Thin forwarder: hands every argument (including the callback pointer p2), unchanged,
// to clSetEventCallback_pfn and returns its result.
inline cl_int clSetEventCallback(cl_event p0, cl_int p1,
                                 void (CL_CALLBACK*p2) (cl_event, cl_int, void*), void* p3)
{
    return clSetEventCallback_pfn(p0, p1, p2, p3);
}
||||
#undef clGetEventProfilingInfo |
||||
#define clGetEventProfilingInfo clGetEventProfilingInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetEventProfilingInfo_pfn and returns its result.
inline cl_int clGetEventProfilingInfo(cl_event p0, cl_profiling_info p1, size_t p2, void* p3, size_t* p4)
{
    return clGetEventProfilingInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clFlush |
||||
#define clFlush clFlush_fn |
||||
// Thin forwarder: passes p0 to clFlush_pfn and returns its result.
inline cl_int clFlush(cl_command_queue p0)
{
    return clFlush_pfn(p0);
}
||||
#undef clFinish |
||||
#define clFinish clFinish_fn |
||||
// Thin forwarder: passes p0 to clFinish_pfn and returns its result.
inline cl_int clFinish(cl_command_queue p0)
{
    return clFinish_pfn(p0);
}
||||
#undef clEnqueueReadBuffer |
||||
#define clEnqueueReadBuffer clEnqueueReadBuffer_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clEnqueueReadBuffer_pfn and returns its result.
inline cl_int clEnqueueReadBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4,
                                  void* p5, cl_uint p6, const cl_event* p7, cl_event* p8)
{
    return clEnqueueReadBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8);
}
||||
#undef clEnqueueReadBufferRect |
||||
#define clEnqueueReadBufferRect clEnqueueReadBufferRect_fn |
||||
// Thin forwarder: hands all fourteen arguments, unchanged and in order,
// to clEnqueueReadBufferRect_pfn and returns its result.
inline cl_int clEnqueueReadBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2,
                                      const size_t* p3, const size_t* p4, const size_t* p5,
                                      size_t p6, size_t p7, size_t p8, size_t p9,
                                      void* p10, cl_uint p11, const cl_event* p12, cl_event* p13)
{
    return clEnqueueReadBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13);
}
||||
#undef clEnqueueWriteBuffer |
||||
#define clEnqueueWriteBuffer clEnqueueWriteBuffer_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clEnqueueWriteBuffer_pfn and returns its result.
inline cl_int clEnqueueWriteBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4,
                                   const void* p5, cl_uint p6, const cl_event* p7, cl_event* p8)
{
    return clEnqueueWriteBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8);
}
||||
#undef clEnqueueWriteBufferRect |
||||
#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_fn |
||||
// Thin forwarder: hands all fourteen arguments, unchanged and in order,
// to clEnqueueWriteBufferRect_pfn and returns its result.
inline cl_int clEnqueueWriteBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2,
                                       const size_t* p3, const size_t* p4, const size_t* p5,
                                       size_t p6, size_t p7, size_t p8, size_t p9,
                                       const void* p10, cl_uint p11, const cl_event* p12, cl_event* p13)
{
    return clEnqueueWriteBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13);
}
||||
#undef clEnqueueCopyBuffer |
||||
#define clEnqueueCopyBuffer clEnqueueCopyBuffer_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clEnqueueCopyBuffer_pfn and returns its result.
inline cl_int clEnqueueCopyBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2,
                                  size_t p3, size_t p4, size_t p5,
                                  cl_uint p6, const cl_event* p7, cl_event* p8)
{
    return clEnqueueCopyBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8);
}
||||
#undef clEnqueueCopyBufferRect |
||||
#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_fn |
||||
// Thin forwarder: hands all thirteen arguments, unchanged and in order,
// to clEnqueueCopyBufferRect_pfn and returns its result.
inline cl_int clEnqueueCopyBufferRect(cl_command_queue p0, cl_mem p1, cl_mem p2,
                                      const size_t* p3, const size_t* p4, const size_t* p5,
                                      size_t p6, size_t p7, size_t p8, size_t p9,
                                      cl_uint p10, const cl_event* p11, cl_event* p12)
{
    return clEnqueueCopyBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12);
}
||||
#undef clEnqueueReadImage |
||||
#define clEnqueueReadImage clEnqueueReadImage_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clEnqueueReadImage_pfn and returns its result.
inline cl_int clEnqueueReadImage(cl_command_queue p0, cl_mem p1, cl_bool p2,
                                 const size_t* p3, const size_t* p4, size_t p5, size_t p6,
                                 void* p7, cl_uint p8, const cl_event* p9, cl_event* p10)
{
    return clEnqueueReadImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10);
}
||||
#undef clEnqueueWriteImage |
||||
#define clEnqueueWriteImage clEnqueueWriteImage_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clEnqueueWriteImage_pfn and returns its result.
inline cl_int clEnqueueWriteImage(cl_command_queue p0, cl_mem p1, cl_bool p2,
                                  const size_t* p3, const size_t* p4, size_t p5, size_t p6,
                                  const void* p7, cl_uint p8, const cl_event* p9, cl_event* p10)
{
    return clEnqueueWriteImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10);
}
||||
#undef clEnqueueCopyImage |
||||
#define clEnqueueCopyImage clEnqueueCopyImage_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clEnqueueCopyImage_pfn and returns its result.
inline cl_int clEnqueueCopyImage(cl_command_queue p0, cl_mem p1, cl_mem p2,
                                 const size_t* p3, const size_t* p4, const size_t* p5,
                                 cl_uint p6, const cl_event* p7, cl_event* p8)
{
    return clEnqueueCopyImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8);
}
||||
#undef clEnqueueCopyImageToBuffer |
||||
#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clEnqueueCopyImageToBuffer_pfn and returns its result.
inline cl_int clEnqueueCopyImageToBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2,
                                         const size_t* p3, const size_t* p4, size_t p5,
                                         cl_uint p6, const cl_event* p7, cl_event* p8)
{
    return clEnqueueCopyImageToBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8);
}
||||
#undef clEnqueueCopyBufferToImage |
||||
#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clEnqueueCopyBufferToImage_pfn and returns its result.
inline cl_int clEnqueueCopyBufferToImage(cl_command_queue p0, cl_mem p1, cl_mem p2,
                                         size_t p3, const size_t* p4, const size_t* p5,
                                         cl_uint p6, const cl_event* p7, cl_event* p8)
{
    return clEnqueueCopyBufferToImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8);
}
||||
#undef clEnqueueMapBuffer |
||||
#define clEnqueueMapBuffer clEnqueueMapBuffer_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clEnqueueMapBuffer_pfn and returns
// the pointer that call yields.
inline void* clEnqueueMapBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3,
                                size_t p4, size_t p5, cl_uint p6, const cl_event* p7,
                                cl_event* p8, cl_int* p9)
{
    return clEnqueueMapBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9);
}
||||
#undef clEnqueueMapImage |
||||
#define clEnqueueMapImage clEnqueueMapImage_fn |
||||
inline void* clEnqueueMapImage(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, const size_t* p4, const size_t* p5, size_t* p6, size_t* p7, cl_uint p8, const cl_event* p9, cl_event* p10, cl_int* p11) { return clEnqueueMapImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11); } |
||||
#undef clEnqueueUnmapMemObject |
||||
#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clEnqueueUnmapMemObject_pfn and returns its result.
inline cl_int clEnqueueUnmapMemObject(cl_command_queue p0, cl_mem p1, void* p2,
                                      cl_uint p3, const cl_event* p4, cl_event* p5)
{
    return clEnqueueUnmapMemObject_pfn(p0, p1, p2, p3, p4, p5);
}
||||
#undef clEnqueueNDRangeKernel |
||||
#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clEnqueueNDRangeKernel_pfn and returns its result.
inline cl_int clEnqueueNDRangeKernel(cl_command_queue p0, cl_kernel p1, cl_uint p2,
                                     const size_t* p3, const size_t* p4, const size_t* p5,
                                     cl_uint p6, const cl_event* p7, cl_event* p8)
{
    return clEnqueueNDRangeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8);
}
||||
#undef clEnqueueTask |
||||
#define clEnqueueTask clEnqueueTask_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clEnqueueTask_pfn and returns its result.
inline cl_int clEnqueueTask(cl_command_queue p0, cl_kernel p1, cl_uint p2, const cl_event* p3, cl_event* p4)
{
    return clEnqueueTask_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clEnqueueNativeKernel |
||||
#define clEnqueueNativeKernel clEnqueueNativeKernel_fn |
||||
// Thin forwarder: hands every argument (including the callback pointer p1), unchanged,
// to clEnqueueNativeKernel_pfn and returns its result.
inline cl_int clEnqueueNativeKernel(cl_command_queue p0, void (CL_CALLBACK*p1) (void*), void* p2,
                                    size_t p3, cl_uint p4, const cl_mem* p5, const void** p6,
                                    cl_uint p7, const cl_event* p8, cl_event* p9)
{
    return clEnqueueNativeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9);
}
||||
#undef clEnqueueMarker |
||||
#define clEnqueueMarker clEnqueueMarker_fn |
||||
// Thin forwarder: hands both arguments, unchanged, to clEnqueueMarker_pfn and returns its result.
inline cl_int clEnqueueMarker(cl_command_queue p0, cl_event* p1)
{
    return clEnqueueMarker_pfn(p0, p1);
}
||||
#undef clEnqueueWaitForEvents |
||||
#define clEnqueueWaitForEvents clEnqueueWaitForEvents_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clEnqueueWaitForEvents_pfn and returns its result.
inline cl_int clEnqueueWaitForEvents(cl_command_queue p0, cl_uint p1, const cl_event* p2)
{
    return clEnqueueWaitForEvents_pfn(p0, p1, p2);
}
||||
#undef clEnqueueBarrier |
||||
#define clEnqueueBarrier clEnqueueBarrier_fn |
||||
// Thin forwarder: passes p0 to clEnqueueBarrier_pfn and returns its result.
inline cl_int clEnqueueBarrier(cl_command_queue p0)
{
    return clEnqueueBarrier_pfn(p0);
}
||||
#undef clGetExtensionFunctionAddress |
||||
#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_fn |
||||
// Thin forwarder: passes p0 to clGetExtensionFunctionAddress_pfn and returns the pointer that call yields.
inline void* clGetExtensionFunctionAddress(const char* p0)
{
    return clGetExtensionFunctionAddress_pfn(p0);
}
||||
|
||||
#endif // __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__
|
@ -0,0 +1,273 @@ |
||||
//
|
||||
// AUTOGENERATED, DO NOT EDIT
|
||||
//
|
||||
#ifndef __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ |
||||
#define __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ |
||||
|
||||
// generated by parser_cl.py
|
||||
#undef clGetPlatformIDs |
||||
#define clGetPlatformIDs clGetPlatformIDs_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetPlatformIDs_pfn and returns its result.
inline cl_int clGetPlatformIDs(cl_uint p0, cl_platform_id* p1, cl_uint* p2)
{
    return clGetPlatformIDs_pfn(p0, p1, p2);
}
||||
#undef clGetPlatformInfo |
||||
#define clGetPlatformInfo clGetPlatformInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetPlatformInfo_pfn and returns its result.
inline cl_int clGetPlatformInfo(cl_platform_id p0, cl_platform_info p1, size_t p2, void* p3, size_t* p4)
{
    return clGetPlatformInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clGetDeviceIDs |
||||
#define clGetDeviceIDs clGetDeviceIDs_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetDeviceIDs_pfn and returns its result.
inline cl_int clGetDeviceIDs(cl_platform_id p0, cl_device_type p1, cl_uint p2, cl_device_id* p3, cl_uint* p4)
{
    return clGetDeviceIDs_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clGetDeviceInfo |
||||
#define clGetDeviceInfo clGetDeviceInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetDeviceInfo_pfn and returns its result.
inline cl_int clGetDeviceInfo(cl_device_id p0, cl_device_info p1, size_t p2, void* p3, size_t* p4)
{
    return clGetDeviceInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clCreateSubDevices |
||||
#define clCreateSubDevices clCreateSubDevices_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateSubDevices_pfn and returns its result.
inline cl_int clCreateSubDevices(cl_device_id p0, const cl_device_partition_property* p1,
                                 cl_uint p2, cl_device_id* p3, cl_uint* p4)
{
    return clCreateSubDevices_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clRetainDevice |
||||
#define clRetainDevice clRetainDevice_fn |
||||
// Thin forwarder: passes p0 to clRetainDevice_pfn and returns its result.
inline cl_int clRetainDevice(cl_device_id p0)
{
    return clRetainDevice_pfn(p0);
}
||||
#undef clReleaseDevice |
||||
#define clReleaseDevice clReleaseDevice_fn |
||||
// Thin forwarder: passes p0 to clReleaseDevice_pfn and returns its result.
inline cl_int clReleaseDevice(cl_device_id p0)
{
    return clReleaseDevice_pfn(p0);
}
||||
#undef clCreateContext |
||||
#define clCreateContext clCreateContext_fn |
||||
// Thin forwarder: hands every argument (including the callback pointer p3), unchanged,
// to clCreateContext_pfn and returns its result.
inline cl_context clCreateContext(const cl_context_properties* p0, cl_uint p1, const cl_device_id* p2,
                                  void (CL_CALLBACK*p3) (const char*, const void*, size_t, void*),
                                  void* p4, cl_int* p5)
{
    return clCreateContext_pfn(p0, p1, p2, p3, p4, p5);
}
||||
#undef clCreateContextFromType |
||||
#define clCreateContextFromType clCreateContextFromType_fn |
||||
// Thin forwarder: hands every argument (including the callback pointer p2), unchanged,
// to clCreateContextFromType_pfn and returns its result.
inline cl_context clCreateContextFromType(const cl_context_properties* p0, cl_device_type p1,
                                          void (CL_CALLBACK*p2) (const char*, const void*, size_t, void*),
                                          void* p3, cl_int* p4)
{
    return clCreateContextFromType_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clRetainContext |
||||
#define clRetainContext clRetainContext_fn |
||||
// Thin forwarder: passes p0 to clRetainContext_pfn and returns its result.
inline cl_int clRetainContext(cl_context p0)
{
    return clRetainContext_pfn(p0);
}
||||
#undef clReleaseContext |
||||
#define clReleaseContext clReleaseContext_fn |
||||
// Thin forwarder: passes p0 to clReleaseContext_pfn and returns its result.
inline cl_int clReleaseContext(cl_context p0)
{
    return clReleaseContext_pfn(p0);
}
||||
#undef clGetContextInfo |
||||
#define clGetContextInfo clGetContextInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetContextInfo_pfn and returns its result.
inline cl_int clGetContextInfo(cl_context p0, cl_context_info p1, size_t p2, void* p3, size_t* p4)
{
    return clGetContextInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clCreateCommandQueue |
||||
#define clCreateCommandQueue clCreateCommandQueue_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateCommandQueue_pfn and returns its result.
inline cl_command_queue clCreateCommandQueue(cl_context p0, cl_device_id p1,
                                             cl_command_queue_properties p2, cl_int* p3)
{
    return clCreateCommandQueue_pfn(p0, p1, p2, p3);
}
||||
#undef clRetainCommandQueue |
||||
#define clRetainCommandQueue clRetainCommandQueue_fn |
||||
// Thin forwarder: passes p0 to clRetainCommandQueue_pfn and returns its result.
inline cl_int clRetainCommandQueue(cl_command_queue p0)
{
    return clRetainCommandQueue_pfn(p0);
}
||||
#undef clReleaseCommandQueue |
||||
#define clReleaseCommandQueue clReleaseCommandQueue_fn |
||||
// Thin forwarder: passes p0 to clReleaseCommandQueue_pfn and returns its result.
inline cl_int clReleaseCommandQueue(cl_command_queue p0)
{
    return clReleaseCommandQueue_pfn(p0);
}
||||
#undef clGetCommandQueueInfo |
||||
#define clGetCommandQueueInfo clGetCommandQueueInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetCommandQueueInfo_pfn and returns its result.
inline cl_int clGetCommandQueueInfo(cl_command_queue p0, cl_command_queue_info p1, size_t p2,
                                    void* p3, size_t* p4)
{
    return clGetCommandQueueInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clCreateBuffer |
||||
#define clCreateBuffer clCreateBuffer_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateBuffer_pfn and returns its result.
inline cl_mem clCreateBuffer(cl_context p0, cl_mem_flags p1, size_t p2, void* p3, cl_int* p4)
{
    return clCreateBuffer_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clCreateSubBuffer |
||||
#define clCreateSubBuffer clCreateSubBuffer_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateSubBuffer_pfn and returns its result.
inline cl_mem clCreateSubBuffer(cl_mem p0, cl_mem_flags p1, cl_buffer_create_type p2,
                                const void* p3, cl_int* p4)
{
    return clCreateSubBuffer_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clCreateImage |
||||
#define clCreateImage clCreateImage_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateImage_pfn and returns its result.
inline cl_mem clCreateImage(cl_context p0, cl_mem_flags p1, const cl_image_format* p2,
                            const cl_image_desc* p3, void* p4, cl_int* p5)
{
    return clCreateImage_pfn(p0, p1, p2, p3, p4, p5);
}
||||
#undef clRetainMemObject |
||||
#define clRetainMemObject clRetainMemObject_fn |
||||
// Thin forwarder: passes p0 to clRetainMemObject_pfn and returns its result.
inline cl_int clRetainMemObject(cl_mem p0)
{
    return clRetainMemObject_pfn(p0);
}
||||
#undef clReleaseMemObject |
||||
#define clReleaseMemObject clReleaseMemObject_fn |
||||
// Thin forwarder: passes p0 to clReleaseMemObject_pfn and returns its result.
inline cl_int clReleaseMemObject(cl_mem p0)
{
    return clReleaseMemObject_pfn(p0);
}
||||
#undef clGetSupportedImageFormats |
||||
#define clGetSupportedImageFormats clGetSupportedImageFormats_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetSupportedImageFormats_pfn and returns its result.
inline cl_int clGetSupportedImageFormats(cl_context p0, cl_mem_flags p1, cl_mem_object_type p2,
                                         cl_uint p3, cl_image_format* p4, cl_uint* p5)
{
    return clGetSupportedImageFormats_pfn(p0, p1, p2, p3, p4, p5);
}
||||
#undef clGetMemObjectInfo |
||||
#define clGetMemObjectInfo clGetMemObjectInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetMemObjectInfo_pfn and returns its result.
inline cl_int clGetMemObjectInfo(cl_mem p0, cl_mem_info p1, size_t p2, void* p3, size_t* p4)
{
    return clGetMemObjectInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clGetImageInfo |
||||
#define clGetImageInfo clGetImageInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetImageInfo_pfn and returns its result.
inline cl_int clGetImageInfo(cl_mem p0, cl_image_info p1, size_t p2, void* p3, size_t* p4)
{
    return clGetImageInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clSetMemObjectDestructorCallback |
||||
#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_fn |
||||
// Thin forwarder: hands every argument (including the callback pointer p1), unchanged,
// to clSetMemObjectDestructorCallback_pfn and returns its result.
inline cl_int clSetMemObjectDestructorCallback(cl_mem p0, void (CL_CALLBACK*p1) (cl_mem, void*), void* p2)
{
    return clSetMemObjectDestructorCallback_pfn(p0, p1, p2);
}
||||
#undef clCreateSampler |
||||
#define clCreateSampler clCreateSampler_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateSampler_pfn and returns its result.
inline cl_sampler clCreateSampler(cl_context p0, cl_bool p1, cl_addressing_mode p2,
                                  cl_filter_mode p3, cl_int* p4)
{
    return clCreateSampler_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clRetainSampler |
||||
#define clRetainSampler clRetainSampler_fn |
||||
// Thin forwarder: passes p0 to clRetainSampler_pfn and returns its result.
inline cl_int clRetainSampler(cl_sampler p0)
{
    return clRetainSampler_pfn(p0);
}
||||
#undef clReleaseSampler |
||||
#define clReleaseSampler clReleaseSampler_fn |
||||
// Thin forwarder: passes p0 to clReleaseSampler_pfn and returns its result.
inline cl_int clReleaseSampler(cl_sampler p0)
{
    return clReleaseSampler_pfn(p0);
}
||||
#undef clGetSamplerInfo |
||||
#define clGetSamplerInfo clGetSamplerInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetSamplerInfo_pfn and returns its result.
inline cl_int clGetSamplerInfo(cl_sampler p0, cl_sampler_info p1, size_t p2, void* p3, size_t* p4)
{
    return clGetSamplerInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clCreateProgramWithSource |
||||
#define clCreateProgramWithSource clCreateProgramWithSource_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateProgramWithSource_pfn and returns its result.
inline cl_program clCreateProgramWithSource(cl_context p0, cl_uint p1, const char** p2,
                                            const size_t* p3, cl_int* p4)
{
    return clCreateProgramWithSource_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clCreateProgramWithBinary |
||||
#define clCreateProgramWithBinary clCreateProgramWithBinary_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateProgramWithBinary_pfn and returns its result.
inline cl_program clCreateProgramWithBinary(cl_context p0, cl_uint p1, const cl_device_id* p2,
                                            const size_t* p3, const unsigned char** p4,
                                            cl_int* p5, cl_int* p6)
{
    return clCreateProgramWithBinary_pfn(p0, p1, p2, p3, p4, p5, p6);
}
||||
#undef clCreateProgramWithBuiltInKernels |
||||
#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateProgramWithBuiltInKernels_pfn
// and returns its result.
inline cl_program clCreateProgramWithBuiltInKernels(cl_context p0, cl_uint p1, const cl_device_id* p2,
                                                    const char* p3, cl_int* p4)
{
    return clCreateProgramWithBuiltInKernels_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clRetainProgram |
||||
#define clRetainProgram clRetainProgram_fn |
||||
// Thin forwarder: passes p0 to clRetainProgram_pfn and returns its result.
inline cl_int clRetainProgram(cl_program p0)
{
    return clRetainProgram_pfn(p0);
}
||||
#undef clReleaseProgram |
||||
#define clReleaseProgram clReleaseProgram_fn |
||||
// Thin forwarder: passes p0 to clReleaseProgram_pfn and returns its result.
inline cl_int clReleaseProgram(cl_program p0)
{
    return clReleaseProgram_pfn(p0);
}
||||
#undef clBuildProgram |
||||
#define clBuildProgram clBuildProgram_fn |
||||
// Thin forwarder: hands every argument (including the callback pointer p4), unchanged,
// to clBuildProgram_pfn and returns its result.
inline cl_int clBuildProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3,
                             void (CL_CALLBACK*p4) (cl_program, void*), void* p5)
{
    return clBuildProgram_pfn(p0, p1, p2, p3, p4, p5);
}
||||
#undef clCompileProgram |
||||
#define clCompileProgram clCompileProgram_fn |
||||
// Thin forwarder: hands every argument (including the callback pointer p7), unchanged,
// to clCompileProgram_pfn and returns its result.
inline cl_int clCompileProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3,
                               cl_uint p4, const cl_program* p5, const char** p6,
                               void (CL_CALLBACK*p7) (cl_program, void*), void* p8)
{
    return clCompileProgram_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8);
}
||||
#undef clLinkProgram |
||||
#define clLinkProgram clLinkProgram_fn |
||||
// Thin forwarder: hands every argument (including the callback pointer p6), unchanged,
// to clLinkProgram_pfn and returns its result.
inline cl_program clLinkProgram(cl_context p0, cl_uint p1, const cl_device_id* p2, const char* p3,
                                cl_uint p4, const cl_program* p5,
                                void (CL_CALLBACK*p6) (cl_program, void*), void* p7, cl_int* p8)
{
    return clLinkProgram_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8);
}
||||
#undef clUnloadPlatformCompiler |
||||
#define clUnloadPlatformCompiler clUnloadPlatformCompiler_fn |
||||
// Thin forwarder: passes p0 to clUnloadPlatformCompiler_pfn and returns its result.
inline cl_int clUnloadPlatformCompiler(cl_platform_id p0)
{
    return clUnloadPlatformCompiler_pfn(p0);
}
||||
#undef clGetProgramInfo |
||||
#define clGetProgramInfo clGetProgramInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetProgramInfo_pfn and returns its result.
inline cl_int clGetProgramInfo(cl_program p0, cl_program_info p1, size_t p2, void* p3, size_t* p4)
{
    return clGetProgramInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clGetProgramBuildInfo |
||||
#define clGetProgramBuildInfo clGetProgramBuildInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetProgramBuildInfo_pfn and returns its result.
inline cl_int clGetProgramBuildInfo(cl_program p0, cl_device_id p1, cl_program_build_info p2,
                                    size_t p3, void* p4, size_t* p5)
{
    return clGetProgramBuildInfo_pfn(p0, p1, p2, p3, p4, p5);
}
||||
#undef clCreateKernel |
||||
#define clCreateKernel clCreateKernel_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateKernel_pfn and returns its result.
inline cl_kernel clCreateKernel(cl_program p0, const char* p1, cl_int* p2)
{
    return clCreateKernel_pfn(p0, p1, p2);
}
||||
#undef clCreateKernelsInProgram |
||||
#define clCreateKernelsInProgram clCreateKernelsInProgram_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clCreateKernelsInProgram_pfn and returns its result.
inline cl_int clCreateKernelsInProgram(cl_program p0, cl_uint p1, cl_kernel* p2, cl_uint* p3)
{
    return clCreateKernelsInProgram_pfn(p0, p1, p2, p3);
}
||||
#undef clRetainKernel |
||||
#define clRetainKernel clRetainKernel_fn |
||||
// Thin forwarder: passes p0 to clRetainKernel_pfn and returns its result.
inline cl_int clRetainKernel(cl_kernel p0)
{
    return clRetainKernel_pfn(p0);
}
||||
#undef clReleaseKernel |
||||
#define clReleaseKernel clReleaseKernel_fn |
||||
// Thin forwarder: passes p0 to clReleaseKernel_pfn and returns its result.
inline cl_int clReleaseKernel(cl_kernel p0)
{
    return clReleaseKernel_pfn(p0);
}
||||
#undef clSetKernelArg |
||||
#define clSetKernelArg clSetKernelArg_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clSetKernelArg_pfn and returns its result.
inline cl_int clSetKernelArg(cl_kernel p0, cl_uint p1, size_t p2, const void* p3)
{
    return clSetKernelArg_pfn(p0, p1, p2, p3);
}
||||
#undef clGetKernelInfo |
||||
#define clGetKernelInfo clGetKernelInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetKernelInfo_pfn and returns its result.
inline cl_int clGetKernelInfo(cl_kernel p0, cl_kernel_info p1, size_t p2, void* p3, size_t* p4)
{
    return clGetKernelInfo_pfn(p0, p1, p2, p3, p4);
}
||||
#undef clGetKernelArgInfo |
||||
#define clGetKernelArgInfo clGetKernelArgInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetKernelArgInfo_pfn and returns its result.
inline cl_int clGetKernelArgInfo(cl_kernel p0, cl_uint p1, cl_kernel_arg_info p2,
                                 size_t p3, void* p4, size_t* p5)
{
    return clGetKernelArgInfo_pfn(p0, p1, p2, p3, p4, p5);
}
||||
#undef clGetKernelWorkGroupInfo |
||||
#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_fn |
||||
// Thin forwarder: hands every argument, unchanged, to clGetKernelWorkGroupInfo_pfn and returns its result.
inline cl_int clGetKernelWorkGroupInfo(cl_kernel p0, cl_device_id p1, cl_kernel_work_group_info p2,
                                       size_t p3, void* p4, size_t* p5)
{
    return clGetKernelWorkGroupInfo_pfn(p0, p1, p2, p3, p4, p5);
}
||||
#undef clWaitForEvents |
||||
#define clWaitForEvents clWaitForEvents_fn |
||||
// Thin forwarder: hands both arguments, unchanged, to clWaitForEvents_pfn and returns its result.
inline cl_int clWaitForEvents(cl_uint p0, const cl_event* p1)
{
    return clWaitForEvents_pfn(p0, p1);
}
||||
#undef clGetEventInfo |
||||
#define clGetEventInfo clGetEventInfo_fn |
||||
inline cl_int clGetEventInfo(cl_event p0, cl_event_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventInfo_pfn(p0, p1, p2, p3, p4); } |
||||
#undef clCreateUserEvent |
||||
#define clCreateUserEvent clCreateUserEvent_fn |
||||
inline cl_event clCreateUserEvent(cl_context p0, cl_int* p1) { return clCreateUserEvent_pfn(p0, p1); } |
||||
#undef clRetainEvent |
||||
#define clRetainEvent clRetainEvent_fn |
||||
inline cl_int clRetainEvent(cl_event p0) { return clRetainEvent_pfn(p0); } |
||||
#undef clReleaseEvent |
||||
#define clReleaseEvent clReleaseEvent_fn |
||||
inline cl_int clReleaseEvent(cl_event p0) { return clReleaseEvent_pfn(p0); } |
||||
#undef clSetUserEventStatus |
||||
#define clSetUserEventStatus clSetUserEventStatus_fn |
||||
inline cl_int clSetUserEventStatus(cl_event p0, cl_int p1) { return clSetUserEventStatus_pfn(p0, p1); } |
||||
#undef clSetEventCallback |
||||
#define clSetEventCallback clSetEventCallback_fn |
||||
inline cl_int clSetEventCallback(cl_event p0, cl_int p1, void (CL_CALLBACK*p2) (cl_event, cl_int, void*), void* p3) { return clSetEventCallback_pfn(p0, p1, p2, p3); } |
||||
#undef clGetEventProfilingInfo |
||||
#define clGetEventProfilingInfo clGetEventProfilingInfo_fn |
||||
inline cl_int clGetEventProfilingInfo(cl_event p0, cl_profiling_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventProfilingInfo_pfn(p0, p1, p2, p3, p4); } |
||||
#undef clFlush |
||||
#define clFlush clFlush_fn |
||||
inline cl_int clFlush(cl_command_queue p0) { return clFlush_pfn(p0); } |
||||
#undef clFinish |
||||
#define clFinish clFinish_fn |
||||
inline cl_int clFinish(cl_command_queue p0) { return clFinish_pfn(p0); } |
||||
#undef clEnqueueReadBuffer |
||||
#define clEnqueueReadBuffer clEnqueueReadBuffer_fn |
||||
inline cl_int clEnqueueReadBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueReadBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } |
||||
#undef clEnqueueReadBufferRect |
||||
#define clEnqueueReadBufferRect clEnqueueReadBufferRect_fn |
||||
inline cl_int clEnqueueReadBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueReadBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } |
||||
#undef clEnqueueWriteBuffer |
||||
#define clEnqueueWriteBuffer clEnqueueWriteBuffer_fn |
||||
inline cl_int clEnqueueWriteBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, const void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueWriteBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } |
||||
#undef clEnqueueWriteBufferRect |
||||
#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_fn |
||||
inline cl_int clEnqueueWriteBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, const void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueWriteBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } |
||||
#undef clEnqueueFillBuffer |
||||
#define clEnqueueFillBuffer clEnqueueFillBuffer_fn |
||||
inline cl_int clEnqueueFillBuffer(cl_command_queue p0, cl_mem p1, const void* p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueFillBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } |
||||
#undef clEnqueueCopyBuffer |
||||
#define clEnqueueCopyBuffer clEnqueueCopyBuffer_fn |
||||
inline cl_int clEnqueueCopyBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } |
||||
#undef clEnqueueCopyBufferRect |
||||
#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_fn |
||||
inline cl_int clEnqueueCopyBufferRect(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, cl_uint p10, const cl_event* p11, cl_event* p12) { return clEnqueueCopyBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12); } |
||||
#undef clEnqueueReadImage |
||||
#define clEnqueueReadImage clEnqueueReadImage_fn |
||||
inline cl_int clEnqueueReadImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueReadImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } |
||||
#undef clEnqueueWriteImage |
||||
#define clEnqueueWriteImage clEnqueueWriteImage_fn |
||||
inline cl_int clEnqueueWriteImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, const void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueWriteImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } |
||||
#undef clEnqueueFillImage |
||||
#define clEnqueueFillImage clEnqueueFillImage_fn |
||||
inline cl_int clEnqueueFillImage(cl_command_queue p0, cl_mem p1, const void* p2, const size_t* p3, const size_t* p4, cl_uint p5, const cl_event* p6, cl_event* p7) { return clEnqueueFillImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7); } |
||||
#undef clEnqueueCopyImage |
||||
#define clEnqueueCopyImage clEnqueueCopyImage_fn |
||||
inline cl_int clEnqueueCopyImage(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } |
||||
#undef clEnqueueCopyImageToBuffer |
||||
#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_fn |
||||
inline cl_int clEnqueueCopyImageToBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImageToBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } |
||||
#undef clEnqueueCopyBufferToImage |
||||
#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_fn |
||||
inline cl_int clEnqueueCopyBufferToImage(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBufferToImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } |
||||
#undef clEnqueueMapBuffer |
||||
#define clEnqueueMapBuffer clEnqueueMapBuffer_fn |
||||
inline void* clEnqueueMapBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8, cl_int* p9) { return clEnqueueMapBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } |
||||
#undef clEnqueueMapImage |
||||
#define clEnqueueMapImage clEnqueueMapImage_fn |
||||
inline void* clEnqueueMapImage(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, const size_t* p4, const size_t* p5, size_t* p6, size_t* p7, cl_uint p8, const cl_event* p9, cl_event* p10, cl_int* p11) { return clEnqueueMapImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11); } |
||||
#undef clEnqueueUnmapMemObject |
||||
#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_fn |
||||
inline cl_int clEnqueueUnmapMemObject(cl_command_queue p0, cl_mem p1, void* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueUnmapMemObject_pfn(p0, p1, p2, p3, p4, p5); } |
||||
#undef clEnqueueMigrateMemObjects |
||||
#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_fn |
||||
inline cl_int clEnqueueMigrateMemObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_mem_migration_flags p3, cl_uint p4, const cl_event* p5, cl_event* p6) { return clEnqueueMigrateMemObjects_pfn(p0, p1, p2, p3, p4, p5, p6); } |
||||
#undef clEnqueueNDRangeKernel |
||||
#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_fn |
||||
inline cl_int clEnqueueNDRangeKernel(cl_command_queue p0, cl_kernel p1, cl_uint p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueNDRangeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } |
||||
#undef clEnqueueTask |
||||
#define clEnqueueTask clEnqueueTask_fn |
||||
inline cl_int clEnqueueTask(cl_command_queue p0, cl_kernel p1, cl_uint p2, const cl_event* p3, cl_event* p4) { return clEnqueueTask_pfn(p0, p1, p2, p3, p4); } |
||||
#undef clEnqueueNativeKernel |
||||
#define clEnqueueNativeKernel clEnqueueNativeKernel_fn |
||||
inline cl_int clEnqueueNativeKernel(cl_command_queue p0, void (CL_CALLBACK*p1) (void*), void* p2, size_t p3, cl_uint p4, const cl_mem* p5, const void** p6, cl_uint p7, const cl_event* p8, cl_event* p9) { return clEnqueueNativeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } |
||||
#undef clEnqueueMarkerWithWaitList |
||||
#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_fn |
||||
inline cl_int clEnqueueMarkerWithWaitList(cl_command_queue p0, cl_uint p1, const cl_event* p2, cl_event* p3) { return clEnqueueMarkerWithWaitList_pfn(p0, p1, p2, p3); } |
||||
#undef clEnqueueBarrierWithWaitList |
||||
#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_fn |
||||
inline cl_int clEnqueueBarrierWithWaitList(cl_command_queue p0, cl_uint p1, const cl_event* p2, cl_event* p3) { return clEnqueueBarrierWithWaitList_pfn(p0, p1, p2, p3); } |
||||
#undef clGetExtensionFunctionAddressForPlatform |
||||
#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_fn |
||||
inline void* clGetExtensionFunctionAddressForPlatform(cl_platform_id p0, const char* p1) { return clGetExtensionFunctionAddressForPlatform_pfn(p0, p1); } |
||||
#undef clCreateImage2D |
||||
#define clCreateImage2D clCreateImage2D_fn |
||||
inline cl_mem clCreateImage2D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, void* p6, cl_int* p7) { return clCreateImage2D_pfn(p0, p1, p2, p3, p4, p5, p6, p7); } |
||||
#undef clCreateImage3D |
||||
#define clCreateImage3D clCreateImage3D_fn |
||||
inline cl_mem clCreateImage3D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, size_t p6, size_t p7, void* p8, cl_int* p9) { return clCreateImage3D_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } |
||||
#undef clEnqueueMarker |
||||
#define clEnqueueMarker clEnqueueMarker_fn |
||||
inline cl_int clEnqueueMarker(cl_command_queue p0, cl_event* p1) { return clEnqueueMarker_pfn(p0, p1); } |
||||
#undef clEnqueueWaitForEvents |
||||
#define clEnqueueWaitForEvents clEnqueueWaitForEvents_fn |
||||
inline cl_int clEnqueueWaitForEvents(cl_command_queue p0, cl_uint p1, const cl_event* p2) { return clEnqueueWaitForEvents_pfn(p0, p1, p2); } |
||||
#undef clEnqueueBarrier |
||||
#define clEnqueueBarrier clEnqueueBarrier_fn |
||||
inline cl_int clEnqueueBarrier(cl_command_queue p0) { return clEnqueueBarrier_pfn(p0); } |
||||
#undef clUnloadCompiler |
||||
#define clUnloadCompiler clUnloadCompiler_fn |
||||
inline cl_int clUnloadCompiler() { return clUnloadCompiler_pfn(); } |
||||
#undef clGetExtensionFunctionAddress |
||||
#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_fn |
||||
inline void* clGetExtensionFunctionAddress(const char* p0) { return clGetExtensionFunctionAddress_pfn(p0); } |
||||
|
||||
#endif // __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__
|
@ -0,0 +1,756 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Guoping Long, longguoping@gmail.com
|
||||
// Niko Li, newlife20080214@gmail.com
|
||||
// Yao Wang, bitwangyaoyao@gmail.com
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp" |
||||
#include <iomanip> |
||||
#include <fstream> |
||||
#include "cl_programcache.hpp" |
||||
|
||||
// workaround for OpenCL C++ bindings
|
||||
#if defined(HAVE_OPENCL12) |
||||
#include "opencv2/ocl/cl_runtime/cl_runtime_opencl12_wrappers.hpp" |
||||
#elif defined(HAVE_OPENCL11) |
||||
#include "opencv2/ocl/cl_runtime/cl_runtime_opencl11_wrappers.hpp" |
||||
#else |
||||
#error Invalid OpenCL configuration |
||||
#endif |
||||
|
||||
#if defined _MSC_VER && _MSC_VER >= 1200 |
||||
#pragma warning( disable: 4100 4101 4127 4244 4267 4510 4512 4610) |
||||
#endif |
||||
#undef __CL_ENABLE_EXCEPTIONS |
||||
#include <CL/cl.hpp> |
||||
|
||||
namespace cv { |
||||
namespace ocl { |
||||
|
||||
struct PlatformInfoImpl |
||||
{ |
||||
cl_platform_id platform_id; |
||||
|
||||
std::vector<int> deviceIDs; |
||||
|
||||
PlatformInfo info; |
||||
|
||||
PlatformInfoImpl() |
||||
: platform_id(NULL) |
||||
{ |
||||
} |
||||
}; |
||||
|
||||
struct DeviceInfoImpl |
||||
{ |
||||
cl_platform_id platform_id; |
||||
cl_device_id device_id; |
||||
|
||||
DeviceInfo info; |
||||
|
||||
DeviceInfoImpl() |
||||
: platform_id(NULL), device_id(NULL) |
||||
{ |
||||
} |
||||
}; |
||||
|
||||
// File-local tables filled by initializeOpenCLDevices(): one record per OpenCL
// platform found and one record per device found on those platforms.
static std::vector<PlatformInfoImpl> global_platforms; |
||||
static std::vector<DeviceInfoImpl> global_devices; |
||||
|
||||
// Extracts the "<major>.<minor>" pair from an OpenCL version string such as
// "OpenCL 1.2 AMD-APP (1124.2)".
//
// The version token is the text that follows the first space which is immediately
// followed by a digit. It ends at the next space or, when there is none, at the
// end of the string, so a bare "OpenCL 1.2" is accepted too.
//
// versionStr - string to parse
// major      - receives the major version number, 0 on failure
// minor      - receives the minor version number, 0 on failure
// Returns true when a "<digit...>.<...>" token was located, false otherwise.
static bool parseOpenCLVersion(const std::string& versionStr, int& major, int& minor)
{
    // Locate the first space whose following character is a digit.
    size_t p0 = versionStr.find(' ');
    while (p0 != std::string::npos && p0 + 1 < versionStr.length())
    {
        // isdigit() is only defined for values representable as unsigned char
        if (isdigit(static_cast<unsigned char>(versionStr[p0 + 1])))
            break;
        p0 = versionStr.find(' ', p0 + 1);
    }

    const size_t p1 = versionStr.find('.', p0);
    if (p0 == std::string::npos || p1 == std::string::npos)
    {
        major = 0;
        minor = 0;
        return false;
    }

    // The minor number runs up to the next space; a missing vendor suffix
    // simply means it runs up to the end of the string.
    size_t p2 = versionStr.find(' ', p1);
    if (p2 == std::string::npos)
        p2 = versionStr.length();

    const std::string majorStr = versionStr.substr(p0 + 1, p1 - p0 - 1);
    const std::string minorStr = versionStr.substr(p1 + 1, p2 - p1 - 1);
    major = atoi(majorStr.c_str());
    minor = atoi(minorStr.c_str());
    return true;
}
||||
|
||||
// Appends to `elems` the pieces of `s` separated by `delim`.
// Empty pieces between delimiters are kept; an empty input and a trailing
// delimiter do not produce an extra (empty) piece.
static void split(const std::string &s, char delim, std::vector<std::string> &elems) {
    std::string::size_type begin = 0;
    while (begin < s.size()) {
        const std::string::size_type end = s.find(delim, begin);
        if (end == std::string::npos) {
            // last piece: everything up to the end of the string
            elems.push_back(s.substr(begin));
            break;
        }
        elems.push_back(s.substr(begin, end - begin));
        begin = end + 1;
    }
}

// Convenience overload that returns the pieces of `s` in a new vector.
static std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> pieces;
    split(s, delim, pieces);
    return pieces;
}
||||
|
||||
// Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
|
||||
// Sample: AMD:GPU:
|
||||
// Sample: AMD:GPU:Tahiti
|
||||
// Sample: :GPU|CPU: = '' = ':' = '::'
|
||||
static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr, |
||||
std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID) |
||||
{ |
||||
std::string deviceTypesStr; |
||||
size_t p0 = configurationStr.find(':'); |
||||
if (p0 != std::string::npos) |
||||
{ |
||||
size_t p1 = configurationStr.find(':', p0 + 1); |
||||
if (p1 != std::string::npos) |
||||
{ |
||||
size_t p2 = configurationStr.find(':', p1 + 1); |
||||
if (p2 != std::string::npos) |
||||
{ |
||||
std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl; |
||||
return false; |
||||
} |
||||
else |
||||
{ |
||||
// assume platform + device types + device name/id
|
||||
platform = configurationStr.substr(0, p0); |
||||
deviceTypesStr = configurationStr.substr(p0 + 1, p1 - (p0 + 1)); |
||||
deviceNameOrID = configurationStr.substr(p1 + 1, configurationStr.length() - (p1 + 1)); |
||||
} |
||||
} |
||||
else |
||||
{ |
||||
// assume platform + device types
|
||||
platform = configurationStr.substr(0, p0); |
||||
deviceTypesStr = configurationStr.substr(p0 + 1, configurationStr.length() - (p0 + 1)); |
||||
} |
||||
} |
||||
else |
||||
{ |
||||
// assume only platform
|
||||
platform = configurationStr; |
||||
} |
||||
deviceTypes = split(deviceTypesStr, '|'); |
||||
return true; |
||||
} |
||||
|
||||
static bool __deviceSelected = false; |
||||
static bool selectOpenCLDevice() |
||||
{ |
||||
__deviceSelected = true; |
||||
|
||||
std::string platform; |
||||
std::vector<std::string> deviceTypes; |
||||
std::string deviceName; |
||||
const char* configuration = getenv("OPENCV_OPENCL_DEVICE"); |
||||
if (configuration) |
||||
{ |
||||
if (!parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName)) |
||||
return false; |
||||
} |
||||
|
||||
bool isID = false; |
||||
int deviceID = -1; |
||||
if (deviceName.length() == 1) |
||||
// We limit ID range to 0..9, because we want to write:
|
||||
// - '2500' to mean i5-2500
|
||||
// - '8350' to mean AMD FX-8350
|
||||
// - '650' to mean GeForce 650
|
||||
// To extend ID range change condition to '> 0'
|
||||
{ |
||||
isID = true; |
||||
for (size_t i = 0; i < deviceName.length(); i++) |
||||
{ |
||||
if (!isdigit(deviceName[i])) |
||||
{ |
||||
isID = false; |
||||
break; |
||||
} |
||||
} |
||||
if (isID) |
||||
{ |
||||
deviceID = atoi(deviceName.c_str()); |
||||
CV_Assert(deviceID >= 0); |
||||
} |
||||
} |
||||
|
||||
const PlatformInfo* platformInfo = NULL; |
||||
if (platform.length() > 0) |
||||
{ |
||||
PlatformsInfo platforms; |
||||
getOpenCLPlatforms(platforms); |
||||
for (size_t i = 0; i < platforms.size(); i++) |
||||
{ |
||||
if (platforms[i]->platformName.find(platform) != std::string::npos) |
||||
{ |
||||
platformInfo = platforms[i]; |
||||
break; |
||||
} |
||||
} |
||||
if (platformInfo == NULL) |
||||
{ |
||||
std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl; |
||||
goto not_found; |
||||
} |
||||
} |
||||
|
||||
if (deviceTypes.size() == 0) |
||||
{ |
||||
if (!isID) |
||||
{ |
||||
deviceTypes.push_back("GPU"); |
||||
deviceTypes.push_back("CPU"); |
||||
} |
||||
else |
||||
{ |
||||
deviceTypes.push_back("ALL"); |
||||
} |
||||
} |
||||
for (size_t t = 0; t < deviceTypes.size(); t++) |
||||
{ |
||||
int deviceType = 0; |
||||
if (deviceTypes[t] == "GPU") |
||||
{ |
||||
deviceType = CVCL_DEVICE_TYPE_GPU; |
||||
} |
||||
else if (deviceTypes[t] == "CPU") |
||||
{ |
||||
deviceType = CVCL_DEVICE_TYPE_CPU; |
||||
} |
||||
else if (deviceTypes[t] == "ACCELERATOR") |
||||
{ |
||||
deviceType = CVCL_DEVICE_TYPE_ACCELERATOR; |
||||
} |
||||
else if (deviceTypes[t] == "ALL") |
||||
{ |
||||
deviceType = CVCL_DEVICE_TYPE_ALL; |
||||
} |
||||
else |
||||
{ |
||||
std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl; |
||||
goto not_found; |
||||
} |
||||
|
||||
DevicesInfo devices; |
||||
getOpenCLDevices(devices, deviceType, platformInfo); |
||||
|
||||
for (size_t i = (isID ? deviceID : 0); |
||||
(isID ? (i == (size_t)deviceID) : true) && (i < devices.size()); |
||||
i++) |
||||
{ |
||||
if (isID || devices[i]->deviceName.find(deviceName) != std::string::npos) |
||||
{ |
||||
// check for OpenCL 1.1
|
||||
if (devices[i]->deviceVersionMajor < 1 || |
||||
(devices[i]->deviceVersionMajor == 1 && devices[i]->deviceVersionMinor < 1)) |
||||
{ |
||||
std::cerr << "Skip unsupported version of OpenCL device: " << devices[i]->deviceName |
||||
<< "(" << devices[i]->platform->platformName << ")" << std::endl; |
||||
continue; // unsupported version of device, skip it
|
||||
} |
||||
try |
||||
{ |
||||
setDevice(devices[i]); |
||||
} |
||||
catch (...) |
||||
{ |
||||
std::cerr << "ERROR: Can't select OpenCL device: " << devices[i]->deviceName |
||||
<< "(" << devices[i]->platform->platformName << ")" << std::endl; |
||||
goto not_found; |
||||
} |
||||
return true; |
||||
} |
||||
} |
||||
} |
||||
not_found: |
||||
std::cerr << "ERROR: Required OpenCL device not found, check configuration: " << (configuration == NULL ? "" : configuration) << std::endl |
||||
<< " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl |
||||
<< " Device types: "; |
||||
for (size_t t = 0; t < deviceTypes.size(); t++) |
||||
{ |
||||
std::cerr << deviceTypes[t] << " "; |
||||
} |
||||
std::cerr << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName) << std::endl; |
||||
return false; |
||||
} |
||||
|
||||
static cv::Mutex __initializedMutex; |
||||
static bool __initialized = false; |
||||
static int initializeOpenCLDevices() |
||||
{ |
||||
assert(!__initialized); |
||||
__initialized = true; |
||||
|
||||
assert(global_devices.size() == 0); |
||||
|
||||
std::vector<cl::Platform> platforms; |
||||
try |
||||
{ |
||||
openCLSafeCall(cl::Platform::get(&platforms)); |
||||
} |
||||
catch (cv::Exception& e) |
||||
{ |
||||
return 0; // OpenCL not found
|
||||
} |
||||
|
||||
global_platforms.resize(platforms.size()); |
||||
|
||||
for (size_t i = 0; i < platforms.size(); ++i) |
||||
{ |
||||
PlatformInfoImpl& platformInfo = global_platforms[i]; |
||||
platformInfo.info._id = i; |
||||
|
||||
cl::Platform& platform = platforms[i]; |
||||
|
||||
platformInfo.platform_id = platform(); |
||||
openCLSafeCall(platform.getInfo(CL_PLATFORM_PROFILE, &platformInfo.info.platformProfile)); |
||||
openCLSafeCall(platform.getInfo(CL_PLATFORM_VERSION, &platformInfo.info.platformVersion)); |
||||
openCLSafeCall(platform.getInfo(CL_PLATFORM_NAME, &platformInfo.info.platformName)); |
||||
openCLSafeCall(platform.getInfo(CL_PLATFORM_VENDOR, &platformInfo.info.platformVendor)); |
||||
openCLSafeCall(platform.getInfo(CL_PLATFORM_EXTENSIONS, &platformInfo.info.platformExtensons)); |
||||
|
||||
parseOpenCLVersion(platformInfo.info.platformVersion, |
||||
platformInfo.info.platformVersionMajor, platformInfo.info.platformVersionMinor); |
||||
|
||||
std::vector<cl::Device> devices; |
||||
cl_int status = platform.getDevices(CL_DEVICE_TYPE_ALL, &devices); |
||||
if(status != CL_DEVICE_NOT_FOUND) |
||||
openCLVerifyCall(status); |
||||
|
||||
if(devices.size() > 0) |
||||
{ |
||||
int baseIndx = global_devices.size(); |
||||
global_devices.resize(baseIndx + devices.size()); |
||||
platformInfo.deviceIDs.resize(devices.size()); |
||||
platformInfo.info.devices.resize(devices.size()); |
||||
|
||||
for(size_t j = 0; j < devices.size(); ++j) |
||||
{ |
||||
cl::Device& device = devices[j]; |
||||
|
||||
DeviceInfoImpl& deviceInfo = global_devices[baseIndx + j]; |
||||
deviceInfo.info._id = baseIndx + j; |
||||
deviceInfo.platform_id = platform(); |
||||
deviceInfo.device_id = device(); |
||||
|
||||
deviceInfo.info.platform = &platformInfo.info; |
||||
platformInfo.deviceIDs[j] = deviceInfo.info._id; |
||||
|
||||
cl_device_type type = cl_device_type(-1); |
||||
openCLSafeCall(device.getInfo(CL_DEVICE_TYPE, &type)); |
||||
deviceInfo.info.deviceType = DeviceType(type); |
||||
|
||||
openCLSafeCall(device.getInfo(CL_DEVICE_PROFILE, &deviceInfo.info.deviceProfile)); |
||||
openCLSafeCall(device.getInfo(CL_DEVICE_VERSION, &deviceInfo.info.deviceVersion)); |
||||
openCLSafeCall(device.getInfo(CL_DEVICE_NAME, &deviceInfo.info.deviceName)); |
||||
openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR, &deviceInfo.info.deviceVendor)); |
||||
cl_uint vendorID = 0; |
||||
openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR_ID, &vendorID)); |
||||
deviceInfo.info.deviceVendorId = vendorID; |
||||
openCLSafeCall(device.getInfo(CL_DRIVER_VERSION, &deviceInfo.info.deviceDriverVersion)); |
||||
openCLSafeCall(device.getInfo(CL_DEVICE_EXTENSIONS, &deviceInfo.info.deviceExtensions)); |
||||
|
||||
parseOpenCLVersion(deviceInfo.info.deviceVersion, |
||||
deviceInfo.info.deviceVersionMajor, deviceInfo.info.deviceVersionMinor); |
||||
|
||||
size_t maxWorkGroupSize = 0; |
||||
openCLSafeCall(device.getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &maxWorkGroupSize)); |
||||
deviceInfo.info.maxWorkGroupSize = maxWorkGroupSize; |
||||
|
||||
cl_uint maxDimensions = 0; |
||||
openCLSafeCall(device.getInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, &maxDimensions)); |
||||
std::vector<size_t> maxWorkItemSizes(maxDimensions); |
||||
openCLSafeCall(clGetDeviceInfo(device(), CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * maxDimensions, |
||||
(void *)&maxWorkItemSizes[0], 0)); |
||||
deviceInfo.info.maxWorkItemSizes = maxWorkItemSizes; |
||||
|
||||
cl_uint maxComputeUnits = 0; |
||||
openCLSafeCall(device.getInfo(CL_DEVICE_MAX_COMPUTE_UNITS, &maxComputeUnits)); |
||||
deviceInfo.info.maxComputeUnits = maxComputeUnits; |
||||
|
||||
cl_ulong localMemorySize = 0; |
||||
openCLSafeCall(device.getInfo(CL_DEVICE_LOCAL_MEM_SIZE, &localMemorySize)); |
||||
deviceInfo.info.localMemorySize = (size_t)localMemorySize; |
||||
|
||||
|
||||
cl_bool unifiedMemory = false; |
||||
openCLSafeCall(device.getInfo(CL_DEVICE_HOST_UNIFIED_MEMORY, &unifiedMemory)); |
||||
deviceInfo.info.isUnifiedMemory = unifiedMemory != 0; |
||||
|
||||
//initialize extra options for compilation. Currently only fp64 is included.
|
||||
//Assume 4KB is enough to store all possible extensions.
|
||||
openCLSafeCall(device.getInfo(CL_DEVICE_EXTENSIONS, &deviceInfo.info.deviceExtensions)); |
||||
|
||||
size_t fp64_khr = deviceInfo.info.deviceExtensions.find("cl_khr_fp64"); |
||||
if(fp64_khr != std::string::npos) |
||||
{ |
||||
deviceInfo.info.compilationExtraOptions += "-D DOUBLE_SUPPORT"; |
||||
deviceInfo.info.haveDoubleSupport = true; |
||||
} |
||||
else |
||||
{ |
||||
deviceInfo.info.haveDoubleSupport = false; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
for (size_t i = 0; i < platforms.size(); ++i) |
||||
{ |
||||
PlatformInfoImpl& platformInfo = global_platforms[i]; |
||||
for(size_t j = 0; j < platformInfo.deviceIDs.size(); ++j) |
||||
{ |
||||
DeviceInfoImpl& deviceInfo = global_devices[platformInfo.deviceIDs[j]]; |
||||
platformInfo.info.devices[j] = &deviceInfo.info; |
||||
} |
||||
} |
||||
|
||||
return global_devices.size(); |
||||
} |
||||
|
||||
|
||||
// Creates a description that does not refer to any device yet:
// invalid ids (-1), zeroed limits and version, no optional features, no platform.
DeviceInfo::DeviceInfo()
    : _id(-1),
      deviceType(DeviceType(0)),
      deviceVendorId(-1),
      maxWorkGroupSize(0),
      maxComputeUnits(0),
      localMemorySize(0),
      deviceVersionMajor(0),
      deviceVersionMinor(0),
      haveDoubleSupport(false),
      isUnifiedMemory(false),
      platform(NULL)
{
}
||||
|
||||
// Creates a description that does not refer to any platform yet:
// invalid id (-1) and version 0.0.
PlatformInfo::PlatformInfo()
    : _id(-1),
      platformVersionMajor(0),
      platformVersionMinor(0)
{
}
||||
|
||||
//////////////////////////////// OpenCL context ////////////////////////
|
||||
//This is a global singleton class used to represent a OpenCL context.
|
||||
class ContextImpl : public Context |
||||
{ |
||||
public: |
||||
const cl_device_id clDeviceID; |
||||
cl_context clContext; |
||||
cl_command_queue clCmdQueue; |
||||
const DeviceInfo& deviceInfo; |
||||
|
||||
protected: |
||||
ContextImpl(const DeviceInfo& deviceInfo, cl_device_id clDeviceID) |
||||
: clDeviceID(clDeviceID), clContext(NULL), clCmdQueue(NULL), deviceInfo(deviceInfo) |
||||
{ |
||||
// nothing
|
||||
} |
||||
~ContextImpl(); |
||||
public: |
||||
static void setContext(const DeviceInfo* deviceInfo); |
||||
|
||||
bool supportsFeature(FEATURE_TYPE featureType) const; |
||||
|
||||
static void cleanupContext(void); |
||||
}; |
||||
|
||||
static cv::Mutex currentContextMutex; |
||||
static ContextImpl* currentContext = NULL; |
||||
|
||||
Context* Context::getContext() |
||||
{ |
||||
if (currentContext == NULL) |
||||
{ |
||||
if (!__initialized || !__deviceSelected) |
||||
{ |
||||
cv::AutoLock lock(__initializedMutex); |
||||
if (!__initialized) |
||||
{ |
||||
if (initializeOpenCLDevices() == 0) |
||||
{ |
||||
CV_Error(CV_GpuNotSupported, "OpenCL not available"); |
||||
} |
||||
} |
||||
if (!__deviceSelected) |
||||
{ |
||||
if (!selectOpenCLDevice()) |
||||
{ |
||||
CV_Error(CV_GpuNotSupported, "Can't select OpenCL device"); |
||||
} |
||||
} |
||||
} |
||||
CV_Assert(currentContext != NULL); |
||||
} |
||||
return currentContext; |
||||
} |
||||
|
||||
// Delegates to the ContextImpl implementation of this object.
bool Context::supportsFeature(FEATURE_TYPE featureType) const
{
    const ContextImpl* const impl = static_cast<const ContextImpl*>(this);
    return impl->supportsFeature(featureType);
}
||||
|
||||
// Returns the description of the device this context was created for.
const DeviceInfo& Context::getDeviceInfo() const
{
    const ContextImpl* const impl = static_cast<const ContextImpl*>(this);
    return impl->deviceInfo;
}
||||
|
||||
const void* Context::getOpenCLContextPtr() const |
||||
{ |
||||
return &(((ContextImpl*)this)->clContext); |
||||
} |
||||
|
||||
const void* Context::getOpenCLCommandQueuePtr() const |
||||
{ |
||||
return &(((ContextImpl*)this)->clCmdQueue); |
||||
} |
||||
|
||||
const void* Context::getOpenCLDeviceIDPtr() const |
||||
{ |
||||
return &(((ContextImpl*)this)->clDeviceID); |
||||
} |
||||
|
||||
|
||||
// Reports whether the device behind this context offers the given feature,
// based on the properties recorded in deviceInfo. Any other feature value
// raises a bad-argument error.
bool ContextImpl::supportsFeature(FEATURE_TYPE featureType) const
{
    if (featureType == FEATURE_CL_DOUBLE)
        return deviceInfo.haveDoubleSupport;

    if (featureType == FEATURE_CL_UNIFIED_MEM)
        return deviceInfo.isUnifiedMemory;

    if (featureType == FEATURE_CL_VER_1_2)
    {
        // device version >= 1.2
        if (deviceInfo.deviceVersionMajor > 1)
            return true;
        return deviceInfo.deviceVersionMajor == 1 && deviceInfo.deviceVersionMinor >= 2;
    }

    CV_Error(CV_StsBadArg, "Invalid feature type");
    return false;
}
||||
|
||||
#if defined(WIN32) |
||||
static bool __termination = false; |
||||
#endif |
||||
|
||||
ContextImpl::~ContextImpl() |
||||
{ |
||||
#ifdef WIN32 |
||||
// if process is on termination stage (ExitProcess was called and other threads were terminated)
|
||||
// then disable command queue release because it may cause program hang
|
||||
if (!__termination) |
||||
#endif |
||||
{ |
||||
if(clCmdQueue) |
||||
{ |
||||
openCLSafeCall(clReleaseCommandQueue(clCmdQueue)); // some cleanup problems are here
|
||||
} |
||||
|
||||
if(clContext) |
||||
{ |
||||
openCLSafeCall(clReleaseContext(clContext)); |
||||
} |
||||
} |
||||
clCmdQueue = NULL; |
||||
clContext = NULL; |
||||
} |
||||
|
||||
void fft_teardown(); |
||||
void clBlasTeardown(); |
||||
|
||||
void ContextImpl::cleanupContext(void) |
||||
{ |
||||
fft_teardown(); |
||||
clBlasTeardown(); |
||||
|
||||
cv::AutoLock lock(currentContextMutex); |
||||
if (currentContext) |
||||
delete currentContext; |
||||
currentContext = NULL; |
||||
} |
||||
|
||||
void ContextImpl::setContext(const DeviceInfo* deviceInfo) |
||||
{ |
||||
CV_Assert(deviceInfo->_id >= 0 && deviceInfo->_id < (int)global_devices.size()); |
||||
|
||||
{ |
||||
cv::AutoLock lock(currentContextMutex); |
||||
if (currentContext) |
||||
{ |
||||
if (currentContext->deviceInfo._id == deviceInfo->_id) |
||||
return; |
||||
} |
||||
} |
||||
|
||||
DeviceInfoImpl& infoImpl = global_devices[deviceInfo->_id]; |
||||
CV_Assert(deviceInfo == &infoImpl.info); |
||||
|
||||
cl_int status = 0; |
||||
cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(infoImpl.platform_id), 0 }; |
||||
cl_context clContext = clCreateContext(cps, 1, &infoImpl.device_id, NULL, NULL, &status); |
||||
openCLVerifyCall(status); |
||||
// TODO add CL_QUEUE_PROFILING_ENABLE
|
||||
cl_command_queue clCmdQueue = clCreateCommandQueue(clContext, infoImpl.device_id, 0, &status); |
||||
openCLVerifyCall(status); |
||||
|
||||
ContextImpl* ctx = new ContextImpl(infoImpl.info, infoImpl.device_id); |
||||
ctx->clCmdQueue = clCmdQueue; |
||||
ctx->clContext = clContext; |
||||
|
||||
ContextImpl* old = NULL; |
||||
{ |
||||
cv::AutoLock lock(currentContextMutex); |
||||
old = currentContext; |
||||
currentContext = ctx; |
||||
} |
||||
if (old != NULL) |
||||
{ |
||||
delete old; |
||||
} |
||||
} |
||||
|
||||
// Fills `platforms` with pointers to the info of every entry in
// global_platforms (enumerating devices first if that has not happened yet)
// and returns the number of entries stored.
int getOpenCLPlatforms(PlatformsInfo& platforms)
{
    if (!__initialized)
        initializeOpenCLDevices();

    platforms.clear();

    const size_t platformCount = global_platforms.size();
    for (size_t idx = 0; idx < platformCount; ++idx)
        platforms.push_back(&global_platforms[idx].info);

    return (int)platforms.size();
}
||||
|
||||
// Collects the devices whose type matches the deviceType mask.
//
// devices    - output list; cleared first, then filled with matching devices.
// deviceType - one of the CVCL_DEVICE_TYPE_* values; any other value yields 0.
// platform   - when NULL, all entries of global_devices are considered;
//              otherwise only the devices listed in platform->devices.
// Returns the number of devices stored in `devices`.
int getOpenCLDevices(std::vector<const DeviceInfo*> &devices, int deviceType, const PlatformInfo* platform)
{
    if (!__initialized)
        initializeOpenCLDevices();

    devices.clear();

    switch(deviceType)
    {
    case CVCL_DEVICE_TYPE_DEFAULT:
    case CVCL_DEVICE_TYPE_CPU:
    case CVCL_DEVICE_TYPE_GPU:
    case CVCL_DEVICE_TYPE_ACCELERATOR:
    case CVCL_DEVICE_TYPE_ALL:
        break;
    default:
        return 0;
    }

    if (platform == NULL)
    {
        for (size_t id = 0; id < global_devices.size(); ++id)
        {
            DeviceInfoImpl& deviceInfo = global_devices[id];
            if (((int)deviceInfo.info.deviceType & deviceType) != 0)
            {
                devices.push_back(&deviceInfo.info);
            }
        }
    }
    else
    {
        for (size_t id = 0; id < platform->devices.size(); ++id)
        {
            const DeviceInfo* deviceInfo = platform->devices[id];
            // Same "any requested bit is set" test as the global branch above.
            // The previous "== deviceType" comparison could not succeed for a
            // multi-bit mask such as CVCL_DEVICE_TYPE_ALL, so a per-platform
            // query for all devices came back empty.
            if (((int)deviceInfo->deviceType & deviceType) != 0)
            {
                devices.push_back(deviceInfo);
            }
        }
    }

    return (int)devices.size();
}
||||
|
||||
void setDevice(const DeviceInfo* info) |
||||
{ |
||||
if (!__deviceSelected) |
||||
__deviceSelected = true; |
||||
|
||||
ContextImpl::setContext(info); |
||||
} |
||||
|
||||
// Convenience wrapper: asks the current context whether featureType is supported.
bool supportsFeature(FEATURE_TYPE featureType)
{
    const Context* const ctx = Context::getContext();
    return ctx->supportsFeature(featureType);
}
||||
|
||||
struct __Module |
||||
{ |
||||
__Module() { /* moved to Context::getContext(): initializeOpenCLDevices(); */ } |
||||
~__Module() { ContextImpl::cleanupContext(); } |
||||
}; |
||||
static __Module __module; |
||||
|
||||
|
||||
} // namespace ocl
|
||||
} // namespace cv
|
||||
|
||||
|
||||
#if defined(WIN32) && defined(CVAPI_EXPORTS)

// DLL entry point. Its only job is to record in cv::ocl::__termination that
// the DLL is being detached as part of process exit.
extern "C"
BOOL WINAPI DllMain(HINSTANCE /*hInst*/, DWORD fdwReason, LPVOID lpReserved)
{
    // A non-NULL lpReserved on DLL_PROCESS_DETACH means: called after ExitProcess() call
    const bool processIsExiting = (fdwReason == DLL_PROCESS_DETACH) && (lpReserved != NULL);
    if (processIsExiting)
        cv::ocl::__termination = true;

    return TRUE;
}

#endif
@ -0,0 +1,409 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Guoping Long, longguoping@gmail.com
|
||||
// Niko Li, newlife20080214@gmail.com
|
||||
// Yao Wang, bitwangyaoyao@gmail.com
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp" |
||||
#include <iomanip> |
||||
#include <fstream> |
||||
#include "cl_programcache.hpp" |
||||
|
||||
//#define PRINT_KERNEL_RUN_TIME
|
||||
#define RUN_TIMES 100 |
||||
#ifndef CL_MEM_USE_PERSISTENT_MEM_AMD |
||||
#define CL_MEM_USE_PERSISTENT_MEM_AMD 0 |
||||
#endif |
||||
//#define AMD_DOUBLE_DIFFER
|
||||
|
||||
namespace cv { |
||||
namespace ocl { |
||||
|
||||
DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT; |
||||
DevMemRW gDeviceMemRW = DEVICE_MEM_R_W; |
||||
int gDevMemTypeValueMap[5] = {0, |
||||
CL_MEM_ALLOC_HOST_PTR, |
||||
CL_MEM_USE_HOST_PTR, |
||||
CL_MEM_COPY_HOST_PTR, |
||||
CL_MEM_USE_PERSISTENT_MEM_AMD}; |
||||
int gDevMemRWValueMap[3] = {CL_MEM_READ_WRITE, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY}; |
||||
|
||||
void finish() |
||||
{ |
||||
clFinish(getClCommandQueue(Context::getContext())); |
||||
} |
||||
|
||||
bool isCpuDevice() |
||||
{ |
||||
const DeviceInfo& info = Context::getContext()->getDeviceInfo(); |
||||
return (info.deviceType == CVCL_DEVICE_TYPE_CPU); |
||||
} |
||||
|
||||
// Returns 1 on a CPU device; otherwise returns the kernel's
// CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE on the current device.
// The kernel handle is only checked (and used) on the non-CPU path.
size_t queryWaveFrontSize(cl_kernel kernel)
{
    if (Context::getContext()->getDeviceInfo().deviceType == CVCL_DEVICE_TYPE_CPU)
        return 1;

    size_t preferredMultiple = 0;
    CV_Assert(kernel != NULL);
    openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(Context::getContext()),
                                            CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
                                            sizeof(size_t), &preferredMultiple, NULL));
    return preferredMultiple;
}
||||
|
||||
|
||||
// Blocking read of `size` bytes from the start of dst_buffer into host_buffer,
// enqueued on the command queue of ctx.
void openCLReadBuffer(Context *ctx, cl_mem dst_buffer, void *host_buffer, size_t size)
{
    cl_int status = clEnqueueReadBuffer(getClCommandQueue(ctx), dst_buffer, CL_TRUE, 0,
                                        size, host_buffer, 0, NULL, NULL);
    openCLVerifyCall(status);
}
||||
|
||||
// Creates a buffer of `size` bytes in the OpenCL context of ctx, using `flag`
// as the cl_mem_flags value and no host pointer.
cl_mem openCLCreateBuffer(Context *ctx, size_t flag , size_t size)
{
    cl_int status;
    const cl_mem_flags memFlags = (cl_mem_flags)flag;
    cl_mem buffer = clCreateBuffer(getClContext(ctx), memFlags, size, NULL, &status);
    openCLVerifyCall(status);
    return buffer;
}
||||
|
||||
// Same as openCLMallocPitchEx, using the global defaults gDeviceMemRW and
// gDeviceMemType for the access mode and memory type.
void openCLMallocPitch(Context *ctx, void **dev_ptr, size_t *pitch,
                       size_t widthInBytes, size_t height)
{
    openCLMallocPitchEx(ctx, dev_ptr, pitch, widthInBytes, height,
                        gDeviceMemRW, gDeviceMemType);
}
||||
|
||||
// Allocates a widthInBytes * height byte buffer in the context of ctx and
// stores the handle in *dev_ptr. The cl_mem flags are looked up from rw_type
// and mem_type. *pitch is set to widthInBytes (no row padding is added).
void openCLMallocPitchEx(Context *ctx, void **dev_ptr, size_t *pitch,
                         size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type)
{
    const size_t totalBytes = widthInBytes * height;

    cl_int status;
    *dev_ptr = clCreateBuffer(getClContext(ctx), gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
                              totalBytes, 0, &status);
    openCLVerifyCall(status);

    *pitch = widthInBytes;
}
||||
|
||||
// Blocking 2D copy between host memory and a device buffer.
//
// kind == clMemcpyHostToDevice: dst is the cl_mem, src the host pointer.
// kind == clMemcpyDeviceToHost: src is the cl_mem, dst the host pointer.
// Any other kind does nothing.
// A single flat width*height byte transfer is used when the device-side pitch
// equals width, when channels == 3, or when height == 1; otherwise a
// rectangular transfer honouring both pitches is used.
// NOTE(review): the reason 3-channel data always takes the flat path is not
// visible here - confirm against the callers.
void openCLMemcpy2D(Context *ctx, void *dst, size_t dpitch,
                    const void *src, size_t spitch,
                    size_t width, size_t height, openCLMemcpyKind kind, int channels)
{
    size_t buffer_origin[3] = {0, 0, 0};
    size_t host_origin[3] = {0, 0, 0};
    size_t region[3] = {width, height, 1};

    if (kind == clMemcpyHostToDevice)
    {
        const bool flatCopy = (dpitch == width || channels == 3 || height == 1);
        if (flatCopy)
        {
            openCLSafeCall(clEnqueueWriteBuffer(getClCommandQueue(ctx), (cl_mem)dst, CL_TRUE,
                                                0, width * height, src, 0, NULL, NULL));
        }
        else
        {
            openCLSafeCall(clEnqueueWriteBufferRect(getClCommandQueue(ctx), (cl_mem)dst, CL_TRUE,
                                                    buffer_origin, host_origin, region,
                                                    dpitch, 0, spitch, 0, src, 0, 0, 0));
        }
    }
    else if (kind == clMemcpyDeviceToHost)
    {
        const bool flatCopy = (spitch == width || channels == 3 || height == 1);
        if (flatCopy)
        {
            openCLSafeCall(clEnqueueReadBuffer(getClCommandQueue(ctx), (cl_mem)src, CL_TRUE,
                                               0, width * height, dst, 0, NULL, NULL));
        }
        else
        {
            openCLSafeCall(clEnqueueReadBufferRect(getClCommandQueue(ctx), (cl_mem)src, CL_TRUE,
                                                   buffer_origin, host_origin, region,
                                                   spitch, 0, dpitch, 0, dst, 0, 0, 0));
        }
    }
}
||||
|
||||
// Enqueues a rectangular device-to-device copy of width x height bytes.
// Each linear byte offset is split into an (x, y) origin using the pitch of
// its own buffer: x = offset % pitch, y = offset / pitch.
void openCLCopyBuffer2D(Context *ctx, void *dst, size_t dpitch, int dst_offset,
                        const void *src, size_t spitch,
                        size_t width, size_t height, int src_offset)
{
    size_t src_origin[3] = {src_offset % spitch, src_offset / spitch, 0};
    size_t dst_origin[3] = {dst_offset % dpitch, dst_offset / dpitch, 0};
    size_t region[3] = {width, height, 1};

    openCLSafeCall(clEnqueueCopyBufferRect(getClCommandQueue(ctx),
                                           (cl_mem)src, (cl_mem)dst,
                                           src_origin, dst_origin, region,
                                           spitch, 0, dpitch, 0,
                                           0, 0, 0));
}
||||
|
||||
void openCLFree(void *devPtr) |
||||
{ |
||||
openCLSafeCall(clReleaseMemObject((cl_mem)devPtr)); |
||||
} |
||||
|
||||
// Overload without build options: forwards with build_options == NULL.
cl_kernel openCLGetKernelFromSource(const Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName)
{
    const char* const noBuildOptions = NULL;
    return openCLGetKernelFromSource(ctx, source, kernelName, noBuildOptions);
}
||||
|
||||
// Obtains the program for `source` / `build_options` from the program cache
// and creates the kernel named kernelName from it.
//
// The program reference handed out by ProgramCache::getProgram is released
// here in every case; the returned kernel is owned by the caller.
cl_kernel openCLGetKernelFromSource(const Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName,
                                    const char *build_options)
{
    CV_Assert(ProgramCache::getProgramCache() != NULL);
    cl_program program = ProgramCache::getProgramCache()->getProgram(ctx, source, build_options);
    CV_Assert(program != NULL);

    cl_int status = 0;
    cl_kernel kernel = clCreateKernel(program, kernelName.c_str(), &status);

    // Drop our program reference BEFORE reporting a kernel-creation error.
    // Previously the status check came first, so a failed clCreateKernel left
    // the program reference unreleased. A successfully created kernel keeps
    // the program alive on its own.
    // NOTE(review): assumes openCLVerifyCall does not return on failure - confirm.
    const cl_int releaseStatus = clReleaseProgram(program);
    openCLVerifyCall(status);

    if (releaseStatus != CL_SUCCESS && kernel != NULL)
    {
        // Do not leak the kernel if the release error is reported below.
        clReleaseKernel(kernel);
        kernel = NULL;
    }
    openCLVerifyCall(releaseStatus);

    return kernel;
}
||||
|
||||
void openCLVerifyKernel(const Context *ctx, cl_kernel kernel, size_t *localThreads) |
||||
{ |
||||
size_t kernelWorkGroupSize; |
||||
openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(ctx), |
||||
CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0)); |
||||
CV_Assert( localThreads[0] <= ctx->getDeviceInfo().maxWorkItemSizes[0] ); |
||||
CV_Assert( localThreads[1] <= ctx->getDeviceInfo().maxWorkItemSizes[1] ); |
||||
CV_Assert( localThreads[2] <= ctx->getDeviceInfo().maxWorkItemSizes[2] ); |
||||
CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= kernelWorkGroupSize ); |
||||
CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= ctx->getDeviceInfo().maxWorkGroupSize ); |
||||
} |
||||
|
||||
#ifdef PRINT_KERNEL_RUN_TIME |
||||
static double total_execute_time = 0; |
||||
static double total_kernel_time = 0; |
||||
#endif |
||||
void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], |
||||
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels, |
||||
int depth, const char *build_options) |
||||
{ |
||||
//construct kernel name
|
||||
//The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
|
||||
//for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char)
|
||||
stringstream idxStr; |
||||
if(channels != -1) |
||||
idxStr << "_C" << channels; |
||||
if(depth != -1) |
||||
idxStr << "_D" << depth; |
||||
kernelName += idxStr.str(); |
||||
|
||||
cl_kernel kernel; |
||||
kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options); |
||||
|
||||
if ( localThreads != NULL) |
||||
{ |
||||
globalThreads[0] = roundUp(globalThreads[0], localThreads[0]); |
||||
globalThreads[1] = roundUp(globalThreads[1], localThreads[1]); |
||||
globalThreads[2] = roundUp(globalThreads[2], localThreads[2]); |
||||
|
||||
cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads); |
||||
} |
||||
for(size_t i = 0; i < args.size(); i ++) |
||||
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); |
||||
|
||||
#ifndef PRINT_KERNEL_RUN_TIME |
||||
openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads, |
||||
localThreads, 0, NULL, NULL)); |
||||
#else |
||||
cl_event event = NULL; |
||||
openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads, |
||||
localThreads, 0, NULL, &event)); |
||||
|
||||
cl_ulong start_time, end_time, queue_time; |
||||
double execute_time = 0; |
||||
double total_time = 0; |
||||
|
||||
openCLSafeCall(clWaitForEvents(1, &event)); |
||||
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, |
||||
sizeof(cl_ulong), &start_time, 0)); |
||||
|
||||
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, |
||||
sizeof(cl_ulong), &end_time, 0)); |
||||
|
||||
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, |
||||
sizeof(cl_ulong), &queue_time, 0)); |
||||
|
||||
execute_time = (double)(end_time - start_time) / (1000 * 1000); |
||||
total_time = (double)(end_time - queue_time) / (1000 * 1000); |
||||
|
||||
total_execute_time += execute_time; |
||||
total_kernel_time += total_time; |
||||
clReleaseEvent(event); |
||||
#endif |
||||
|
||||
clFlush(getClCommandQueue(ctx)); |
||||
openCLSafeCall(clReleaseKernel(kernel)); |
||||
} |
||||
|
||||
// Overload without build options: forwards with build_options == NULL.
void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName,
                         size_t globalThreads[3], size_t localThreads[3],
                         vector< pair<size_t, const void *> > &args, int channels, int depth)
{
    const char* const noBuildOptions = NULL;
    openCLExecuteKernel(ctx, source, kernelName, globalThreads, localThreads, args,
                        channels, depth, noBuildOptions);
}
||||
// Runs a kernel through openCLExecuteKernel_.
//
// Normal builds: a single run.
// With PRINT_KERNEL_RUN_TIME defined: prints a header, resets the timing
// accumulators, runs the kernel RUN_TIMES times and prints the averages.
void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName,
                         size_t globalThreads[3], size_t localThreads[3],
                         vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options)

{
#ifndef PRINT_KERNEL_RUN_TIME
    openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args,
                         channels, depth, build_options);
#else
    // Printable names, indexed by depth.
    string data_type[] = { "uchar", "char", "ushort", "short", "int", "float", "double"};

    cout << endl;
    cout << "Function Name: " << kernelName;
    if (depth >= 0)
    {
        cout << " |data type: " << data_type[depth];
    }
    cout << " |channels: " << channels;
    cout << " |Time Unit: " << "ms" << endl;

    total_execute_time = 0;
    total_kernel_time = 0;
    cout << "-------------------------------------" << endl;

    cout << setiosflags(ios::left) << setw(15) << "excute time";
    cout << setiosflags(ios::left) << setw(15) << "lauch time";
    cout << setiosflags(ios::left) << setw(15) << "kernel time" << endl;

    for (int run = 0; run < RUN_TIMES; ++run)
    {
        openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args,
                             channels, depth, build_options);
    }

    cout << "average kernel excute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl;
    cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl;
#endif
}
||||
|
||||
// Creates a kernel and, when globalThreads is non-NULL, sets its arguments
// and enqueues it as a 3D range on the queue of ctx.
//
// The kernel name follows the rule functionName_Cn_Dn: "_C<channels>" and
// "_D<depth>" are appended unless the respective value is -1 (for example
// split_C2_D2 for channels = 2 and data type depth = 2).
// When localThreads is given, globalThreads is rounded up IN PLACE to a
// multiple of it and the local size is verified.
//
// finish            - call clFinish on the queue afterwards.
// measureKernelTime - wait for the kernel and return (end - queued) profiling
//                     time divided by 10^6; otherwise 0.0 is returned.
// cleanUp           - release the kernel before returning; when false the
//                     kernel is not released by this function.
double openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName,
                                  size_t globalThreads[3], size_t localThreads[3],
                                  vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
                                  bool finish, bool measureKernelTime, bool cleanUp)

{
    stringstream suffix;
    if (channels != -1)
        suffix << "_C" << channels;
    if (depth != -1)
        suffix << "_D" << depth;
    kernelName += suffix.str();

    cl_kernel kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options);

    double kernelTime = 0.0;

    if (globalThreads != NULL)
    {
        if (localThreads != NULL)
        {
            for (int dim = 0; dim < 3; ++dim)
                globalThreads[dim] = divUp(globalThreads[dim], localThreads[dim]) * localThreads[dim];

            cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads);
        }

        for (size_t argIdx = 0; argIdx < args.size(); ++argIdx)
        {
            openCLSafeCall(clSetKernelArg(kernel, argIdx, args[argIdx].first, args[argIdx].second));
        }

        if (!measureKernelTime)
        {
            openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
                                                  localThreads, 0, NULL, NULL));
        }
        else
        {
            cl_event event = NULL;
            openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
                                                  localThreads, 0, NULL, &event));

            openCLSafeCall(clWaitForEvents(1, &event));

            cl_ulong end_time, queue_time;
            openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
                                                   sizeof(cl_ulong), &end_time, 0));
            openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
                                                   sizeof(cl_ulong), &queue_time, 0));

            kernelTime = (double)(end_time - queue_time) / (1000 * 1000);

            clReleaseEvent(event);
        }
    }

    if (finish)
    {
        clFinish(getClCommandQueue(ctx));
    }

    if (cleanUp)
    {
        openCLSafeCall(clReleaseKernel(kernel));
    }

    return kernelTime;
}
||||
|
||||
// Creates a read-only buffer of `size` bytes in `context` and fills it from
// `value` with a blocking write on command_queue. Returns the new buffer.
cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
                     const size_t size)
{
    cl_int status;
    cl_mem con_struct = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, &status);
    openCLSafeCall(status);

    openCLSafeCall(clEnqueueWriteBuffer(command_queue, con_struct, CL_TRUE, 0, size,
                                        value, 0, NULL, NULL));

    return con_struct;
}
||||
|
||||
}//namespace ocl
|
||||
}//namespace cv
|
@ -0,0 +1,530 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Guoping Long, longguoping@gmail.com
|
||||
// Niko Li, newlife20080214@gmail.com
|
||||
// Yao Wang, bitwangyaoyao@gmail.com
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp" |
||||
#include <iomanip> |
||||
#include <fstream> |
||||
#include "cl_programcache.hpp" |
||||
|
||||
// workaround for OpenCL C++ bindings
|
||||
#if defined(HAVE_OPENCL12) |
||||
#include "opencv2/ocl/cl_runtime/cl_runtime_opencl12_wrappers.hpp" |
||||
#elif defined(HAVE_OPENCL11) |
||||
#include "opencv2/ocl/cl_runtime/cl_runtime_opencl11_wrappers.hpp" |
||||
#else |
||||
#error Invalid OpenCL configuration |
||||
#endif |
||||
|
||||
#if defined _MSC_VER && _MSC_VER >= 1200 |
||||
# pragma warning( disable: 4100 4244 4267 4510 4512 4610) |
||||
#endif |
||||
#undef __CL_ENABLE_EXCEPTIONS |
||||
#include <CL/cl.hpp> |
||||
|
||||
namespace cv { namespace ocl { |
||||
|
||||
#define MAX_PROG_CACHE_SIZE 1024 |
||||
/*
|
||||
* The binary caching system to eliminate redundant program source compilation. |
||||
* Strictly, this is not a cache because we do not implement evictions right now. |
||||
* We shall add such features to trade-off memory consumption and performance when necessary. |
||||
*/ |
||||
|
||||
cv::Mutex ProgramCache::mutexFiles; |
||||
cv::Mutex ProgramCache::mutexCache; |
||||
|
||||
std::auto_ptr<ProgramCache> _programCache; |
||||
ProgramCache* ProgramCache::getProgramCache() |
||||
{ |
||||
if (NULL == _programCache.get()) |
||||
_programCache.reset(new ProgramCache()); |
||||
return _programCache.get(); |
||||
} |
||||
|
||||
// Starts with an empty code cache and a zero size counter.
ProgramCache::ProgramCache()
{
    cacheSize = 0;
    codeCache.clear();
}
||||
|
||||
// Releases every cached program via releaseProgram().
ProgramCache::~ProgramCache()
{
    releaseProgram();
}
||||
|
||||
// Returns the program cached under srcsign, or NULL when there is no entry.
cl_program ProgramCache::progLookup(const string& srcsign)
{
    map<string, cl_program>::iterator pos = codeCache.find(srcsign);
    if (pos == codeCache.end())
        return NULL;
    return pos->second;
}
||||
|
||||
void ProgramCache::addProgram(const string& srcsign, cl_program program) |
||||
{ |
||||
if (!progLookup(srcsign)) |
||||
{ |
||||
clRetainProgram(program); |
||||
codeCache.insert(map<string, cl_program>::value_type(srcsign, program)); |
||||
} |
||||
} |
||||
|
||||
void ProgramCache::releaseProgram() |
||||
{ |
||||
map<string, cl_program>::iterator iter; |
||||
for(iter = codeCache.begin(); iter != codeCache.end(); iter++) |
||||
{ |
||||
openCLSafeCall(clReleaseProgram(iter->second)); |
||||
} |
||||
codeCache.clear(); |
||||
cacheSize = 0; |
||||
} |
||||
|
||||
static int enable_disk_cache = true || |
||||
#ifdef _DEBUG |
||||
false; |
||||
#else |
||||
true; |
||||
#endif |
||||
static String binpath = ""; |
||||
|
||||
void setBinaryDiskCache(int mode, String path) |
||||
{ |
||||
enable_disk_cache = 0; |
||||
binpath = ""; |
||||
|
||||
if(mode == CACHE_NONE) |
||||
{ |
||||
return; |
||||
} |
||||
enable_disk_cache = |
||||
#ifdef _DEBUG |
||||
(mode & CACHE_DEBUG) == CACHE_DEBUG; |
||||
#else |
||||
(mode & CACHE_RELEASE) == CACHE_RELEASE; |
||||
#endif |
||||
if(enable_disk_cache && !path.empty()) |
||||
{ |
||||
binpath = path; |
||||
} |
||||
} |
||||
|
||||
void setBinaryPath(const char *path) |
||||
{ |
||||
binpath = path; |
||||
} |
||||
|
||||
static const int MAX_ENTRIES = 64; |
||||
|
||||
struct ProgramFileCache |
||||
{ |
||||
struct CV_DECL_ALIGNED(1) ProgramFileHeader |
||||
{ |
||||
int hashLength; |
||||
//char hash[];
|
||||
}; |
||||
|
||||
struct CV_DECL_ALIGNED(1) ProgramFileTable |
||||
{ |
||||
int numberOfEntries; |
||||
//int firstEntryOffset[];
|
||||
}; |
||||
|
||||
struct CV_DECL_ALIGNED(1) ProgramFileConfigurationEntry |
||||
{ |
||||
int nextEntry; |
||||
int dataSize; |
||||
int optionsLength; |
||||
//char options[];
|
||||
// char data[];
|
||||
}; |
||||
|
||||
string fileName_; |
||||
const char* hash_; |
||||
std::fstream f; |
||||
|
||||
ProgramFileCache(const string& fileName, const char* hash) |
||||
: fileName_(fileName), hash_(hash) |
||||
{ |
||||
if (hash_ != NULL) |
||||
{ |
||||
f.open(fileName_.c_str(), ios::in|ios::out|ios::binary); |
||||
if(f.is_open()) |
||||
{ |
||||
int hashLength = 0; |
||||
f.read((char*)&hashLength, sizeof(int)); |
||||
std::vector<char> fhash(hashLength + 1); |
||||
f.read(&fhash[0], hashLength); |
||||
if (f.eof() || strncmp(hash_, &fhash[0], hashLength) != 0) |
||||
{ |
||||
f.close(); |
||||
remove(fileName_.c_str()); |
||||
return; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
int getHash(const string& options) |
||||
{ |
||||
int hash = 0; |
||||
for (size_t i = 0; i < options.length(); i++) |
||||
{ |
||||
hash = (hash << 2) ^ (hash >> 17) ^ options[i]; |
||||
} |
||||
return (hash + (hash >> 16)) & (MAX_ENTRIES - 1); |
||||
} |
||||
|
||||
bool readConfigurationFromFile(const string& options, std::vector<char>& buf) |
||||
{ |
||||
if (hash_ == NULL) |
||||
return false; |
||||
|
||||
if (!f.is_open()) |
||||
return false; |
||||
|
||||
f.seekg(0, std::fstream::end); |
||||
size_t fileSize = (size_t)f.tellg(); |
||||
if (fileSize == 0) |
||||
{ |
||||
std::cerr << "Invalid file (empty): " << fileName_ << std::endl; |
||||
f.close(); |
||||
remove(fileName_.c_str()); |
||||
return false; |
||||
} |
||||
f.seekg(0, std::fstream::beg); |
||||
|
||||
int hashLength = 0; |
||||
f.read((char*)&hashLength, sizeof(int)); |
||||
CV_Assert(hashLength > 0); |
||||
f.seekg(sizeof(hashLength) + hashLength, std::fstream::beg); |
||||
|
||||
int numberOfEntries = 0; |
||||
f.read((char*)&numberOfEntries, sizeof(int)); |
||||
CV_Assert(numberOfEntries > 0); |
||||
if (numberOfEntries != MAX_ENTRIES) |
||||
{ |
||||
std::cerr << "Invalid file: " << fileName_ << std::endl; |
||||
f.close(); |
||||
remove(fileName_.c_str()); |
||||
return false; |
||||
} |
||||
|
||||
std::vector<int> firstEntryOffset(numberOfEntries); |
||||
f.read((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries); |
||||
|
||||
int entryNum = getHash(options); |
||||
|
||||
int entryOffset = firstEntryOffset[entryNum]; |
||||
ProgramFileConfigurationEntry entry; |
||||
while (entryOffset > 0) |
||||
{ |
||||
f.seekg(entryOffset, std::fstream::beg); |
||||
assert(sizeof(entry) == sizeof(int)*3); |
||||
f.read((char*)&entry, sizeof(entry)); |
||||
std::vector<char> foptions(entry.optionsLength); |
||||
if ((int)options.length() == entry.optionsLength) |
||||
{ |
||||
if (entry.optionsLength > 0) |
||||
f.read(&foptions[0], entry.optionsLength); |
||||
if (memcmp(&foptions[0], options.c_str(), entry.optionsLength) == 0) |
||||
{ |
||||
buf.resize(entry.dataSize); |
||||
f.read(&buf[0], entry.dataSize); |
||||
f.seekg(0, std::fstream::beg); |
||||
return true; |
||||
} |
||||
} |
||||
if (entry.nextEntry <= 0) |
||||
break; |
||||
entryOffset = entry.nextEntry; |
||||
} |
||||
return false; |
||||
} |
||||
|
||||
bool writeConfigurationToFile(const string& options, std::vector<char>& buf) |
||||
{ |
||||
if (hash_ == NULL) |
||||
return true; // don't save dynamic kernels
|
||||
|
||||
if (!f.is_open()) |
||||
{ |
||||
f.open(fileName_.c_str(), ios::in|ios::out|ios::binary); |
||||
if (!f.is_open()) |
||||
{ |
||||
f.open(fileName_.c_str(), ios::out|ios::binary); |
||||
if (!f.is_open()) |
||||
return false; |
||||
} |
||||
} |
||||
|
||||
f.seekg(0, std::fstream::end); |
||||
size_t fileSize = (size_t)f.tellg(); |
||||
if (fileSize == 0) |
||||
{ |
||||
f.seekp(0, std::fstream::beg); |
||||
int hashLength = strlen(hash_); |
||||
f.write((char*)&hashLength, sizeof(int)); |
||||
f.write(hash_, hashLength); |
||||
|
||||
int numberOfEntries = MAX_ENTRIES; |
||||
f.write((char*)&numberOfEntries, sizeof(int)); |
||||
std::vector<int> firstEntryOffset(MAX_ENTRIES, 0); |
||||
f.write((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries); |
||||
f.close(); |
||||
f.open(fileName_.c_str(), ios::in|ios::out|ios::binary); |
||||
CV_Assert(f.is_open()); |
||||
f.seekg(0, std::fstream::end); |
||||
fileSize = (size_t)f.tellg(); |
||||
} |
||||
f.seekg(0, std::fstream::beg); |
||||
|
||||
int hashLength = 0; |
||||
f.read((char*)&hashLength, sizeof(int)); |
||||
CV_Assert(hashLength > 0); |
||||
f.seekg(sizeof(hashLength) + hashLength, std::fstream::beg); |
||||
|
||||
int numberOfEntries = 0; |
||||
f.read((char*)&numberOfEntries, sizeof(int)); |
||||
CV_Assert(numberOfEntries > 0); |
||||
if (numberOfEntries != MAX_ENTRIES) |
||||
{ |
||||
std::cerr << "Invalid file: " << fileName_ << std::endl; |
||||
f.close(); |
||||
remove(fileName_.c_str()); |
||||
return false; |
||||
} |
||||
|
||||
size_t tableEntriesOffset = (size_t)f.tellg(); |
||||
std::vector<int> firstEntryOffset(numberOfEntries); |
||||
f.read((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries); |
||||
|
||||
int entryNum = getHash(options); |
||||
|
||||
int entryOffset = firstEntryOffset[entryNum]; |
||||
ProgramFileConfigurationEntry entry; |
||||
while (entryOffset > 0) |
||||
{ |
||||
f.seekg(entryOffset, std::fstream::beg); |
||||
assert(sizeof(entry) == sizeof(int)*3); |
||||
f.read((char*)&entry, sizeof(entry)); |
||||
std::vector<char> foptions(entry.optionsLength); |
||||
if ((int)options.length() == entry.optionsLength) |
||||
{ |
||||
if (entry.optionsLength > 0) |
||||
f.read(&foptions[0], entry.optionsLength); |
||||
CV_Assert(memcmp(&foptions, options.c_str(), entry.optionsLength) != 0); |
||||
} |
||||
if (entry.nextEntry <= 0) |
||||
break; |
||||
entryOffset = entry.nextEntry; |
||||
} |
||||
if (entryOffset > 0) |
||||
{ |
||||
f.seekp(entryOffset, std::fstream::beg); |
||||
entry.nextEntry = fileSize; |
||||
f.write((char*)&entry, sizeof(entry)); |
||||
} |
||||
else |
||||
{ |
||||
firstEntryOffset[entryNum] = fileSize; |
||||
f.seekp(tableEntriesOffset, std::fstream::beg); |
||||
f.write((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries); |
||||
} |
||||
f.seekp(fileSize, std::fstream::beg); |
||||
entry.nextEntry = 0; |
||||
entry.dataSize = buf.size(); |
||||
entry.optionsLength = options.length(); |
||||
f.write((char*)&entry, sizeof(entry)); |
||||
f.write(options.c_str(), entry.optionsLength); |
||||
f.write(&buf[0], entry.dataSize); |
||||
return true; |
||||
} |
||||
|
||||
/**
 * Returns an OpenCL program for `source` built with `options`.
 *
 * When the disk cache is enabled and readConfigurationFromFile() yields a
 * stored binary for these options, the program is created from that binary.
 * Otherwise it is compiled from source and, if the disk cache is enabled and
 * the build succeeded, the resulting binary is handed to
 * writeConfigurationToFile(). A failed write is only reported on std::cerr.
 *
 * If the build fails with CL_BUILD_PROGRAM_FAILURE the build log is printed
 * to std::cout before the status is passed to openCLVerifyCall().
 *
 * @param ctx      Context providing the OpenCL context and device.
 * @param source   Program entry whose programStr is compiled.
 * @param options  Build options passed to clBuildProgram.
 * @return The program handle.
 */
cl_program getOrBuildProgram(const Context* ctx, const cv::ocl::ProgramEntry* source, const string& options)
{
    cl_int status = 0;
    cl_program program = NULL;
    std::vector<char> binary;
    if (!enable_disk_cache || !readConfigurationFromFile(options, binary))
    {
        program = clCreateProgramWithSource(getClContext(ctx), 1, (const char**)&source->programStr, NULL, &status);
        openCLVerifyCall(status);
        cl_device_id device = getClDeviceID(ctx);
        status = clBuildProgram(program, 1, &device, options.c_str(), NULL, NULL);
        if (status == CL_SUCCESS && enable_disk_cache)
        {
            size_t binarySize = 0;
            openCLSafeCall(clGetProgramInfo(program,
                                            CL_PROGRAM_BINARY_SIZES,
                                            sizeof(size_t),
                                            &binarySize, NULL));

            // An empty binary has no storage to hand to OpenCL and nothing
            // worth caching.
            if (binarySize > 0)
            {
                binary.assign(binarySize, 0);
                char* ptr = &binary[0];
                openCLSafeCall(clGetProgramInfo(program,
                                                CL_PROGRAM_BINARIES,
                                                sizeof(char*),
                                                &ptr,
                                                NULL));

                if (!writeConfigurationToFile(options, binary))
                {
                    std::cerr << "Can't write data to file: " << fileName_ << std::endl;
                }
            }
        }
    }
    else
    {
        cl_device_id device = getClDeviceID(ctx);
        size_t size = binary.size();
        // Never index an empty vector; a NULL binary is left for OpenCL to reject.
        const unsigned char* ptr = binary.empty()
            ? NULL
            : reinterpret_cast<const unsigned char*>(&binary[0]);
        program = clCreateProgramWithBinary(getClContext(ctx),
                                            1, &device,
                                            &size, &ptr,
                                            NULL, &status);
        openCLVerifyCall(status);
        status = clBuildProgram(program, 1, &device, options.c_str(), NULL, NULL);
    }

    if (status != CL_SUCCESS)
    {
        if (status == CL_BUILD_PROGRAM_FAILURE)
        {
            // First query only asks for the size of the log.
            size_t buildLogSize = 0;
            cl_int logStatus = clGetProgramBuildInfo(program,
                                                     getClDeviceID(ctx), CL_PROGRAM_BUILD_LOG, 0,
                                                     NULL, &buildLogSize);
            if (logStatus != CL_SUCCESS)
                std::cout << "Failed to build the program and get the build info." << endl;

            // Vector-owned storage is released on every exit path. The extra
            // zeroed byte keeps the buffer a valid C string even when the
            // reported size is 0.
            std::vector<char> buildLog(buildLogSize + 1, 0);
            openCLSafeCall(clGetProgramBuildInfo(program, getClDeviceID(ctx),
                                                 CL_PROGRAM_BUILD_LOG, buildLogSize, &buildLog[0], NULL));
            std::cout << "\nBUILD LOG: " << options << "\n";
            std::cout << &buildLog[0] << endl;
        }
        openCLVerifyCall(status);
    }
    return program;
}
||||
}; |
||||
|
||||
/**
 * Returns the OpenCL program for `source`, taking it from the in-memory
 * cache when possible and building (or loading from the per-device cache
 * file) otherwise.
 *
 * The in-memory lookup is done once under mutexCache alone and once more
 * after mutexFiles has been acquired. A program found in the cache is
 * retained with clRetainProgram() before it is returned.
 *
 * @param ctx            Context supplying the OpenCL context and device info.
 * @param source         Program entry (name, source string, hash).
 * @param build_options  Extra build options, may be NULL.
 * @return The program handle.
 */
cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEntry* source,
                                    const char *build_options)
{
    // Cache key: address of the source string, the context handle and,
    // when given, the build options.
    stringstream keyStream;
    keyStream << (int64)(source->programStr);
    keyStream << getClContext(ctx);
    if (build_options != NULL)
        keyStream << "_" << build_options;
    const string cacheKey = keyStream.str();

    // First lookup: only the in-memory cache mutex is held.
    {
        cv::AutoLock cacheGuard(mutexCache);
        cl_program cached = ProgramCache::getProgramCache()->progLookup(cacheKey);
        if (cached != NULL)
        {
            clRetainProgram(cached);
            return cached;
        }
    }

    cv::AutoLock filesGuard(mutexFiles);

    // Second lookup, now that mutexFiles is held.
    {
        cv::AutoLock cacheGuard(mutexCache);
        cl_program cached = ProgramCache::getProgramCache()->progLookup(cacheKey);
        if (cached != NULL)
        {
            clRetainProgram(cached);
            return cached;
        }
    }

    const DeviceInfo& devInfo = ctx->getDeviceInfo();

    // Device-wide extra options first, then the caller's options after a space.
    string all_build_options;
    if (!devInfo.compilationExtraOptions.empty())
        all_build_options += devInfo.compilationExtraOptions;
    if (build_options != NULL)
    {
        all_build_options += " ";
        all_build_options += build_options;
    }

    // One cache file per (program name, platform, device).
    const char* programName = source->name ? source->name : "NULL";
    string filename = binpath + programName + "_" + devInfo.platform->platformName + "_" + devInfo.deviceName + ".clb";

    ProgramFileCache programFileCache(filename, source->programHash);
    cl_program program = programFileCache.getOrBuildProgram(ctx, source, all_build_options);

    // Remember the program in memory while the cache still has room.
    const bool hasRoom = (this->cacheSize += 1) < MAX_PROG_CACHE_SIZE;
    if (!hasRoom)
    {
        cout << "Warning: code cache has been full.\n";
        return program;
    }

    cv::AutoLock cacheGuard(mutexCache);
    this->addProgram(cacheKey, program);
    return program;
}
||||
|
||||
} // namespace ocl
|
||||
} // namespace cv
|
@ -0,0 +1,6 @@ |
||||
#ifndef __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ |
||||
#define __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ |
||||
|
||||
@CL_FN_INLINE_WRAPPERS@ |
||||
|
||||
#endif // __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue