diff --git a/modules/core/include/opencv2/core/directx.hpp b/modules/core/include/opencv2/core/directx.hpp index 837548e51b..275df720de 100644 --- a/modules/core/include/opencv2/core/directx.hpp +++ b/modules/core/include/opencv2/core/directx.hpp @@ -71,9 +71,28 @@ using namespace cv::ocl; //! @{ // TODO static functions in the Context class +//! @brief Creates OpenCL context from D3D11 device +// +//! @param pD3D11Device - pointer to D3D11 device +//! @return Returns reference to OpenCL Context CV_EXPORTS Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device); + +//! @brief Creates OpenCL context from D3D10 device +// +//! @param pD3D10Device - pointer to D3D10 device +//! @return Returns reference to OpenCL Context CV_EXPORTS Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device); + +//! @brief Creates OpenCL context from Direct3DDevice9Ex device +// +//! @param pDirect3DDevice9Ex - pointer to Direct3DDevice9Ex device +//! @return Returns reference to OpenCL Context CV_EXPORTS Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDevice9Ex); + +//! @brief Creates OpenCL context from Direct3DDevice9 device +// +//! @param pDirect3DDevice9 - pointer to Direct3Device9 device +//! @return Returns reference to OpenCL Context CV_EXPORTS Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9); //! @} @@ -83,19 +102,70 @@ CV_EXPORTS Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDire //! @addtogroup core_directx //! @{ +//! @brief Converts InputArray to ID3D11Texture2D +// +//! @note Note: function does memory copy from src to +//! pD3D11Texture2D +// +//! @param src - source InputArray +//! @param pD3D11Texture2D - destination D3D11 texture CV_EXPORTS void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D); + +//! @brief Converts ID3D11Texture2D to OutputArray +// +//! @note Note: function does memory copy from pD3D11Texture2D +//! to dst +// +//! @param pD3D11Texture2D - source D3D11 texture +//! @param dst - destination OutputArray CV_EXPORTS void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst); +//! @brief Converts InputArray to ID3D10Texture2D +// +//! @note Note: function does memory copy from src to +//! pD3D10Texture2D +// +//! @param src - source InputArray +//! @param pD3D10Texture2D - destination D3D10 texture CV_EXPORTS void convertToD3D10Texture2D(InputArray src, ID3D10Texture2D* pD3D10Texture2D); + +//! @brief Converts ID3D10Texture2D to OutputArray +// +//! @note Note: function does memory copy from pD3D10Texture2D +//! to dst +// +//! @param pD3D10Texture2D - source D3D10 texture +//! @param dst - destination OutputArray CV_EXPORTS void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst); +//! @brief Converts InputArray to IDirect3DSurface9 +// +//! @note Note: function does memory copy from src to +//! pDirect3DSurface9 +// +//! @param src - source InputArray +//! @param pDirect3DSurface9 - destination D3D10 texture +//! @param surfaceSharedHandle - shared handle CV_EXPORTS void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurface9, void* surfaceSharedHandle = NULL); + +//! @brief Converts IDirect3DSurface9 to OutputArray +// +//! @note Note: function does memory copy from pDirect3DSurface9 +//! to dst +// +//! @param pDirect3DSurface9 - source D3D10 texture +//! @param dst - destination OutputArray +//! @param surfaceSharedHandle - shared handle CV_EXPORTS void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArray dst, void* surfaceSharedHandle = NULL); -// Get OpenCV type from DirectX type, return -1 if there is no equivalent +//! @brief Get OpenCV type from DirectX type +//! @param iDXGI_FORMAT - enum DXGI_FORMAT for D3D10/D3D11 +//! @return OpenCV type or -1 if there is no equivalent CV_EXPORTS int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT); // enum DXGI_FORMAT for D3D10/D3D11 -// Get OpenCV type from DirectX type, return -1 if there is no equivalent +//! @brief Get OpenCV type from DirectX type +//! @param iD3DFORMAT - enum D3DTYPE for D3D9 +//! @return OpenCV type or -1 if there is no equivalent CV_EXPORTS int getTypeFromD3DFORMAT(const int iD3DFORMAT); // enum D3DTYPE for D3D9 //! @} diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index 173722f61f..bc989a3285 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -276,6 +276,58 @@ protected: Impl* p; }; +/* +//! @brief Attaches OpenCL context to OpenCV +// +//! @note Note: +// OpenCV will check if available OpenCL platform has platformName name, +// then assign context to OpenCV and call clRetainContext function. +// The deviceID device will be used as target device and new command queue +// will be created. +// +// Params: +//! @param platformName - name of OpenCL platform to attach, +//! this string is used to check if platform is available +//! to OpenCV at runtime +//! @param platfromID - ID of platform attached context was created for +//! @param context - OpenCL context to be attached to OpenCV +//! @param deviceID - ID of device, must be created from attached context +*/ +CV_EXPORTS void attachContext(const String& platformName, void* platformID, void* context, void* deviceID); + +/* +//! @brief Convert OpenCL buffer to UMat +// +//! @note Note: +// OpenCL buffer (cl_mem_buffer) should contain 2D image data, compatible with OpenCV. +// Memory content is not copied from clBuffer to UMat. Instead, buffer handle assigned +// to UMat and clRetainMemObject is called. +// +// Params: +//! @param cl_mem_buffer - source clBuffer handle +//! @param step - num of bytes in single row +//! @param rows - number of rows +//! @param cols - number of cols +//! @param type - OpenCV type of image +//! @param dst - destination UMat +*/ +CV_EXPORTS void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst); + +/* +//! @brief Convert OpenCL image2d_t to UMat +// +//! @note Note: +// OpenCL image2d_t (cl_mem_image), should be compatible with OpenCV +// UMat formats. +// Memory content is copied from image to UMat with +// clEnqueueCopyImageToBuffer function. +// +// Params: +//! @param cl_mem_image - source image2d_t handle +//! @param dst - destination UMat +*/ +CV_EXPORTS void convertFromImage(void* cl_mem_image, UMat& dst); + // TODO Move to internal header void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device); diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 5d68a36832..4ee770c679 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -858,9 +858,9 @@ OCL_FUNC_P(cl_context, clCreateContext, OCL_FUNC(cl_int, clReleaseContext, (cl_context context), (context)) -/* -OCL_FUNC(cl_int, clRetainContext, (cl_context context), (context)) +OCL_FUNC(cl_int, clRetainContext, (cl_context context), (context)) +/* OCL_FUNC_P(cl_context, clCreateContextFromType, (const cl_context_properties * properties, cl_device_type device_type, @@ -945,7 +945,6 @@ OCL_FUNC(cl_int, clGetSupportedImageFormats, (context, flags, image_type, num_entries, image_formats, num_image_formats)) -/* OCL_FUNC(cl_int, clGetMemObjectInfo, (cl_mem memobj, cl_mem_info param_name, @@ -962,6 +961,7 @@ OCL_FUNC(cl_int, clGetImageInfo, size_t * param_value_size_ret), (image, param_name, param_value_size, param_value, param_value_size_ret)) +/* OCL_FUNC(cl_int, clCreateKernelsInProgram, (cl_program program, cl_uint num_kernels, @@ -1038,20 +1038,20 @@ OCL_FUNC(cl_int, clEnqueueCopyImage, cl_event * event), (command_queue, src_image, dst_image, src_origin, dst_origin, region, num_events_in_wait_list, event_wait_list, event)) +*/ OCL_FUNC(cl_int, clEnqueueCopyImageToBuffer, (cl_command_queue command_queue, cl_mem src_image, cl_mem dst_buffer, - const size_t * src_origin[3], - const size_t * region[3], + const size_t * src_origin, + const size_t * region, size_t dst_offset, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event), (command_queue, src_image, dst_buffer, src_origin, region, dst_offset, num_events_in_wait_list, event_wait_list, event)) -*/ OCL_FUNC(cl_int, clEnqueueCopyBufferToImage, (cl_command_queue command_queue, @@ -1100,10 +1100,10 @@ OCL_FUNC(cl_int, clGetKernelInfo, size_t * param_value_size_ret), (kernel, param_name, param_value_size, param_value, param_value_size_ret)) -OCL_FUNC(cl_int, clRetainMemObject, (cl_mem memobj), (memobj)) - */ +OCL_FUNC(cl_int, clRetainMemObject, (cl_mem memobj), (memobj)) + OCL_FUNC(cl_int, clReleaseMemObject, (cl_mem memobj), (memobj)) @@ -1348,7 +1348,7 @@ OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event)) #define CL_VERSION_1_2 #endif -#endif +#endif // HAVE_OPENCL #ifdef _DEBUG #define CV_OclDbgAssert CV_DbgAssert @@ -2925,6 +2925,83 @@ CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags) #endif // HAVE_OPENCL_SVM +static void get_platform_name(cl_platform_id id, String& name) +{ + // get platform name string length + size_t sz = 0; + if (CL_SUCCESS != clGetPlatformInfo(id, CL_PLATFORM_NAME, 0, 0, &sz)) + CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformInfo failed!"); + + // get platform name string + AutoBuffer buf(sz + 1); + if (CL_SUCCESS != clGetPlatformInfo(id, CL_PLATFORM_NAME, sz, buf, 0)) + CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformInfo failed!"); + + // just in case, ensure trailing zero for ASCIIZ string + buf[sz] = 0; + + name = (const char*)buf; +} + +/* +// Attaches OpenCL context to OpenCV +*/ +void attachContext(const String& platformName, void* platformID, void* context, void* deviceID) +{ + cl_uint cnt = 0; + + if(CL_SUCCESS != clGetPlatformIDs(0, 0, &cnt)) + CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformIDs failed!"); + + if (cnt == 0) + CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "no OpenCL platform available!"); + + std::vector platforms(cnt); + + if(CL_SUCCESS != clGetPlatformIDs(cnt, &platforms[0], 0)) + CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformIDs failed!"); + + bool platformAvailable = false; + + // check if external platformName contained in list of available platforms in OpenCV + for (unsigned int i = 0; i < cnt; i++) + { + String availablePlatformName; + get_platform_name(platforms[i], availablePlatformName); + // external platform is found in the list of available platforms + if (platformName == availablePlatformName) + { + platformAvailable = true; + break; + } + } + + if (!platformAvailable) + CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "No matched platforms available!"); + + // check if platformID corresponds to platformName + String actualPlatformName; + get_platform_name((cl_platform_id)platformID, actualPlatformName); + if (platformName != actualPlatformName) + CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "No matched platforms available!"); + + // do not initialize OpenCL context + Context ctx = Context::getDefault(false); + + // attach supplied context to OpenCV + initializeContextFromHandle(ctx, platformID, context, deviceID); + + if(CL_SUCCESS != clRetainContext((cl_context)context)) + CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clRetainContext failed!"); + + // clear command queue, if any + getCoreTlsData().get()->oclQueue.finish(); + Queue q; + getCoreTlsData().get()->oclQueue = q; + + return; +} // attachContext() + void initializeContextFromHandle(Context& ctx, void* platform, void* _context, void* _device) { @@ -3150,10 +3227,10 @@ struct Kernel::Impl bool haveTempDstUMats; }; -}} +}} // namespace cv::ocl + +extern "C" { -extern "C" -{ static void CL_CALLBACK oclCleanupCallback(cl_event, cl_int, void *p) { ((cv::ocl::Kernel::Impl*)p)->finit(); @@ -5166,6 +5243,167 @@ MatAllocator* getOpenCLAllocator() return allocator; } +}} // namespace cv::ocl + + +namespace cv { + +// three funcs below are implemented in umatrix.cpp +void setSize( UMat& m, int _dims, const int* _sz, const size_t* _steps, + bool autoSteps = false ); + +void updateContinuityFlag(UMat& m); +void finalizeHdr(UMat& m); + +} // namespace cv + + +namespace cv { namespace ocl { + +/* +// Convert OpenCL buffer memory to UMat +*/ +void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst) +{ + int d = 2; + int sizes[] = { rows, cols }; + + CV_Assert(0 <= d && d <= CV_MAX_DIM); + + dst.release(); + + dst.flags = (type & Mat::TYPE_MASK) | Mat::MAGIC_VAL; + dst.usageFlags = USAGE_DEFAULT; + + setSize(dst, d, sizes, 0, true); + dst.offset = 0; + + cl_mem memobj = (cl_mem)cl_mem_buffer; + cl_mem_object_type mem_type = 0; + + CV_Assert(clGetMemObjectInfo(memobj, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0) == CL_SUCCESS); + + CV_Assert(CL_MEM_OBJECT_BUFFER == mem_type); + + size_t total = 0; + CV_Assert(clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof(size_t), &total, 0) == CL_SUCCESS); + + CV_Assert(clRetainMemObject(memobj) == CL_SUCCESS); + + CV_Assert((int)step >= cols * CV_ELEM_SIZE(type)); + CV_Assert(total >= rows * step); + + // attach clBuffer to UMatData + dst.u = new UMatData(getOpenCLAllocator()); + dst.u->data = 0; + dst.u->allocatorFlags_ = 0; // not allocated from any OpenCV buffer pool + dst.u->flags = 0; + dst.u->handle = cl_mem_buffer; + dst.u->origdata = 0; + dst.u->prevAllocator = 0; + dst.u->size = total; + + finalizeHdr(dst); + dst.addref(); + + return; +} // convertFromBuffer() + + +/* +// Convert OpenCL image2d_t memory to UMat +*/ +void convertFromImage(void* cl_mem_image, UMat& dst) +{ + cl_mem clImage = (cl_mem)cl_mem_image; + cl_mem_object_type mem_type = 0; + + CV_Assert(clGetMemObjectInfo(clImage, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0) == CL_SUCCESS); + + CV_Assert(CL_MEM_OBJECT_IMAGE2D == mem_type); + + cl_image_format fmt = { 0, 0 }; + CV_Assert(clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(cl_image_format), &fmt, 0) == CL_SUCCESS); + + int depth = CV_8U; + switch (fmt.image_channel_data_type) + { + case CL_UNORM_INT8: + case CL_UNSIGNED_INT8: + depth = CV_8U; + break; + + case CL_SNORM_INT8: + case CL_SIGNED_INT8: + depth = CV_8S; + break; + + case CL_UNORM_INT16: + case CL_UNSIGNED_INT16: + depth = CV_16U; + break; + + case CL_SNORM_INT16: + case CL_SIGNED_INT16: + depth = CV_16S; + break; + + case CL_SIGNED_INT32: + depth = CV_32S; + break; + + case CL_FLOAT: + depth = CV_32F; + break; + + default: + CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_data_type"); + } + + int type = CV_8UC1; + switch (fmt.image_channel_order) + { + case CL_R: + type = CV_MAKE_TYPE(depth, 1); + break; + + case CL_RGBA: + case CL_BGRA: + case CL_ARGB: + type = CV_MAKE_TYPE(depth, 4); + break; + + default: + CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_order"); + break; + } + + size_t step = 0; + CV_Assert(clGetImageInfo(clImage, CL_IMAGE_ROW_PITCH, sizeof(size_t), &step, 0) == CL_SUCCESS); + + size_t w = 0; + CV_Assert(clGetImageInfo(clImage, CL_IMAGE_WIDTH, sizeof(size_t), &w, 0) == CL_SUCCESS); + + size_t h = 0; + CV_Assert(clGetImageInfo(clImage, CL_IMAGE_HEIGHT, sizeof(size_t), &h, 0) == CL_SUCCESS); + + dst.create((int)h, (int)w, type); + + cl_mem clBuffer = (cl_mem)dst.handle(ACCESS_READ); + + cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); + + size_t offset = 0; + size_t src_origin[3] = { 0, 0, 0 }; + size_t region[3] = { w, h, 1 }; + CV_Assert(clEnqueueCopyImageToBuffer(q, clImage, clBuffer, src_origin, region, offset, 0, NULL, NULL) == CL_SUCCESS); + + CV_Assert(clFinish(q) == CL_SUCCESS); + + return; +} // convertFromImage() + + ///////////////////////////////////////////// Utility functions ///////////////////////////////////////////////// static void getDevices(std::vector& devices, cl_platform_id platform) diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp index 1b42f1ee1e..48aa86635d 100644 --- a/modules/core/src/umatrix.cpp +++ b/modules/core/src/umatrix.cpp @@ -46,6 +46,13 @@ namespace cv { +// forward decls, implementation is below in this file +void setSize(UMat& m, int _dims, const int* _sz, const size_t* _steps, + bool autoSteps = false); + +void updateContinuityFlag(UMat& m); +void finalizeHdr(UMat& m); + // it should be a prime number for the best hash function enum { UMAT_NLOCKS = 31 }; static Mutex umatLocks[UMAT_NLOCKS]; @@ -123,8 +130,8 @@ void swap( UMat& a, UMat& b ) } -static inline void setSize( UMat& m, int _dims, const int* _sz, - const size_t* _steps, bool autoSteps=false ) +void setSize( UMat& m, int _dims, const int* _sz, + const size_t* _steps, bool autoSteps ) { CV_Assert( 0 <= _dims && _dims <= CV_MAX_DIM ); if( m.dims != _dims ) @@ -176,7 +183,8 @@ static inline void setSize( UMat& m, int _dims, const int* _sz, } } -static void updateContinuityFlag(UMat& m) + +void updateContinuityFlag(UMat& m) { int i, j; for( i = 0; i < m.dims; i++ ) @@ -199,7 +207,7 @@ static void updateContinuityFlag(UMat& m) } -static void finalizeHdr(UMat& m) +void finalizeHdr(UMat& m) { updateContinuityFlag(m); int d = m.dims; @@ -207,6 +215,7 @@ static void finalizeHdr(UMat& m) m.rows = m.cols = -1; } + UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const { UMat hdr; diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 467ca162a7..ef6cd772f4 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -66,6 +66,8 @@ endif() add_subdirectory(cpp) # FIXIT: can't use cvconfig.h in samples: add_subdirectory(gpu) +add_subdirectory(opencl) + if(WIN32) add_subdirectory(directx) endif() diff --git a/samples/opencl/CMakeLists.txt b/samples/opencl/CMakeLists.txt new file mode 100644 index 0000000000..a4525650e1 --- /dev/null +++ b/samples/opencl/CMakeLists.txt @@ -0,0 +1,68 @@ +# cmake 3.1 needed for find_package(OpenCL) + +if(CMAKE_VERSION VERSION_LESS "3.1") + message(STATUS "OpenCL samples require CMakes 3.1+") + return() +endif() + +set( + OPENCV_OPENCL_SAMPLES_REQUIRED_DEPS + opencv_core + opencv_imgproc + opencv_video + opencv_imgcodecs + opencv_videoio + opencv_highgui) + +ocv_check_dependencies(${OPENCV_OPENCL_SAMPLES_REQUIRED_DEPS}) + +if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND) + + find_package(OpenCL 1.2 REQUIRED) + + set(project "opencl") + string(TOUPPER "${project}" project_upper) + + project("${project}_samples") + + ocv_include_modules_recurse(${OPENCV_OPENCL_SAMPLES_REQUIRED_DEPS}) + + include_directories(${OpenCL_INCLUDE_DIR}) + + # --------------------------------------------- + # Define executable targets + # --------------------------------------------- + MACRO(OPENCV_DEFINE_OPENCL_EXAMPLE name srcs) + set(the_target "example_${project}_${name}") + add_executable(${the_target} ${srcs}) + + ocv_target_link_libraries( + ${the_target} + ${OPENCV_LINKER_LIBS} + ${OPENCV_OPENCL_SAMPLES_REQUIRED_DEPS} + ${OpenCL_LIBRARY}) + + set_target_properties(${the_target} PROPERTIES + OUTPUT_NAME "${project}-example-${name}" + PROJECT_LABEL "(EXAMPLE_${project_upper}) ${name}") + + if(ENABLE_SOLUTION_FOLDERS) + set_target_properties(${the_target} PROPERTIES FOLDER "samples//${project}") + endif() + + if(WIN32) + if(MSVC AND NOT BUILD_SHARED_LIBS) + set_target_properties(${the_target} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /DEBUG") + endif() + install(TARGETS ${the_target} RUNTIME DESTINATION "${OPENCV_SAMPLES_BIN_INSTALL_PATH}/${project}" COMPONENT main) + endif() + ENDMACRO() + + file(GLOB all_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) + + foreach(sample_filename ${all_samples}) + get_filename_component(sample ${sample_filename} NAME_WE) + file(GLOB sample_srcs RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${sample}.*) + OPENCV_DEFINE_OPENCL_EXAMPLE(${sample} ${sample_srcs}) + endforeach() +endif() diff --git a/samples/opencl/opencl-opencv-interop.cpp b/samples/opencl/opencl-opencv-interop.cpp new file mode 100644 index 0000000000..37a902e0dd --- /dev/null +++ b/samples/opencl/opencl-opencv-interop.cpp @@ -0,0 +1,966 @@ +/* +// The example of interoperability between OpenCL and OpenCV. +// This will loop through frames of video either from input media file +// or camera device and do processing of these data in OpenCL and then +// in OpenCV. In OpenCL it does inversion of pixels in half of frame and +// in OpenCV it does bluring the whole frame. +*/ +#include +#include +#include +#include +#include +#include +#include +#include + +#if __APPLE__ +#include +#else +#include +#endif + +#include +#include +#include +#include +#include + + +using namespace std; +using namespace cv; + +namespace opencl { + +class PlatformInfo +{ +public: + PlatformInfo() + {} + + ~PlatformInfo() + {} + + cl_int QueryInfo(cl_platform_id id) + { + query_param(id, CL_PLATFORM_PROFILE, m_profile); + query_param(id, CL_PLATFORM_VERSION, m_version); + query_param(id, CL_PLATFORM_NAME, m_name); + query_param(id, CL_PLATFORM_VENDOR, m_vendor); + query_param(id, CL_PLATFORM_EXTENSIONS, m_extensions); + return CL_SUCCESS; + } + + std::string Profile() { return m_profile; } + std::string Version() { return m_version; } + std::string Name() { return m_name; } + std::string Vendor() { return m_vendor; } + std::string Extensions() { return m_extensions; } + +private: + cl_int query_param(cl_platform_id id, cl_platform_info param, std::string& paramStr) + { + cl_int res; + + size_t psize; + cv::AutoBuffer buf; + + res = clGetPlatformInfo(id, param, 0, 0, &psize); + if (CL_SUCCESS != res) + throw std::runtime_error(std::string("clGetPlatformInfo failed")); + + buf.resize(psize); + res = clGetPlatformInfo(id, param, psize, buf, 0); + if (CL_SUCCESS != res) + throw std::runtime_error(std::string("clGetPlatformInfo failed")); + + // just in case, ensure trailing zero for ASCIIZ string + buf[psize] = 0; + + paramStr = buf; + + return CL_SUCCESS; + } + +private: + std::string m_profile; + std::string m_version; + std::string m_name; + std::string m_vendor; + std::string m_extensions; +}; + + +class DeviceInfo +{ +public: + DeviceInfo() + {} + + ~DeviceInfo() + {} + + cl_int QueryInfo(cl_device_id id) + { + query_param(id, CL_DEVICE_TYPE, m_type); + query_param(id, CL_DEVICE_VENDOR_ID, m_vendor_id); + query_param(id, CL_DEVICE_MAX_COMPUTE_UNITS, m_max_compute_units); + query_param(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, m_max_work_item_dimensions); + query_param(id, CL_DEVICE_MAX_WORK_ITEM_SIZES, m_max_work_item_sizes); + query_param(id, CL_DEVICE_MAX_WORK_GROUP_SIZE, m_max_work_group_size); + query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, m_preferred_vector_width_char); + query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, m_preferred_vector_width_short); + query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, m_preferred_vector_width_int); + query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, m_preferred_vector_width_long); + query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, m_preferred_vector_width_float); + query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, m_preferred_vector_width_double); +#if defined(CL_VERSION_1_1) + query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, m_preferred_vector_width_half); + query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, m_native_vector_width_char); + query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, m_native_vector_width_short); + query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, m_native_vector_width_int); + query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, m_native_vector_width_long); + query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, m_native_vector_width_float); + query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, m_native_vector_width_double); + query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, m_native_vector_width_half); +#endif + query_param(id, CL_DEVICE_MAX_CLOCK_FREQUENCY, m_max_clock_frequency); + query_param(id, CL_DEVICE_ADDRESS_BITS, m_address_bits); + query_param(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, m_max_mem_alloc_size); + query_param(id, CL_DEVICE_IMAGE_SUPPORT, m_image_support); + query_param(id, CL_DEVICE_MAX_READ_IMAGE_ARGS, m_max_read_image_args); + query_param(id, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, m_max_write_image_args); +#if defined(CL_VERSION_2_0) + query_param(id, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, m_max_read_write_image_args); +#endif + query_param(id, CL_DEVICE_IMAGE2D_MAX_WIDTH, m_image2d_max_width); + query_param(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT, m_image2d_max_height); + query_param(id, CL_DEVICE_IMAGE3D_MAX_WIDTH, m_image3d_max_width); + query_param(id, CL_DEVICE_IMAGE3D_MAX_HEIGHT, m_image3d_max_height); + query_param(id, CL_DEVICE_IMAGE3D_MAX_DEPTH, m_image3d_max_depth); +#if defined(CL_VERSION_1_2) + query_param(id, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, m_image_max_buffer_size); + query_param(id, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, m_image_max_array_size); +#endif + query_param(id, CL_DEVICE_MAX_SAMPLERS, m_max_samplers); +#if defined(CL_VERSION_1_2) + query_param(id, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, m_image_pitch_alignment); + query_param(id, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, m_image_base_address_alignment); +#endif +#if defined(CL_VERSION_2_0) + query_param(id, CL_DEVICE_MAX_PIPE_ARGS, m_max_pipe_args); + query_param(id, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, m_pipe_max_active_reservations); + query_param(id, CL_DEVICE_PIPE_MAX_PACKET_SIZE, m_pipe_max_packet_size); +#endif + query_param(id, CL_DEVICE_MAX_PARAMETER_SIZE, m_max_parameter_size); + query_param(id, CL_DEVICE_MEM_BASE_ADDR_ALIGN, m_mem_base_addr_align); + query_param(id, CL_DEVICE_SINGLE_FP_CONFIG, m_single_fp_config); +#if defined(CL_VERSION_1_2) + query_param(id, CL_DEVICE_DOUBLE_FP_CONFIG, m_double_fp_config); +#endif + query_param(id, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, m_global_mem_cache_type); + query_param(id, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, m_global_mem_cacheline_size); + query_param(id, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, m_global_mem_cache_size); + query_param(id, CL_DEVICE_GLOBAL_MEM_SIZE, m_global_mem_size); + query_param(id, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, m_max_constant_buffer_size); + query_param(id, CL_DEVICE_MAX_CONSTANT_ARGS, m_max_constant_args); +#if defined(CL_VERSION_2_0) + query_param(id, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, m_max_global_variable_size); + query_param(id, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, m_global_variable_preferred_total_size); +#endif + query_param(id, CL_DEVICE_LOCAL_MEM_TYPE, m_local_mem_type); + query_param(id, CL_DEVICE_LOCAL_MEM_SIZE, m_local_mem_size); + query_param(id, CL_DEVICE_ERROR_CORRECTION_SUPPORT, m_error_correction_support); +#if defined(CL_VERSION_1_1) + query_param(id, CL_DEVICE_HOST_UNIFIED_MEMORY, m_host_unified_memory); +#endif + query_param(id, CL_DEVICE_PROFILING_TIMER_RESOLUTION, m_profiling_timer_resolution); + query_param(id, CL_DEVICE_ENDIAN_LITTLE, m_endian_little); + query_param(id, CL_DEVICE_AVAILABLE, m_available); + query_param(id, CL_DEVICE_COMPILER_AVAILABLE, m_compiler_available); +#if defined(CL_VERSION_1_2) + query_param(id, CL_DEVICE_LINKER_AVAILABLE, m_linker_available); +#endif + query_param(id, CL_DEVICE_EXECUTION_CAPABILITIES, m_execution_capabilities); + query_param(id, CL_DEVICE_QUEUE_PROPERTIES, m_queue_properties); +#if defined(CL_VERSION_2_0) + query_param(id, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, m_queue_on_host_properties); + query_param(id, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, m_queue_on_device_properties); + query_param(id, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, m_queue_on_device_preferred_size); + query_param(id, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, m_queue_on_device_max_size); + query_param(id, CL_DEVICE_MAX_ON_DEVICE_QUEUES, m_max_on_device_queues); + query_param(id, CL_DEVICE_MAX_ON_DEVICE_EVENTS, m_max_on_device_events); +#endif +#if defined(CL_VERSION_1_2) + query_param(id, CL_DEVICE_BUILT_IN_KERNELS, m_built_in_kernels); +#endif + query_param(id, CL_DEVICE_PLATFORM, m_platform); + query_param(id, CL_DEVICE_NAME, m_name); + query_param(id, CL_DEVICE_VENDOR, m_vendor); + query_param(id, CL_DRIVER_VERSION, m_driver_version); + query_param(id, CL_DEVICE_PROFILE, m_profile); + query_param(id, CL_DEVICE_VERSION, m_version); +#if defined(CL_VERSION_1_1) + query_param(id, CL_DEVICE_OPENCL_C_VERSION, m_opencl_c_version); +#endif + query_param(id, CL_DEVICE_EXTENSIONS, m_extensions); +#if defined(CL_VERSION_1_2) + query_param(id, CL_DEVICE_PRINTF_BUFFER_SIZE, m_printf_buffer_size); + query_param(id, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, m_preferred_interop_user_sync); + query_param(id, CL_DEVICE_PARENT_DEVICE, m_parent_device); + query_param(id, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, m_partition_max_sub_devices); + query_param(id, CL_DEVICE_PARTITION_PROPERTIES, m_partition_properties); + query_param(id, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, m_partition_affinity_domain); + query_param(id, CL_DEVICE_PARTITION_TYPE, m_partition_type); + query_param(id, CL_DEVICE_REFERENCE_COUNT, m_reference_count); +#endif + return CL_SUCCESS; + } + + std::string Name() { return m_name; } + +private: + template + cl_int query_param(cl_device_id id, cl_device_info param, T& value) + { + cl_int res; + size_t size = 0; + + res = clGetDeviceInfo(id, param, 0, 0, &size); + if (CL_SUCCESS != res && size != 0) + throw std::runtime_error(std::string("clGetDeviceInfo failed")); + + if (0 == size) + return CL_SUCCESS; + + if (sizeof(T) != size) + throw std::runtime_error(std::string("clGetDeviceInfo: param size mismatch")); + + res = clGetDeviceInfo(id, param, size, &value, 0); + if (CL_SUCCESS != res) + throw std::runtime_error(std::string("clGetDeviceInfo failed")); + + return CL_SUCCESS; + } + + template + cl_int query_param(cl_device_id id, cl_device_info param, std::vector& value) + { + cl_int res; + size_t size; + + res = clGetDeviceInfo(id, param, 0, 0, &size); + if (CL_SUCCESS != res) + throw std::runtime_error(std::string("clGetDeviceInfo failed")); + + if (0 == size) + return CL_SUCCESS; + + value.resize(size / sizeof(T)); + + res = clGetDeviceInfo(id, param, size, &value[0], 0); + if (CL_SUCCESS != res) + throw std::runtime_error(std::string("clGetDeviceInfo failed")); + + return CL_SUCCESS; + } + + cl_int query_param(cl_device_id id, cl_device_info param, std::string& value) + { + cl_int res; + size_t size; + + res = clGetDeviceInfo(id, param, 0, 0, &size); + if (CL_SUCCESS != res) + throw std::runtime_error(std::string("clGetDeviceInfo failed")); + + value.resize(size + 1); + + res = clGetDeviceInfo(id, param, size, &value[0], 0); + if (CL_SUCCESS != res) + throw std::runtime_error(std::string("clGetDeviceInfo failed")); + + // just in case, ensure trailing zero for ASCIIZ string + value[size] = 0; + + return CL_SUCCESS; + } + +private: + cl_device_type m_type; + cl_uint m_vendor_id; + cl_uint m_max_compute_units; + cl_uint m_max_work_item_dimensions; + std::vector m_max_work_item_sizes; + size_t m_max_work_group_size; + cl_uint m_preferred_vector_width_char; + cl_uint m_preferred_vector_width_short; + cl_uint m_preferred_vector_width_int; + cl_uint m_preferred_vector_width_long; + cl_uint m_preferred_vector_width_float; + cl_uint m_preferred_vector_width_double; +#if defined(CL_VERSION_1_1) + cl_uint m_preferred_vector_width_half; + cl_uint m_native_vector_width_char; + cl_uint m_native_vector_width_short; + cl_uint m_native_vector_width_int; + cl_uint m_native_vector_width_long; + cl_uint m_native_vector_width_float; + cl_uint m_native_vector_width_double; + cl_uint m_native_vector_width_half; +#endif + cl_uint m_max_clock_frequency; + cl_uint m_address_bits; + cl_ulong m_max_mem_alloc_size; + cl_bool m_image_support; + cl_uint m_max_read_image_args; + cl_uint m_max_write_image_args; +#if defined(CL_VERSION_2_0) + cl_uint m_max_read_write_image_args; +#endif + size_t m_image2d_max_width; + size_t m_image2d_max_height; + size_t m_image3d_max_width; + size_t m_image3d_max_height; + size_t m_image3d_max_depth; +#if defined(CL_VERSION_1_2) + size_t m_image_max_buffer_size; + size_t m_image_max_array_size; +#endif + cl_uint m_max_samplers; +#if defined(CL_VERSION_1_2) + cl_uint m_image_pitch_alignment; + cl_uint m_image_base_address_alignment; +#endif +#if defined(CL_VERSION_2_0) + cl_uint m_max_pipe_args; + cl_uint m_pipe_max_active_reservations; + cl_uint m_pipe_max_packet_size; +#endif + size_t m_max_parameter_size; + cl_uint m_mem_base_addr_align; + cl_device_fp_config m_single_fp_config; +#if defined(CL_VERSION_1_2) + cl_device_fp_config m_double_fp_config; +#endif + cl_device_mem_cache_type m_global_mem_cache_type; + cl_uint m_global_mem_cacheline_size; + cl_ulong m_global_mem_cache_size; + cl_ulong m_global_mem_size; + cl_ulong m_max_constant_buffer_size; + cl_uint m_max_constant_args; +#if defined(CL_VERSION_2_0) + size_t m_max_global_variable_size; + size_t m_global_variable_preferred_total_size; +#endif + cl_device_local_mem_type m_local_mem_type; + cl_ulong m_local_mem_size; + cl_bool m_error_correction_support; +#if defined(CL_VERSION_1_1) + cl_bool m_host_unified_memory; +#endif + size_t m_profiling_timer_resolution; + cl_bool m_endian_little; + cl_bool m_available; + cl_bool m_compiler_available; +#if defined(CL_VERSION_1_2) + cl_bool m_linker_available; +#endif + cl_device_exec_capabilities m_execution_capabilities; + cl_command_queue_properties m_queue_properties; +#if defined(CL_VERSION_2_0) + cl_command_queue_properties m_queue_on_host_properties; + cl_command_queue_properties m_queue_on_device_properties; + cl_uint m_queue_on_device_preferred_size; + cl_uint m_queue_on_device_max_size; + cl_uint m_max_on_device_queues; + cl_uint m_max_on_device_events; +#endif +#if defined(CL_VERSION_1_2) + std::string m_built_in_kernels; +#endif + cl_platform_id m_platform; + std::string m_name; + std::string m_vendor; + std::string m_driver_version; + std::string m_profile; + std::string m_version; +#if defined(CL_VERSION_1_1) + std::string m_opencl_c_version; +#endif + std::string m_extensions; +#if defined(CL_VERSION_1_2) + size_t m_printf_buffer_size; + cl_bool m_preferred_interop_user_sync; + cl_device_id m_parent_device; + cl_uint m_partition_max_sub_devices; + std::vector m_partition_properties; + cl_device_affinity_domain m_partition_affinity_domain; + std::vector m_partition_type; + cl_uint m_reference_count; +#endif +}; + +} // namespace opencl + + +class App +{ +public: + App(CommandLineParser& cmd); + ~App(); + + int initOpenCL(); + int initVideoSource(); + + int process_frame_with_open_cl(cv::Mat& frame, bool use_buffer, cl_mem* cl_buffer); + int process_cl_buffer_with_opencv(cl_mem buffer, size_t step, int rows, int cols, int type, cv::UMat& u); + int process_cl_image_with_opencv(cl_mem image, cv::UMat& u); + + int run(); + + bool isRunning() { return m_running; } + bool doProcess() { return m_process; } + bool useBuffer() { return m_use_buffer; } + + void setRunning(bool running) { m_running = running; } + void setDoProcess(bool process) { m_process = process; } + void setUseBuffer(bool use_buffer) { m_use_buffer = use_buffer; } + +protected: + bool nextFrame(cv::Mat& frame) { return m_cap.read(frame); } + void handleKey(char key); + void timerStart(); + void timerEnd(); + std::string fpsStr() const; + std::string message() const; + +private: + bool m_running; + bool m_process; + bool m_use_buffer; + + int64 m_t0; + int64 m_t1; + double m_fps; + + string m_file_name; + int m_camera_id; + cv::VideoCapture m_cap; + cv::Mat m_frame; + cv::Mat m_frameGray; + + opencl::PlatformInfo m_platformInfo; + opencl::DeviceInfo m_deviceInfo; + std::vector m_platform_ids; + cl_context m_context; + cl_device_id m_device_id; + cl_command_queue m_queue; + cl_program m_program; + cl_kernel m_kernelBuf; + cl_kernel m_kernelImg; + cl_mem m_mem_obj; + cl_event m_event; +}; + + +App::App(CommandLineParser& cmd) +{ + cout << "\nPress ESC to exit\n" << endl; + cout << "\n 'p' to toggle ON/OFF processing\n" << endl; + cout << "\n SPACE to switch between OpenCL buffer/image\n" << endl; + + m_camera_id = cmd.get("camera"); + m_file_name = cmd.get("video"); + + m_running = false; + m_process = false; + m_use_buffer = false; + + m_context = 0; + m_device_id = 0; + m_queue = 0; + m_program = 0; + m_kernelBuf = 0; + m_kernelImg = 0; + m_mem_obj = 0; + m_event = 0; +} // ctor + + +App::~App() +{ + if (m_queue) + { + clFinish(m_queue); + clReleaseCommandQueue(m_queue); + m_queue = 0; + } + + if (m_program) + { + clReleaseProgram(m_program); + m_program = 0; + } + + if (m_mem_obj) + { + clReleaseMemObject(m_mem_obj); + m_mem_obj = 0; + } + + if (m_event) + { + clReleaseEvent(m_event); + } + + if (m_kernelBuf) + { + clReleaseKernel(m_kernelBuf); + m_kernelBuf = 0; + } + + if (m_kernelImg) + { + clReleaseKernel(m_kernelImg); + m_kernelImg = 0; + } + + if (m_device_id) + { + clReleaseDevice(m_device_id); + m_device_id = 0; + } + + if (m_context) + { + clReleaseContext(m_context); + m_context = 0; + } +} // dtor + + +int App::initOpenCL() +{ + cl_int res = CL_SUCCESS; + cl_uint num_entries = 0; + + res = clGetPlatformIDs(0, 0, &num_entries); + if (CL_SUCCESS != res) + return -1; + + m_platform_ids.resize(num_entries); + + res = clGetPlatformIDs(num_entries, &m_platform_ids[0], 0); + if (CL_SUCCESS != res) + return -1; + + unsigned int i; + + // create context from first platform with GPU device + for (i = 0; i < m_platform_ids.size(); i++) + { + cl_context_properties props[] = + { + CL_CONTEXT_PLATFORM, + (cl_context_properties)(m_platform_ids[i]), + 0 + }; + + m_context = clCreateContextFromType(props, CL_DEVICE_TYPE_GPU, 0, 0, &res); + if (0 == m_context || CL_SUCCESS != res) + continue; + + res = clGetContextInfo(m_context, CL_CONTEXT_DEVICES, sizeof(cl_device_id), &m_device_id, 0); + if (CL_SUCCESS != res) + return -1; + + m_queue = clCreateCommandQueue(m_context, m_device_id, 0, &res); + if (0 == m_queue || CL_SUCCESS != res) + return -1; + + const char* kernelSrc = + "__kernel " + "void bitwise_inv_buf_8uC1(" + " __global unsigned char* pSrcDst," + " int srcDstStep," + " int rows," + " int cols)" + "{" + " int x = get_global_id(0);" + " int y = get_global_id(1);" + " int idx = mad24(y, srcDstStep, x);" + " pSrcDst[idx] = ~pSrcDst[idx];" + "}" + "__kernel " + "void bitwise_inv_img_8uC1(" + " read_only image2d_t srcImg," + " write_only image2d_t dstImg)" + "{" + " int x = get_global_id(0);" + " int y = get_global_id(1);" + " int2 coord = (int2)(x, y);" + " uint4 val = read_imageui(srcImg, coord);" + " val.x = (~val.x) & 0x000000FF;" + " write_imageui(dstImg, coord, val);" + "}"; + size_t len = strlen(kernelSrc); + m_program = clCreateProgramWithSource(m_context, 1, &kernelSrc, &len, &res); + if (0 == m_program || CL_SUCCESS != res) + return -1; + + res = clBuildProgram(m_program, 1, &m_device_id, 0, 0, 0); + if (CL_SUCCESS != res) + return -1; + + m_kernelBuf = clCreateKernel(m_program, "bitwise_inv_buf_8uC1", &res); + if (0 == m_kernelBuf || CL_SUCCESS != res) + return -1; + + m_kernelImg = clCreateKernel(m_program, "bitwise_inv_img_8uC1", &res); + if (0 == m_kernelImg || CL_SUCCESS != res) + return -1; + + m_platformInfo.QueryInfo(m_platform_ids[i]); + m_deviceInfo.QueryInfo(m_device_id); + + // attach OpenCL context to OpenCV + cv::ocl::attachContext(m_platformInfo.Name(), m_platform_ids[i], m_context, m_device_id); + + break; + } + + return m_context != 0 ? CL_SUCCESS : -1; +} // initOpenCL() + + +int App::initVideoSource() +{ + try + { + if (!m_file_name.empty() && m_camera_id == -1) + { + m_cap.open(m_file_name.c_str()); + if (!m_cap.isOpened()) + throw std::runtime_error(std::string("can't open video file: " + m_file_name)); + } + else if (m_camera_id != -1) + { + m_cap.open(m_camera_id); + if (!m_cap.isOpened()) + { + std::stringstream msg; + msg << "can't open camera: " << m_camera_id; + throw std::runtime_error(msg.str()); + } + } + else + throw std::runtime_error(std::string("specify video source")); + } + + catch (std::exception e) + { + cerr << "ERROR: " << e.what() << std::endl; + return -1; + } + + return 0; +} // initVideoSource() + + +// this function is an example of "typical" OpenCL processing pipeline +// It creates OpenCL buffer or image, depending on use_buffer flag, +// from input media frame and process these data +// (inverts each pixel value in half of frame) with OpenCL kernel +int App::process_frame_with_open_cl(cv::Mat& frame, bool use_buffer, cl_mem* mem_obj) +{ + cl_int res = CL_SUCCESS; + + CV_Assert(mem_obj); + + cl_kernel kernel = 0; + cl_mem mem = mem_obj[0]; + + if (0 == mem) + { + // first time initialization + + cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; + if (use_buffer) + { + // allocate OpenCL memory to keep single frame, + // reuse this memory for subsecuent frames + // memory will be deallocated at dtor + mem = clCreateBuffer(m_context, flags, frame.total(), frame.ptr(), &res); + if (0 == mem || CL_SUCCESS != res) + return -1; + + res = clSetKernelArg(m_kernelBuf, 0, sizeof(cl_mem), &mem); + if (CL_SUCCESS != res) + return -1; + + res = clSetKernelArg(m_kernelBuf, 1, sizeof(int), &frame.step[0]); + if (CL_SUCCESS != res) + return -1; + + res = clSetKernelArg(m_kernelBuf, 2, sizeof(int), &frame.rows); + if (CL_SUCCESS != res) + return -1; + + int cols2 = frame.cols / 2; + res = clSetKernelArg(m_kernelBuf, 3, sizeof(int), &cols2); + if (CL_SUCCESS != res) + return -1; + + kernel = m_kernelBuf; + } + else + { + cl_image_format fmt; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNSIGNED_INT8; + + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = frame.cols; + desc.image_height = frame.rows; + desc.image_depth = 0; + desc.image_array_size = 0; + desc.image_row_pitch = frame.step[0]; + desc.image_slice_pitch = 0; + desc.num_mip_levels = 0; + desc.num_samples = 0; + desc.buffer = 0; + mem = clCreateImage(m_context, flags, &fmt, &desc, frame.ptr(), &res); + if (0 == mem || CL_SUCCESS != res) + return -1; + + res = clSetKernelArg(m_kernelImg, 0, sizeof(cl_mem), &mem); + if (CL_SUCCESS != res) + return -1; + + res = clSetKernelArg(m_kernelImg, 1, sizeof(cl_mem), &mem); + if (CL_SUCCESS != res) + return -1; + + kernel = m_kernelImg; + } + } + + m_event = clCreateUserEvent(m_context, &res); + if (0 == m_event || CL_SUCCESS != res) + return -1; + + // process left half of frame in OpenCL + size_t size[] = { frame.cols / 2, frame.rows }; + res = clEnqueueNDRangeKernel(m_queue, kernel, 2, 0, size, 0, 0, 0, &m_event); + if (CL_SUCCESS != res) + return -1; + + res = clWaitForEvents(1, &m_event); + if (CL_SUCCESS != res) + return - 1; + + mem_obj[0] = mem; + + return 0; +} + + +// this function is an example of interoperability between OpenCL buffer +// and OpenCV UMat objects. It converts (without copying data) OpenCL buffer +// to OpenCV UMat and then do blur on these data +int App::process_cl_buffer_with_opencv(cl_mem buffer, size_t step, int rows, int cols, int type, cv::UMat& u) +{ + cv::ocl::convertFromBuffer(buffer, step, rows, cols, type, u); + + // process right half of frame in OpenCV + cv::Point pt(u.cols / 2, 0); + cv::Size sz(u.cols / 2, u.rows); + cv::Rect roi(pt, sz); + cv::UMat uroi(u, roi); + cv::blur(uroi, uroi, cv::Size(7, 7), cv::Point(-3, -3)); + + if (buffer) + clReleaseMemObject(buffer); + m_mem_obj = 0; + + return 0; +} + + +// this function is an example of interoperability between OpenCL image +// and OpenCV UMat objects. It converts OpenCL image +// to OpenCV UMat and then do blur on these data +int App::process_cl_image_with_opencv(cl_mem image, cv::UMat& u) +{ + cv::ocl::convertFromImage(image, u); + + // process right half of frame in OpenCV + cv::Point pt(u.cols / 2, 0); + cv::Size sz(u.cols / 2, u.rows); + cv::Rect roi(pt, sz); + cv::UMat uroi(u, roi); + cv::blur(uroi, uroi, cv::Size(7, 7), cv::Point(-3, -3)); + + if (image) + clReleaseMemObject(image); + m_mem_obj = 0; + + return 0; +} + + +int App::run() +{ + if (0 != initOpenCL()) + return -1; + + if (0 != initVideoSource()) + return -1; + + Mat img_to_show; + + // set running state until ESC pressed + setRunning(true); + // set process flag to show some data processing + // can be toggled on/off by 'p' button + setDoProcess(true); + // set use buffer flag, + // when it is set to true, will demo interop opencl buffer and cv::Umat, + // otherwise demo interop opencl image and cv::UMat + // can be switched on/of by SPACE button + setUseBuffer(true); + + // Iterate over all frames + while (isRunning() && nextFrame(m_frame)) + { + cv::cvtColor(m_frame, m_frameGray, COLOR_BGR2GRAY); + + UMat uframe; + + // work + timerStart(); + + if (doProcess()) + { + process_frame_with_open_cl(m_frameGray, useBuffer(), &m_mem_obj); + + if (useBuffer()) + process_cl_buffer_with_opencv( + m_mem_obj, m_frameGray.step[0], m_frameGray.rows, m_frameGray.cols, m_frameGray.type(), uframe); + else + process_cl_image_with_opencv(m_mem_obj, uframe); + } + else + { + m_frameGray.copyTo(uframe); + } + + timerEnd(); + + uframe.copyTo(img_to_show); + + putText(img_to_show, "Version : " + m_platformInfo.Version(), Point(5, 30), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); + putText(img_to_show, "Name : " + m_platformInfo.Name(), Point(5, 60), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); + putText(img_to_show, "Device : " + m_deviceInfo.Name(), Point(5, 90), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); + cv::String memtype = useBuffer() ? "buffer" : "image"; + putText(img_to_show, "interop with OpenCL " + memtype, Point(5, 120), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); + putText(img_to_show, "FPS : " + fpsStr(), Point(5, 150), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); + + imshow("opencl_interop", img_to_show); + + handleKey((char)waitKey(3)); + } + + return 0; +} + + +void App::handleKey(char key) +{ + switch (key) + { + case 27: + setRunning(false); + break; + + case ' ': + setUseBuffer(!useBuffer()); + break; + + case 'p': + case 'P': + setDoProcess( !doProcess() ); + break; + + default: + break; + } +} + + +inline void App::timerStart() +{ + m_t0 = getTickCount(); +} + + +inline void App::timerEnd() +{ + m_t1 = getTickCount(); + int64 delta = m_t1 - m_t0; + double freq = getTickFrequency(); + m_fps = freq / delta; +} + + +inline string App::fpsStr() const +{ + stringstream ss; + ss << std::fixed << std::setprecision(1) << m_fps; + return ss.str(); +} + + +int main(int argc, char** argv) +{ + const char* keys = + "{ help h ? | | print help message }" + "{ camera c | -1 | use camera as input }" + "{ video v | | use video as input }"; + + CommandLineParser cmd(argc, argv, keys); + if (cmd.has("help")) + { + cmd.printMessage(); + return EXIT_SUCCESS; + } + + App app(cmd); + + try + { + app.run(); + } + + catch (const cv::Exception& e) + { + cout << "error: " << e.what() << endl; + return 1; + } + + catch (const std::exception& e) + { + cout << "error: " << e.what() << endl; + return 1; + } + + catch (...) + { + cout << "unknown exception" << endl; + return 1; + } + + return EXIT_SUCCESS; +} // main()