diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f4fd3323d..07d8ec6f25 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -162,6 +162,7 @@ OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF OCV_OPTION(WITH_XINE "Include Xine support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) ) OCV_OPTION(WITH_CLP "Include Clp support (EPL)" OFF) OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" ON IF (NOT IOS) ) +OCV_OPTION(WITH_OPENCL_SVM "Include OpenCL Shared Virtual Memory support" OFF ) # experimental OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" ON IF (NOT ANDROID AND NOT IOS) ) OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON IF (NOT ANDROID AND NOT IOS) ) OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF WIN32 ) diff --git a/cmake/OpenCVDetectOpenCL.cmake b/cmake/OpenCVDetectOpenCL.cmake index f732546e51..ce76ad1732 100644 --- a/cmake/OpenCVDetectOpenCL.cmake +++ b/cmake/OpenCVDetectOpenCL.cmake @@ -26,6 +26,10 @@ if(OPENCL_FOUND) set(HAVE_OPENCL 1) + if(WITH_OPENCL_SVM) + set(HAVE_OPENCL_SVM 1) + endif() + if(HAVE_OPENCL_STATIC) set(OPENCL_LIBRARIES "${OPENCL_LIBRARY}") else() diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in index 3eea4fafe4..f8c1c40357 100644 --- a/cmake/templates/cvconfig.h.in +++ b/cmake/templates/cvconfig.h.in @@ -122,6 +122,7 @@ /* OpenCL Support */ #cmakedefine HAVE_OPENCL #cmakedefine HAVE_OPENCL_STATIC +#cmakedefine HAVE_OPENCL_SVM /* OpenEXR codec */ #cmakedefine HAVE_OPENEXR diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index 522b8b8154..8b0d94f6e1 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -415,7 +415,7 @@ public: const size_t dstofs[], const size_t dststep[], bool sync) const; // default implementation returns DummyBufferPoolController - virtual BufferPoolController* getBufferPoolController() const; + virtual 
BufferPoolController* getBufferPoolController(const char* id = NULL) const; }; @@ -481,7 +481,7 @@ struct CV_EXPORTS UMatData int refcount; uchar* data; uchar* origdata; - size_t size, capacity; + size_t size; int flags; void* handle; diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index 3b023fb09e..f87e15ee6a 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -56,6 +56,8 @@ CV_EXPORTS_W bool haveAmdFft(); CV_EXPORTS_W void setUseOpenCL(bool flag); CV_EXPORTS_W void finish(); +CV_EXPORTS bool haveSVM(); + class CV_EXPORTS Context; class CV_EXPORTS Device; class CV_EXPORTS Kernel; @@ -248,7 +250,10 @@ public: void* ptr() const; friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device); -protected: + + bool useSVM() const; + void setUseSVM(bool enabled); + struct Impl; Impl* p; }; @@ -666,8 +671,17 @@ protected: CV_EXPORTS MatAllocator* getOpenCLAllocator(); -CV_EXPORTS_W bool isPerformanceCheckBypassed(); -#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::isPerformanceCheckBypassed() || (condition)) + +#ifdef __OPENCV_BUILD +namespace internal { + +CV_EXPORTS bool isPerformanceCheckBypassed(); +#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition)) + +CV_EXPORTS bool isCLBuffer(UMat& u); + +} // namespace internal +#endif //! 
@} diff --git a/modules/core/include/opencv2/core/opencl/opencl_svm.hpp b/modules/core/include/opencv2/core/opencl/opencl_svm.hpp new file mode 100644 index 0000000000..e9f7ba0232 --- /dev/null +++ b/modules/core/include/opencv2/core/opencl/opencl_svm.hpp @@ -0,0 +1,81 @@ +/* See LICENSE file in the root OpenCV directory */ + +#ifndef __OPENCV_CORE_OPENCL_SVM_HPP__ +#define __OPENCV_CORE_OPENCL_SVM_HPP__ + +// +// Internal usage only (binary compatibility is not guaranteed) +// +#ifndef __OPENCV_BUILD +#error Internal header file +#endif + +#if defined(HAVE_OPENCL) && defined(HAVE_OPENCL_SVM) +#include "runtime/opencl_core.hpp" +#include "runtime/opencl_svm_20.hpp" +#include "runtime/opencl_svm_hsa_extension.hpp" + +namespace cv { namespace ocl { namespace svm { + +struct SVMCapabilities +{ + enum Value + { + SVM_COARSE_GRAIN_BUFFER = (1 << 0), + SVM_FINE_GRAIN_BUFFER = (1 << 1), + SVM_FINE_GRAIN_SYSTEM = (1 << 2), + SVM_ATOMICS = (1 << 3), + }; + int value_; + + SVMCapabilities(int capabilities = 0) : value_(capabilities) { } + operator int() const { return value_; } + + inline bool isNoSVMSupport() const { return value_ == 0; } + inline bool isSupportCoarseGrainBuffer() const { return (value_ & SVM_COARSE_GRAIN_BUFFER) != 0; } + inline bool isSupportFineGrainBuffer() const { return (value_ & SVM_FINE_GRAIN_BUFFER) != 0; } + inline bool isSupportFineGrainSystem() const { return (value_ & SVM_FINE_GRAIN_SYSTEM) != 0; } + inline bool isSupportAtomics() const { return (value_ & SVM_ATOMICS) != 0; } +}; + +CV_EXPORTS const SVMCapabilities getSVMCapabilitites(const ocl::Context& context); + +struct SVMFunctions +{ + clSVMAllocAMD_fn fn_clSVMAlloc; + clSVMFreeAMD_fn fn_clSVMFree; + clSetKernelArgSVMPointerAMD_fn fn_clSetKernelArgSVMPointer; + //clSetKernelExecInfoAMD_fn fn_clSetKernelExecInfo; + //clEnqueueSVMFreeAMD_fn fn_clEnqueueSVMFree; + clEnqueueSVMMemcpyAMD_fn fn_clEnqueueSVMMemcpy; + clEnqueueSVMMemFillAMD_fn fn_clEnqueueSVMMemFill; + clEnqueueSVMMapAMD_fn 
fn_clEnqueueSVMMap; + clEnqueueSVMUnmapAMD_fn fn_clEnqueueSVMUnmap; + + inline SVMFunctions() + : fn_clSVMAlloc(NULL), fn_clSVMFree(NULL), + fn_clSetKernelArgSVMPointer(NULL), /*fn_clSetKernelExecInfo(NULL),*/ + /*fn_clEnqueueSVMFree(NULL),*/ fn_clEnqueueSVMMemcpy(NULL), fn_clEnqueueSVMMemFill(NULL), + fn_clEnqueueSVMMap(NULL), fn_clEnqueueSVMUnmap(NULL) + { + // nothing + } + + inline bool isValid() const + { + return fn_clSVMAlloc != NULL && fn_clSVMFree && fn_clSetKernelArgSVMPointer && + /*fn_clSetKernelExecInfo && fn_clEnqueueSVMFree &&*/ fn_clEnqueueSVMMemcpy && + fn_clEnqueueSVMMemFill && fn_clEnqueueSVMMap && fn_clEnqueueSVMUnmap; + } +}; + +// We should guarantee that SVMFunctions lifetime is not less than context's lifetime +CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context); + +CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags); + +}}} //namespace cv::ocl::svm +#endif + +#endif // __OPENCV_CORE_OPENCL_SVM_HPP__ +/* End of file. */ diff --git a/modules/core/include/opencv2/core/opencl/runtime/opencl_core.hpp b/modules/core/include/opencv2/core/opencl/runtime/opencl_core.hpp index b19563cbc2..bd30f813d7 100644 --- a/modules/core/include/opencv2/core/opencl/runtime/opencl_core.hpp +++ b/modules/core/include/opencv2/core/opencl/runtime/opencl_core.hpp @@ -62,6 +62,18 @@ #endif #endif +#ifdef HAVE_OPENCL_SVM +#define clSVMAlloc clSVMAlloc_ +#define clSVMFree clSVMFree_ +#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_ +#define clSetKernelExecInfo clSetKernelExecInfo_ +#define clEnqueueSVMFree clEnqueueSVMFree_ +#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_ +#define clEnqueueSVMMemFill clEnqueueSVMMemFill_ +#define clEnqueueSVMMap clEnqueueSVMMap_ +#define clEnqueueSVMUnmap clEnqueueSVMUnmap_ +#endif + #include "autogenerated/opencl_core.hpp" #endif // HAVE_OPENCL_STATIC diff --git a/modules/core/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp b/modules/core/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp new file 
mode 100644 index 0000000000..7f0ff91d12 --- /dev/null +++ b/modules/core/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp @@ -0,0 +1,52 @@ +/* See LICENSE file in the root OpenCV directory */ + +#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__ +#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__ + +#if defined(HAVE_OPENCL_SVM) +#include "opencl_core.hpp" + +#include "opencl_svm_definitions.hpp" + +#ifndef HAVE_OPENCL_STATIC + +#undef clSVMAlloc +#define clSVMAlloc clSVMAlloc_pfn +#undef clSVMFree +#define clSVMFree clSVMFree_pfn +#undef clSetKernelArgSVMPointer +#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_pfn +#undef clSetKernelExecInfo +//#define clSetKernelExecInfo clSetKernelExecInfo_pfn +#undef clEnqueueSVMFree +//#define clEnqueueSVMFree clEnqueueSVMFree_pfn +#undef clEnqueueSVMMemcpy +#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_pfn +#undef clEnqueueSVMMemFill +#define clEnqueueSVMMemFill clEnqueueSVMMemFill_pfn +#undef clEnqueueSVMMap +#define clEnqueueSVMMap clEnqueueSVMMap_pfn +#undef clEnqueueSVMUnmap +#define clEnqueueSVMUnmap clEnqueueSVMUnmap_pfn + +extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSVMAlloc)(cl_context context, cl_svm_mem_flags flags, size_t size, unsigned int alignment); +extern CL_RUNTIME_EXPORT void (CL_API_CALL *clSVMFree)(cl_context context, void* svm_pointer); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clSetKernelArgSVMPointer)(cl_kernel kernel, cl_uint arg_index, const void* arg_value); +//extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value); +//extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMFree)(cl_command_queue command_queue, cl_uint num_svm_pointers, void* svm_pointers[], +// void (CL_CALLBACK *pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void* svm_pointers[], void* user_data), void* user_data, +// cl_uint num_events_in_wait_list, const cl_event* 
event_wait_list, cl_event* event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemcpy)(cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, const void* src_ptr, size_t size, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemFill)(cl_command_queue command_queue, void* svm_ptr, const void* pattern, size_t pattern_size, size_t size, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMap)(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags map_flags, void* svm_ptr, size_t size, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMUnmap)(cl_command_queue command_queue, void* svm_ptr, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); + +#endif // HAVE_OPENCL_STATIC + +#endif // HAVE_OPENCL_SVM + +#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__ diff --git a/modules/core/include/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp b/modules/core/include/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp new file mode 100644 index 0000000000..a4fd5fc810 --- /dev/null +++ b/modules/core/include/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp @@ -0,0 +1,42 @@ +/* See LICENSE file in the root OpenCV directory */ + +#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__ +#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__ + +#if defined(HAVE_OPENCL_SVM) +#if defined(CL_VERSION_2_0) + +// OpenCL 2.0 contains SVM definitions + +#else + +typedef cl_bitfield cl_device_svm_capabilities; +typedef cl_bitfield cl_svm_mem_flags; +typedef cl_uint cl_kernel_exec_info; + +// +// TODO Add real values after OpenCL 2.0 release +// + +#ifndef CL_DEVICE_SVM_CAPABILITIES +#define 
CL_DEVICE_SVM_CAPABILITIES 0x1053 + +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2) +#define CL_DEVICE_SVM_ATOMICS (1 << 3) +#endif + +#ifndef CL_MEM_SVM_FINE_GRAIN_BUFFER +#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10) +#endif + +#ifndef CL_MEM_SVM_ATOMICS +#define CL_MEM_SVM_ATOMICS (1 << 11) +#endif + + +#endif // CL_VERSION_2_0 +#endif // HAVE_OPENCL_SVM + +#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__ diff --git a/modules/core/include/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp b/modules/core/include/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp new file mode 100644 index 0000000000..9e50408f06 --- /dev/null +++ b/modules/core/include/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp @@ -0,0 +1,166 @@ +/* See LICENSE file in the root OpenCV directory */ + +#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__ +#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__ + +#if defined(HAVE_OPENCL_SVM) +#include "opencl_core.hpp" + +#ifndef CL_DEVICE_SVM_CAPABILITIES_AMD +// +// Part of the file is an extract from the cl_ext.h file from AMD APP SDK package. +// Below is the original copyright. +// +/******************************************************************************* + * Copyright (c) 2008-2013 The Khronos Group Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ ******************************************************************************/ + +/******************************************* + * Shared Virtual Memory (SVM) extension + *******************************************/ +typedef cl_bitfield cl_device_svm_capabilities_amd; +typedef cl_bitfield cl_svm_mem_flags_amd; +typedef cl_uint cl_kernel_exec_info_amd; + +/* cl_device_info */ +#define CL_DEVICE_SVM_CAPABILITIES_AMD 0x1053 +#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT_AMD 0x1054 + +/* cl_device_svm_capabilities_amd */ +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_AMD (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_AMD (1 << 2) +#define CL_DEVICE_SVM_ATOMICS_AMD (1 << 3) + +/* cl_svm_mem_flags_amd */ +#define CL_MEM_SVM_FINE_GRAIN_BUFFER_AMD (1 << 10) +#define CL_MEM_SVM_ATOMICS_AMD (1 << 11) + +/* cl_mem_info */ +#define CL_MEM_USES_SVM_POINTER_AMD 0x1109 + +/* cl_kernel_exec_info_amd */ +#define CL_KERNEL_EXEC_INFO_SVM_PTRS_AMD 0x11B6 +#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_AMD 0x11B7 + +/* cl_command_type */ +#define CL_COMMAND_SVM_FREE_AMD 0x1209 +#define CL_COMMAND_SVM_MEMCPY_AMD 0x120A +#define CL_COMMAND_SVM_MEMFILL_AMD 0x120B +#define CL_COMMAND_SVM_MAP_AMD 0x120C +#define CL_COMMAND_SVM_UNMAP_AMD 0x120D + +typedef CL_API_ENTRY void* +(CL_API_CALL * clSVMAllocAMD_fn)( + cl_context /* context */, + cl_svm_mem_flags_amd /* flags */, + size_t /* size */, + unsigned int /* alignment */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY void +(CL_API_CALL * clSVMFreeAMD_fn)( + cl_context /* context */, + void* /* svm_pointer */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMFreeAMD_fn)( + cl_command_queue /* command_queue */, + cl_uint /* num_svm_pointers */, + void** /* svm_pointers */, + void (CL_CALLBACK *)( /*pfn_free_func*/ + cl_command_queue /* queue */, + cl_uint /* num_svm_pointers */, + void** /* svm_pointers */, + void* /* user_data 
*/), + void* /* user_data */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMMemcpyAMD_fn)( + cl_command_queue /* command_queue */, + cl_bool /* blocking_copy */, + void* /* dst_ptr */, + const void* /* src_ptr */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMMemFillAMD_fn)( + cl_command_queue /* command_queue */, + void* /* svm_ptr */, + const void* /* pattern */, + size_t /* pattern_size */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMMapAMD_fn)( + cl_command_queue /* command_queue */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + void* /* svm_ptr */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMUnmapAMD_fn)( + cl_command_queue /* command_queue */, + void* /* svm_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clSetKernelArgSVMPointerAMD_fn)( + cl_kernel /* kernel */, + cl_uint /* arg_index */, + const void * /* arg_value */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clSetKernelExecInfoAMD_fn)( + cl_kernel /* kernel */, + cl_kernel_exec_info_amd /* param_name */, + size_t /* param_value_size */, + const void * /* param_value */ +) CL_EXT_SUFFIX__VERSION_1_2; + +#endif + +#endif // HAVE_OPENCL_SVM + +#endif // 
__OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__ diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp index 6c8bad2444..feffc8d32f 100644 --- a/modules/core/src/matmul.cpp +++ b/modules/core/src/matmul.cpp @@ -721,6 +721,16 @@ static bool ocl_gemm_amdblas( InputArray matA, InputArray matB, double alpha, return false; UMat A = matA.getUMat(), B = matB.getUMat(), D = matD.getUMat(); + if (!ocl::internal::isCLBuffer(A) || !ocl::internal::isCLBuffer(B) || !ocl::internal::isCLBuffer(D)) + { + return false; + } + if (haveC) + { + UMat C = matC.getUMat(); + if (!ocl::internal::isCLBuffer(C)) + return false; + } if (haveC) ctrans ? transpose(matC, D) : matC.copyTo(D); else diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 3bd9f2c622..e1e9caa837 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -159,8 +159,9 @@ void MatAllocator::copy(UMatData* usrc, UMatData* udst, int dims, const size_t s memcpy(ptrs[1], ptrs[0], planesz); } -BufferPoolController* MatAllocator::getBufferPoolController() const +BufferPoolController* MatAllocator::getBufferPoolController(const char* id) const { + (void)id; static DummyBufferPoolController dummy; return &dummy; } diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index c18d8ba61a..efe3b936d5 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -48,6 +48,8 @@ #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0 #define CV_OPENCL_SHOW_RUN_ERRORS 0 +#define CV_OPENCL_SHOW_SVM_ERROR_LOG 1 +#define CV_OPENCL_SHOW_SVM_LOG 0 #include "opencv2/core/bufferpool.hpp" #ifndef LOG_BUFFER_POOL @@ -111,6 +113,20 @@ static size_t getConfigurationParameterForSize(const char* name, size_t defaultV CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str())); } +#if CV_OPENCL_SHOW_SVM_LOG +// TODO add timestamp logging +#define CV_OPENCL_SVM_TRACE_P printf("line %d (ocl.cpp): ", __LINE__); printf +#else 
+#define CV_OPENCL_SVM_TRACE_P(...) +#endif + +#if CV_OPENCL_SHOW_SVM_ERROR_LOG +// TODO add timestamp logging +#define CV_OPENCL_SVM_TRACE_ERROR_P printf("Error on line %d (ocl.cpp): ", __LINE__); printf +#else +#define CV_OPENCL_SVM_TRACE_ERROR_P(...) +#endif + #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp" #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp" @@ -920,6 +936,7 @@ OCL_FUNC(cl_int, clGetSupportedImageFormats, cl_uint * num_image_formats), (context, flags, image_type, num_entries, image_formats, num_image_formats)) + /* OCL_FUNC(cl_int, clGetMemObjectInfo, (cl_mem memobj, @@ -1342,6 +1359,12 @@ static bool isRaiseError() #define CV_OclDbgAssert(expr) do { if (isRaiseError()) { CV_Assert(expr); } else { (void)(expr); } } while ((void)0, 0) #endif +#ifdef HAVE_OPENCL_SVM +#include "opencv2/core/opencl/runtime/opencl_svm_20.hpp" +#include "opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp" +#include "opencv2/core/opencl/opencl_svm.hpp" +#endif + namespace cv { namespace ocl { struct UMat2D @@ -1627,6 +1650,15 @@ bool haveAmdFft() #endif +bool haveSVM() +{ +#ifdef HAVE_OPENCL_SVM + return true; +#else + return false; +#endif +} + void finish() { Queue::getDefault().finish(); @@ -2357,12 +2389,86 @@ not_found: } #endif +#ifdef HAVE_OPENCL_SVM +namespace svm { + +enum AllocatorFlags { // don't use first 16 bits + OPENCL_SVM_COARSE_GRAIN_BUFFER = 1 << 16, // clSVMAlloc + SVM map/unmap + OPENCL_SVM_FINE_GRAIN_BUFFER = 2 << 16, // clSVMAlloc + OPENCL_SVM_FINE_GRAIN_SYSTEM = 3 << 16, // direct access + OPENCL_SVM_BUFFER_MASK = 3 << 16, + OPENCL_SVM_BUFFER_MAP = 4 << 16 +}; + +static bool checkForceSVMUmatUsage() +{ + static bool initialized = false; + static bool force = false; + if (!initialized) + { + force = getBoolParameter("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false); + initialized = true; + } + return force; +} +static bool checkDisableSVMUMatUsage() +{ + static bool initialized = false; + static bool force = false; + if 
(!initialized) + { + force = getBoolParameter("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false); + initialized = true; + } + return force; +} +static bool checkDisableSVM() +{ + static bool initialized = false; + static bool force = false; + if (!initialized) + { + force = getBoolParameter("OPENCV_OPENCL_SVM_DISABLE", false); + initialized = true; + } + return force; +} +// see SVMCapabilities +static unsigned int getSVMCapabilitiesMask() +{ + static bool initialized = false; + static unsigned int mask = 0; + if (!initialized) + { + const char* envValue = getenv("OPENCV_OPENCL_SVM_CAPABILITIES_MASK"); + if (envValue == NULL) + { + return ~0U; // all bits 1 + } + mask = atoi(envValue); + initialized = true; + } + return mask; +} +} // namespace +#endif + struct Context::Impl { - Impl() + static Context::Impl* get(Context& context) { return context.p; } + + void __init() { refcount = 1; handle = 0; +#ifdef HAVE_OPENCL_SVM + svmInitialized = false; +#endif + } + + Impl() + { + __init(); } void setDefault() @@ -2401,8 +2507,7 @@ struct Context::Impl Impl(int dtype0) { - refcount = 1; - handle = 0; + __init(); cl_int retval = 0; cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr(); @@ -2419,7 +2524,7 @@ struct Context::Impl AutoBuffer dlistbuf(nd0*2+1); cl_device_id* dlist = (cl_device_id*)(void**)dlistbuf; cl_device_id* dlist_new = dlist + nd0; - CV_OclDbgAssert(clGetDeviceIDs( pl, dtype, nd0, dlist, &nd0 ) == CL_SUCCESS); + CV_OclDbgAssert(clGetDeviceIDs( pl, dtype, nd0, dlist, &nd0 ) == CL_SUCCESS); String name0; for(i = 0; i < nd0; i++) @@ -2496,6 +2601,144 @@ struct Context::Impl }; typedef std::map phash_t; phash_t phash; + +#ifdef HAVE_OPENCL_SVM + bool svmInitialized; + bool svmAvailable; + bool svmEnabled; + svm::SVMCapabilities svmCapabilities; + svm::SVMFunctions svmFunctions; + + void svmInit() + { + CV_Assert(handle != NULL); + const Device& device = devices[0]; + cl_device_svm_capabilities deviceCaps = 0; + CV_Assert(((void)0, 
CL_DEVICE_SVM_CAPABILITIES == CL_DEVICE_SVM_CAPABILITIES_AMD)); // Check assumption + cl_int status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_SVM_CAPABILITIES, sizeof(deviceCaps), &deviceCaps, NULL); + if (status != CL_SUCCESS) + { + CV_OPENCL_SVM_TRACE_ERROR_P("CL_DEVICE_SVM_CAPABILITIES via clGetDeviceInfo failed: %d\n", status); + goto noSVM; + } + CV_OPENCL_SVM_TRACE_P("CL_DEVICE_SVM_CAPABILITIES returned: 0x%x\n", (int)deviceCaps); + CV_Assert(((void)0, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER == CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD)); // Check assumption + svmCapabilities.value_ = + ((deviceCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_COARSE_GRAIN_BUFFER : 0) | + ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_FINE_GRAIN_BUFFER : 0) | + ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) ? svm::SVMCapabilities::SVM_FINE_GRAIN_SYSTEM : 0) | + ((deviceCaps & CL_DEVICE_SVM_ATOMICS) ? svm::SVMCapabilities::SVM_ATOMICS : 0); + svmCapabilities.value_ &= svm::getSVMCapabilitiesMask(); + if (svmCapabilities.value_ == 0) + { + CV_OPENCL_SVM_TRACE_ERROR_P("svmCapabilities is empty\n"); + goto noSVM; + } + try + { + // Try OpenCL 2.0 + CV_OPENCL_SVM_TRACE_P("Try SVM from OpenCL 2.0 ...\n"); + void* ptr = clSVMAlloc(handle, CL_MEM_READ_WRITE, 100, 0); + if (!ptr) + { + CV_OPENCL_SVM_TRACE_ERROR_P("clSVMAlloc returned NULL...\n"); + CV_ErrorNoReturn(Error::StsBadArg, "clSVMAlloc returned NULL"); + } + try + { + bool error = false; + cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); + if (CL_SUCCESS != clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE, ptr, 100, 0, NULL, NULL)) + { + CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMMap failed...\n"); + CV_ErrorNoReturn(Error::StsBadArg, "clEnqueueSVMMap FAILED"); + } + clFinish(q); + try + { + ((int*)ptr)[0] = 100; + } + catch (...) 
+ { + CV_OPENCL_SVM_TRACE_ERROR_P("SVM buffer access test FAILED\n"); + error = true; + } + if (CL_SUCCESS != clEnqueueSVMUnmap(q, ptr, 0, NULL, NULL)) + { + CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMUnmap failed...\n"); + CV_ErrorNoReturn(Error::StsBadArg, "clEnqueueSVMUnmap FAILED"); + } + clFinish(q); + if (error) + { + CV_ErrorNoReturn(Error::StsBadArg, "OpenCL SVM buffer access test was FAILED"); + } + } + catch (...) + { + CV_OPENCL_SVM_TRACE_ERROR_P("OpenCL SVM buffer access test was FAILED\n"); + clSVMFree(handle, ptr); + throw; + } + clSVMFree(handle, ptr); + svmFunctions.fn_clSVMAlloc = clSVMAlloc; + svmFunctions.fn_clSVMFree = clSVMFree; + svmFunctions.fn_clSetKernelArgSVMPointer = clSetKernelArgSVMPointer; + //svmFunctions.fn_clSetKernelExecInfo = clSetKernelExecInfo; + //svmFunctions.fn_clEnqueueSVMFree = clEnqueueSVMFree; + svmFunctions.fn_clEnqueueSVMMemcpy = clEnqueueSVMMemcpy; + svmFunctions.fn_clEnqueueSVMMemFill = clEnqueueSVMMemFill; + svmFunctions.fn_clEnqueueSVMMap = clEnqueueSVMMap; + svmFunctions.fn_clEnqueueSVMUnmap = clEnqueueSVMUnmap; + } + catch (...) 
+ { + CV_OPENCL_SVM_TRACE_P("clSVMAlloc failed, trying HSA extension...\n"); + try + { + // Try HSA extension + String extensions = device.extensions(); + if (extensions.find("cl_amd_svm") == String::npos) + { + CV_OPENCL_SVM_TRACE_P("Device extension doesn't have cl_amd_svm: %s\n", extensions.c_str()); + goto noSVM; + } + cl_platform_id p = NULL; + status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &p, NULL); + CV_Assert(status == CL_SUCCESS); + svmFunctions.fn_clSVMAlloc = (clSVMAllocAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMAllocAMD"); + svmFunctions.fn_clSVMFree = (clSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMFreeAMD"); + svmFunctions.fn_clSetKernelArgSVMPointer = (clSetKernelArgSVMPointerAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelArgSVMPointerAMD"); + //svmFunctions.fn_clSetKernelExecInfo = (clSetKernelExecInfoAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelExecInfoAMD"); + //svmFunctions.fn_clEnqueueSVMFree = (clEnqueueSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMFreeAMD"); + svmFunctions.fn_clEnqueueSVMMemcpy = (clEnqueueSVMMemcpyAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemcpyAMD"); + svmFunctions.fn_clEnqueueSVMMemFill = (clEnqueueSVMMemFillAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemFillAMD"); + svmFunctions.fn_clEnqueueSVMMap = (clEnqueueSVMMapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMapAMD"); + svmFunctions.fn_clEnqueueSVMUnmap = (clEnqueueSVMUnmapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMUnmapAMD"); + CV_Assert(svmFunctions.isValid()); + } + catch (...) 
+ { + CV_OPENCL_SVM_TRACE_P("Something is totally wrong\n"); + goto noSVM; + } + } + + svmAvailable = true; + svmEnabled = !svm::checkDisableSVM(); + svmInitialized = true; + CV_OPENCL_SVM_TRACE_P("OpenCV OpenCL SVM support initialized\n"); + return; + noSVM: + CV_OPENCL_SVM_TRACE_P("OpenCL SVM is not detected\n"); + svmAvailable = false; + svmEnabled = false; + svmCapabilities.value_ = 0; + svmInitialized = true; + svmFunctions.fn_clSVMAlloc = NULL; + return; + } +#endif }; @@ -2610,6 +2853,71 @@ Program Context::getProg(const ProgramSource& prog, return p ? p->getProg(prog, buildopts, errmsg) : Program(); } + + +#ifdef HAVE_OPENCL_SVM +bool Context::useSVM() const +{ + Context::Impl* i = p; + CV_Assert(i); + if (!i->svmInitialized) + i->svmInit(); + return i->svmEnabled; +} +void Context::setUseSVM(bool enabled) +{ + Context::Impl* i = p; + CV_Assert(i); + if (!i->svmInitialized) + i->svmInit(); + if (enabled && !i->svmAvailable) + { + CV_ErrorNoReturn(Error::StsError, "OpenCL Shared Virtual Memory (SVM) is not supported by OpenCL device"); + } + i->svmEnabled = enabled; +} +#else +bool Context::useSVM() const { return false; } +void Context::setUseSVM(bool enabled) { CV_Assert(!enabled); } +#endif + +#ifdef HAVE_OPENCL_SVM +namespace svm { + +const SVMCapabilities getSVMCapabilitites(const ocl::Context& context) +{ + Context::Impl* i = context.p; + CV_Assert(i); + if (!i->svmInitialized) + i->svmInit(); + return i->svmCapabilities; +} + +CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context) +{ + Context::Impl* i = context.p; + CV_Assert(i); + CV_Assert(i->svmInitialized); // getSVMCapabilitites() must be called first + CV_Assert(i->svmFunctions.fn_clSVMAlloc != NULL); + return &i->svmFunctions; +} + +CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags) +{ + if (checkForceSVMUmatUsage()) + return true; + if (checkDisableSVMUMatUsage()) + return false; + if ((usageFlags & USAGE_ALLOCATE_SHARED_MEMORY) != 0) + return true; + return false; // 
don't use SVM by default +} + +} // namespace cv::ocl::svm +#endif // HAVE_OPENCL_SVM + + + void initializeContextFromHandle(Context& ctx, void* platform, void* _context, void* _device) { cl_context context = (cl_context)_context; @@ -2979,12 +3287,33 @@ int Kernel::set(int i, const KernelArg& arg) return -1; } +#ifdef HAVE_OPENCL_SVM + if ((arg.m->u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) + { + const Context& ctx = Context::getDefault(); + const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); + uchar*& svmDataPtr = (uchar*&)arg.m->u->handle; + CV_OPENCL_SVM_TRACE_P("clSetKernelArgSVMPointer: %p\n", svmDataPtr); +#if 1 // TODO + cl_int status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, svmDataPtr); +#else + cl_int status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, &svmDataPtr); +#endif + CV_Assert(status == CL_SUCCESS); + } + else +#endif + { + CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS); + } + if (ptronly) - CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i++, sizeof(h), &h) == CL_SUCCESS); + { + i++; + } else if( arg.m->dims <= 2 ) { UMat2D u2d(*arg.m); - CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS); CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step) == CL_SUCCESS); CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset) == CL_SUCCESS); i += 3; @@ -3000,7 +3329,6 @@ int Kernel::set(int i, const KernelArg& arg) else { UMat3D u3d(*arg.m); - CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS); CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep) == CL_SUCCESS); CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step) == CL_SUCCESS); CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset) == CL_SUCCESS); @@ -3433,39 +3761,55 
@@ ProgramSource::hash_t ProgramSource::hash() const //////////////////////////////////////////// OpenCLAllocator ////////////////////////////////////////////////// +template class OpenCLBufferPool { protected: ~OpenCLBufferPool() { } public: - virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity) = 0; - virtual void release(cl_mem handle, size_t capacity) = 0; + virtual T allocate(size_t size) = 0; + virtual void release(T buffer) = 0; }; -class OpenCLBufferPoolImpl : public BufferPoolController, public OpenCLBufferPool +template +class OpenCLBufferPoolBaseImpl : public BufferPoolController, public OpenCLBufferPool { -public: - struct BufferEntry - { - cl_mem clBuffer_; - size_t capacity_; - }; +private: + inline Derived& derived() { return *static_cast(this); } protected: Mutex mutex_; size_t currentReservedSize; size_t maxReservedSize; - std::list reservedEntries_; // LRU order + std::list allocatedEntries_; // Allocated and used entries + std::list reservedEntries_; // LRU order. 
Allocated, but not used entries + + // synchronized + bool _findAndRemoveEntryFromAllocatedList(CV_OUT BufferEntry& entry, T buffer) + { + typename std::list::iterator i = allocatedEntries_.begin(); + for (; i != allocatedEntries_.end(); ++i) + { + BufferEntry& e = *i; + if (e.clBuffer_ == buffer) + { + entry = e; + allocatedEntries_.erase(i); + return true; + } + } + return false; + } // synchronized bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size) { if (reservedEntries_.empty()) return false; - std::list::iterator i = reservedEntries_.begin(); - std::list::iterator result_pos = reservedEntries_.end(); - BufferEntry result = {NULL, 0}; + typename std::list::iterator i = reservedEntries_.begin(); + typename std::list::iterator result_pos = reservedEntries_.end(); + BufferEntry result; size_t minDiff = (size_t)(-1); for (; i != reservedEntries_.end(); ++i) { @@ -3489,6 +3833,7 @@ protected: reservedEntries_.erase(result_pos); entry = result; currentReservedSize -= entry.capacity_; + allocatedEntries_.push_back(entry); return true; } return false; @@ -3503,7 +3848,7 @@ protected: const BufferEntry& entry = reservedEntries_.back(); CV_DbgAssert(currentReservedSize >= entry.capacity_); currentReservedSize -= entry.capacity_; - _releaseBufferEntry(entry); + derived()._releaseBufferEntry(entry); reservedEntries_.pop_back(); } } @@ -3523,72 +3868,45 @@ protected: return 1024*1024; } - void _allocateBufferEntry(BufferEntry& entry, size_t size) - { - CV_DbgAssert(entry.clBuffer_ == NULL); - entry.capacity_ = alignSize(size, (int)_allocationGranularity(size)); - Context& ctx = Context::getDefault(); - cl_int retval = CL_SUCCESS; - entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE, entry.capacity_, 0, &retval); - CV_Assert(retval == CL_SUCCESS); - CV_Assert(entry.clBuffer_ != NULL); - if(retval == CL_SUCCESS) - { - CV_IMPL_ADD(CV_IMPL_OCL); - } - LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n", - (long 
long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_); - } - - void _releaseBufferEntry(const BufferEntry& entry) - { - CV_Assert(entry.capacity_ != 0); - CV_Assert(entry.clBuffer_ != NULL); - LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n", - entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_); - clReleaseMemObject(entry.clBuffer_); - } public: - OpenCLBufferPoolImpl() - : currentReservedSize(0), maxReservedSize(0) + OpenCLBufferPoolBaseImpl() + : currentReservedSize(0), + maxReservedSize(0) { - int poolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0; - maxReservedSize = getConfigurationParameterForSize("OPENCV_OPENCL_BUFFERPOOL_LIMIT", poolSize); + // nothing } - virtual ~OpenCLBufferPoolImpl() + virtual ~OpenCLBufferPoolBaseImpl() { freeAllReservedBuffers(); CV_Assert(reservedEntries_.empty()); } public: - virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity) + virtual T allocate(size_t size) { - BufferEntry entry = {NULL, 0}; - if (maxReservedSize > 0) + AutoLock locker(mutex_); + BufferEntry entry; + if (maxReservedSize > 0 && _findAndRemoveEntryFromReservedList(entry, size)) { - AutoLock locker(mutex_); - if (_findAndRemoveEntryFromReservedList(entry, size)) - { - CV_DbgAssert(size <= entry.capacity_); - LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_); - capacity = entry.capacity_; - return entry.clBuffer_; - } + CV_DbgAssert(size <= entry.capacity_); + LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_); + } + else + { + derived()._allocateBufferEntry(entry, size); } - _allocateBufferEntry(entry, size); - capacity = entry.capacity_; return entry.clBuffer_; } - virtual void release(cl_mem handle, size_t capacity) + virtual void release(T buffer) { - BufferEntry entry = {handle, capacity}; + AutoLock locker(mutex_); + BufferEntry entry; + CV_Assert(_findAndRemoveEntryFromAllocatedList(entry, buffer)); if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 
8) { - _releaseBufferEntry(entry); + derived()._releaseBufferEntry(entry); } else { - AutoLock locker(mutex_); reservedEntries_.push_front(entry); currentReservedSize += entry.capacity_; _checkSizeOfReservedEntries(); @@ -3604,7 +3922,7 @@ public: maxReservedSize = size; if (maxReservedSize < oldMaxReservedSize) { - std::list::iterator i = reservedEntries_.begin(); + typename std::list::iterator i = reservedEntries_.begin(); for (; i != reservedEntries_.end();) { const BufferEntry& entry = *i; @@ -3612,7 +3930,7 @@ public: { CV_DbgAssert(currentReservedSize >= entry.capacity_); currentReservedSize -= entry.capacity_; - _releaseBufferEntry(entry); + derived()._releaseBufferEntry(entry); i = reservedEntries_.erase(i); continue; } @@ -3624,16 +3942,123 @@ public: virtual void freeAllReservedBuffers() { AutoLock locker(mutex_); - std::list::const_iterator i = reservedEntries_.begin(); + typename std::list::const_iterator i = reservedEntries_.begin(); for (; i != reservedEntries_.end(); ++i) { const BufferEntry& entry = *i; - _releaseBufferEntry(entry); + derived()._releaseBufferEntry(entry); } reservedEntries_.clear(); } }; +struct CLBufferEntry +{ + cl_mem clBuffer_; + size_t capacity_; + CLBufferEntry() : clBuffer_((cl_mem)NULL), capacity_(0) { } +}; + +class OpenCLBufferPoolImpl : public OpenCLBufferPoolBaseImpl +{ +public: + typedef struct CLBufferEntry BufferEntry; +protected: + int createFlags_; +public: + OpenCLBufferPoolImpl(int createFlags = 0) + : createFlags_(createFlags) + { + } + + void _allocateBufferEntry(BufferEntry& entry, size_t size) + { + CV_DbgAssert(entry.clBuffer_ == NULL); + entry.capacity_ = alignSize(size, (int)_allocationGranularity(size)); + Context& ctx = Context::getDefault(); + cl_int retval = CL_SUCCESS; + entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE|createFlags_, entry.capacity_, 0, &retval); + CV_Assert(retval == CL_SUCCESS); + CV_Assert(entry.clBuffer_ != NULL); + if(retval == CL_SUCCESS) + { + 
CV_IMPL_ADD(CV_IMPL_OCL); + } + LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n", + (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_); + allocatedEntries_.push_back(entry); + } + + void _releaseBufferEntry(const BufferEntry& entry) + { + CV_Assert(entry.capacity_ != 0); + CV_Assert(entry.clBuffer_ != NULL); + LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n", + entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_); + clReleaseMemObject(entry.clBuffer_); + } +}; + +#ifdef HAVE_OPENCL_SVM +struct CLSVMBufferEntry +{ + void* clBuffer_; + size_t capacity_; + CLSVMBufferEntry() : clBuffer_(NULL), capacity_(0) { } +}; +class OpenCLSVMBufferPoolImpl : public OpenCLBufferPoolBaseImpl +{ +public: + typedef struct CLSVMBufferEntry BufferEntry; +public: + OpenCLSVMBufferPoolImpl() + { + } + + void _allocateBufferEntry(BufferEntry& entry, size_t size) + { + CV_DbgAssert(entry.clBuffer_ == NULL); + entry.capacity_ = alignSize(size, (int)_allocationGranularity(size)); + + Context& ctx = Context::getDefault(); + const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx); + bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer(); + cl_svm_mem_flags memFlags = CL_MEM_READ_WRITE | + (isFineGrainBuffer ? 
CL_MEM_SVM_FINE_GRAIN_BUFFER : 0); + + const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); + CV_DbgAssert(svmFns->isValid()); + + CV_OPENCL_SVM_TRACE_P("clSVMAlloc: %d\n", (int)entry.capacity_); + void *buf = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, entry.capacity_, 0); + CV_Assert(buf); + + entry.clBuffer_ = buf; + { + CV_IMPL_ADD(CV_IMPL_OCL); + } + LOG_BUFFER_POOL("OpenCL SVM allocate %lld (0x%llx) bytes: %p\n", + (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_); + allocatedEntries_.push_back(entry); + } + + void _releaseBufferEntry(const BufferEntry& entry) + { + CV_Assert(entry.capacity_ != 0); + CV_Assert(entry.clBuffer_ != NULL); + LOG_BUFFER_POOL("OpenCL release SVM buffer: %p, %lld (0x%llx) bytes\n", + entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_); + Context& ctx = Context::getDefault(); + const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); + CV_DbgAssert(svmFns->isValid()); + CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", entry.clBuffer_); + svmFns->fn_clSVMFree((cl_context)ctx.ptr(), entry.clBuffer_); + } +}; +#endif + + + #if defined _MSC_VER #pragma warning(disable:4127) // conditional expression is constant #endif @@ -3697,12 +4122,37 @@ private: class OpenCLAllocator : public MatAllocator { mutable OpenCLBufferPoolImpl bufferPool; + mutable OpenCLBufferPoolImpl bufferPoolHostPtr; +#ifdef HAVE_OPENCL_SVM + mutable OpenCLSVMBufferPoolImpl bufferPoolSVM; +#endif + enum AllocatorFlags { - ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0 + ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0, + ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED = 1 << 1 +#ifdef HAVE_OPENCL_SVM + ,ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED = 1 << 2 +#endif }; public: - OpenCLAllocator() { matStdAllocator = Mat::getStdAllocator(); } + OpenCLAllocator() + : bufferPool(0), + bufferPoolHostPtr(CL_MEM_ALLOC_HOST_PTR) + { + size_t defaultPoolSize, poolSize; + defaultPoolSize = ocl::Device::getDefault().isIntel() ? 
1 << 27 : 0; + poolSize = getConfigurationParameterForSize("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize); + bufferPool.setMaxReservedSize(poolSize); + poolSize = getConfigurationParameterForSize("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize); + bufferPoolHostPtr.setMaxReservedSize(poolSize); +#ifdef HAVE_OPENCL_SVM + poolSize = getConfigurationParameterForSize("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize); + bufferPoolSVM.setMaxReservedSize(poolSize); +#endif + + matStdAllocator = Mat::getStdAllocator(); + } UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step, int flags, UMatUsageFlags usageFlags) const @@ -3739,33 +4189,47 @@ public: } Context& ctx = Context::getDefault(); + int createFlags = 0, flags0 = 0; getBestFlags(ctx, flags, usageFlags, createFlags, flags0); - size_t capacity = 0; void* handle = NULL; int allocatorFlags = 0; + +#ifdef HAVE_OPENCL_SVM + const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx); + if (ctx.useSVM() && svm::useSVM(usageFlags) && !svmCaps.isNoSVMSupport()) + { + allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED; + handle = bufferPoolSVM.allocate(total); + + // this property is constant, so single buffer pool can be used here + bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer(); + allocatorFlags |= isFineGrainBuffer ? 
svm::OPENCL_SVM_FINE_GRAIN_BUFFER : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER; + } + else +#endif if (createFlags == 0) { - handle = bufferPool.allocate(total, capacity); - if (!handle) - return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags); allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED; + handle = bufferPool.allocate(total); + } + else if (createFlags == CL_MEM_ALLOC_HOST_PTR) + { + allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED; + handle = bufferPoolHostPtr.allocate(total); } else { - capacity = total; - cl_int retval = 0; - handle = clCreateBuffer((cl_context)ctx.ptr(), - CL_MEM_READ_WRITE|createFlags, total, 0, &retval); - if( !handle || retval != CL_SUCCESS ) - return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags); - CV_IMPL_ADD(CV_IMPL_OCL) + CV_Assert(handle != NULL); // Unsupported, throw } + + if (!handle) + return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags); + UMatData* u = new UMatData(this); u->data = 0; u->size = total; - u->capacity = capacity; u->handle = handle; u->flags = flags0; u->allocatorFlags_ = allocatorFlags; @@ -3788,22 +4252,81 @@ public: getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0); cl_context ctx_handle = (cl_context)ctx.ptr(); - cl_int retval = 0; - int tempUMatFlags = UMatData::TEMP_UMAT; - u->handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|CL_MEM_READ_WRITE, - u->size, u->origdata, &retval); - if((!u->handle || retval != CL_SUCCESS) && !(accessFlags & ACCESS_FAST)) + int allocatorFlags = 0; + int tempUMatFlags = 0; + void* handle = NULL; + cl_int retval = CL_SUCCESS; + +#ifdef HAVE_OPENCL_SVM + svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx); + bool useSVM = ctx.useSVM() && svm::useSVM(usageFlags); + if (useSVM && svmCaps.isSupportFineGrainSystem()) { - u->handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, - u->size, u->origdata, &retval); - tempUMatFlags = UMatData::TEMP_COPIED_UMAT; + 
allocatorFlags = svm::OPENCL_SVM_FINE_GRAIN_SYSTEM; + tempUMatFlags = UMatData::TEMP_UMAT; + handle = u->origdata; + CV_OPENCL_SVM_TRACE_P("Use fine grain system: %d (%p)\n", (int)u->size, handle); + } + else if (useSVM && (svmCaps.isSupportFineGrainBuffer() || svmCaps.isSupportCoarseGrainBuffer())) + { + if (!(accessFlags & ACCESS_FAST)) // memcpy used + { + bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer(); + cl_svm_mem_flags memFlags = createFlags | + (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0); + + const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); + CV_DbgAssert(svmFns->isValid()); + + CV_OPENCL_SVM_TRACE_P("clSVMAlloc + copy: %d\n", (int)u->size); + handle = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, u->size, 0); + CV_Assert(handle); + + cl_command_queue q = NULL; + if (!isFineGrainBuffer) + { + q = (cl_command_queue)Queue::getDefault().ptr(); + CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", handle, (int)u->size); + cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE, + handle, u->size, + 0, NULL, NULL); + CV_Assert(status == CL_SUCCESS); + + } + memcpy(handle, u->origdata, u->size); + if (!isFineGrainBuffer) + { + CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", handle); + cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, handle, 0, NULL, NULL); + CV_Assert(status == CL_SUCCESS); + } + + tempUMatFlags = UMatData::TEMP_UMAT | UMatData::TEMP_COPIED_UMAT; + allocatorFlags |= isFineGrainBuffer ? 
svm::OPENCL_SVM_FINE_GRAIN_BUFFER + : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER; + } + } + else +#endif + { + tempUMatFlags = UMatData::TEMP_UMAT; + handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags, + u->size, u->origdata, &retval); + if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST)) + { + handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, + u->size, u->origdata, &retval); + tempUMatFlags |= UMatData::TEMP_COPIED_UMAT; + } } - if(!u->handle || retval != CL_SUCCESS) + if(!handle || retval != CL_SUCCESS) return false; + u->handle = handle; u->prevAllocator = u->currAllocator; u->currAllocator = this; u->flags |= tempUMatFlags; + u->allocatorFlags_ = allocatorFlags; } if(accessFlags & ACCESS_WRITE) u->markHostCopyObsolete(true); @@ -3848,34 +4371,93 @@ public: CV_Assert(u->urefcount >= 0); CV_Assert(u->refcount >= 0); - // TODO: !!! when we add Shared Virtual Memory Support, - // this function (as well as the others) should be corrected CV_Assert(u->handle != 0 && u->urefcount == 0); if(u->tempUMat()) { // UMatDataAutoLock lock(u); + if( u->hostCopyObsolete() && u->refcount > 0 ) { - cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); - if( u->tempCopiedUMat() ) +#ifdef HAVE_OPENCL_SVM + if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) { - AlignedDataPtr alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT); - CV_OclDbgAssert(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, - u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS); + Context& ctx = Context::getDefault(); + const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); + CV_DbgAssert(svmFns->isValid()); + + if( u->tempCopiedUMat() ) + { + CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER || + (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER); + bool isFineGrainBuffer = (u->allocatorFlags_ & 
svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER; + cl_command_queue q = NULL; + if (!isFineGrainBuffer) + { + CV_DbgAssert(((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0)); + q = (cl_command_queue)Queue::getDefault().ptr(); + CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size); + cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ, + u->handle, u->size, + 0, NULL, NULL); + CV_Assert(status == CL_SUCCESS); + } + clFinish(q); + memcpy(u->origdata, u->handle, u->size); + if (!isFineGrainBuffer) + { + CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle); + cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL); + CV_Assert(status == CL_SUCCESS); + } + } + else + { + CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM); + // nothing + } } else +#endif + { + cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); + if( u->tempCopiedUMat() ) + { + AlignedDataPtr alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT); + CV_OclDbgAssert(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, + u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS); + } + else + { + // TODO Is it really needed for clCreateBuffer with CL_MEM_USE_HOST_PTR? 
+ cl_int retval = 0; + void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE, + (CL_MAP_READ | CL_MAP_WRITE), + 0, u->size, 0, 0, 0, &retval); + CV_OclDbgAssert(retval == CL_SUCCESS); + CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS); + CV_OclDbgAssert(clFinish(q) == CL_SUCCESS); + } + } + u->markHostCopyObsolete(false); + } +#ifdef HAVE_OPENCL_SVM + if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) + { + if( u->tempCopiedUMat() ) { - cl_int retval = 0; - void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE, - (CL_MAP_READ | CL_MAP_WRITE), - 0, u->size, 0, 0, 0, &retval); - CV_OclDbgAssert(retval == CL_SUCCESS); - CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS); - CV_OclDbgAssert(clFinish(q) == CL_SUCCESS); + Context& ctx = Context::getDefault(); + const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); + CV_DbgAssert(svmFns->isValid()); + + CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", u->handle); + svmFns->fn_clSVMFree((cl_context)ctx.ptr(), u->handle); } } - u->markHostCopyObsolete(false); - clReleaseMemObject((cl_mem)u->handle); + else +#endif + { + clReleaseMemObject((cl_mem)u->handle); + } u->handle = 0; u->currAllocator = u->prevAllocator; if(u->data && u->copyOnMap() && !(u->flags & UMatData::USER_ALLOCATED)) @@ -3894,14 +4476,42 @@ public: } if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED) { - bufferPool.release((cl_mem)u->handle, u->capacity); + bufferPool.release((cl_mem)u->handle); + } + else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED) + { + bufferPoolHostPtr.release((cl_mem)u->handle); + } +#ifdef HAVE_OPENCL_SVM + else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED) + { + if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM) + { + //nothing + } + else if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER || + 
(u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER) + { + Context& ctx = Context::getDefault(); + const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); + CV_DbgAssert(svmFns->isValid()); + cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); + + if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0) + { + CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle); + cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL); + CV_Assert(status == CL_SUCCESS); + } + } + bufferPoolSVM.release((void*)u->handle); } +#endif else { clReleaseMemObject((cl_mem)u->handle); } u->handle = 0; - u->capacity = 0; delete u; } } @@ -3925,13 +4535,41 @@ public: { if( !u->copyOnMap() ) { + // TODO + // because there can be other map requests for the same UMat with different access flags, + // we use the universal (read-write) access mode. +#ifdef HAVE_OPENCL_SVM + if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) + { + if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER) + { + Context& ctx = Context::getDefault(); + const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); + CV_DbgAssert(svmFns->isValid()); + + if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0) + { + CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size); + cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, + u->handle, u->size, + 0, NULL, NULL); + CV_Assert(status == CL_SUCCESS); + u->allocatorFlags_ |= svm::OPENCL_SVM_BUFFER_MAP; + } + } + clFinish(q); + u->data = (uchar*)u->handle; + u->markHostCopyObsolete(false); + u->markDeviceMemMapped(true); + return; + } +#endif if (u->data) // FIXIT Workaround for UMat synchronization issue { //CV_Assert(u->hostCopyObsolete() == false); return; } - // because there can be other map requests for the same UMat with different access flags, - // we use the universal (read-write) 
access mode. + cl_int retval = 0; u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE, (CL_MAP_READ | CL_MAP_WRITE), @@ -3943,6 +4581,7 @@ public: return; } + // TODO Is it really a good idea and was it tested well? // if map failed, switch to copy-on-map mode for the particular buffer u->flags |= UMatData::COPY_ON_MAP; } @@ -3957,6 +4596,9 @@ public: if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() ) { AlignedDataPtr alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT); +#ifdef HAVE_OPENCL_SVM + CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0); +#endif CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS ); u->markHostCopyObsolete(false); @@ -3983,6 +4625,31 @@ public: { CV_Assert(u->data != NULL); u->markDeviceMemMapped(false); +#ifdef HAVE_OPENCL_SVM + if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) + { + if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER) + { + Context& ctx = Context::getDefault(); + const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); + CV_DbgAssert(svmFns->isValid()); + + CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0); + { + CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle); + cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, + 0, NULL, NULL); + CV_Assert(status == CL_SUCCESS); + clFinish(q); + u->allocatorFlags_ &= ~svm::OPENCL_SVM_BUFFER_MAP; + } + } + u->data = 0; + u->markDeviceCopyObsolete(false); + u->markHostCopyObsolete(false); + return; + } +#endif CV_Assert( (retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0)) == CL_SUCCESS ); if (Device::getDefault().isAMD()) @@ -3995,6 +4662,9 @@ public: else if( u->copyOnMap() && u->deviceCopyObsolete() ) { AlignedDataPtr alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT); +#ifdef HAVE_OPENCL_SVM + CV_DbgAssert((u->allocatorFlags_ & 
svm::OPENCL_SVM_BUFFER_MASK) == 0); +#endif CV_Assert( (retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0)) == CL_SUCCESS ); } @@ -4102,17 +4772,78 @@ public: srcrawofs, new_srcofs, new_srcstep, dstrawofs, new_dstofs, new_dststep); - AlignedDataPtr alignedPtr((uchar*)dstptr, sz[0] * dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT); - if( iscontinuous ) +#ifdef HAVE_OPENCL_SVM + if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) { - CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, - srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS ); + CV_DbgAssert(u->data == NULL || u->data == u->handle); + Context& ctx = Context::getDefault(); + const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); + CV_DbgAssert(svmFns->isValid()); + + CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0); + if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER) + { + CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size); + cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ, + u->handle, u->size, + 0, NULL, NULL); + CV_Assert(status == CL_SUCCESS); + } + clFinish(q); + if( iscontinuous ) + { + memcpy(dstptr, (uchar*)u->handle + srcrawofs, total); + } + else + { + // This code is from MatAllocator::download() + int isz[CV_MAX_DIM]; + uchar* srcptr = (uchar*)u->handle; + for( int i = 0; i < dims; i++ ) + { + CV_Assert( sz[i] <= (size_t)INT_MAX ); + if( sz[i] == 0 ) + return; + if( srcofs ) + srcptr += srcofs[i]*(i <= dims-2 ? 
srcstep[i] : 1); + isz[i] = (int)sz[i]; + } + + Mat src(dims, isz, CV_8U, srcptr, srcstep); + Mat dst(dims, isz, CV_8U, dstptr, dststep); + + const Mat* arrays[] = { &src, &dst }; + uchar* ptrs[2]; + NAryMatIterator it(arrays, ptrs, 2); + size_t j, planesz = it.size; + + for( j = 0; j < it.nplanes; j++, ++it ) + memcpy(ptrs[1], ptrs[0], planesz); + } + if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER) + { + CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle); + cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, + 0, NULL, NULL); + CV_Assert(status == CL_SUCCESS); + clFinish(q); + } } else +#endif { - CV_Assert( clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE, - new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1], - new_dststep[0], new_dststep[1], alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS ); + AlignedDataPtr alignedPtr((uchar*)dstptr, sz[0] * dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT); + if( iscontinuous ) + { + CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, + srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0) >= 0 ); + } + else + { + CV_Assert( clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE, + new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1], + new_dststep[0], new_dststep[1], alignedPtr.getAlignedPtr(), 0, 0, 0) >= 0 ); + } } } @@ -4153,20 +4884,91 @@ public: CV_Assert( u->handle != 0 ); cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); - AlignedDataPtr alignedPtr((uchar*)srcptr, sz[0] * srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT); - if( iscontinuous ) +#ifdef HAVE_OPENCL_SVM + if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) { - CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle, - CL_TRUE, dstrawofs, total, srcptr, 0, 0, 0) == CL_SUCCESS ); + CV_DbgAssert(u->data == NULL || u->data == u->handle); + Context& ctx = Context::getDefault(); + const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); + 
CV_DbgAssert(svmFns->isValid()); + + CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0); + if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER) + { + CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size); + cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_WRITE, + u->handle, u->size, + 0, NULL, NULL); + CV_Assert(status == CL_SUCCESS); + } + clFinish(q); + if( iscontinuous ) + { + memcpy((uchar*)u->handle + dstrawofs, srcptr, total); + } + else + { + // This code is from MatAllocator::upload() + int isz[CV_MAX_DIM]; + uchar* dstptr = (uchar*)u->handle; + for( int i = 0; i < dims; i++ ) + { + CV_Assert( sz[i] <= (size_t)INT_MAX ); + if( sz[i] == 0 ) + return; + if( dstofs ) + dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1); + isz[i] = (int)sz[i]; + } + + Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep); + Mat dst(dims, isz, CV_8U, dstptr, dststep); + + const Mat* arrays[] = { &src, &dst }; + uchar* ptrs[2]; + NAryMatIterator it(arrays, ptrs, 2); + size_t j, planesz = it.size; + + for( j = 0; j < it.nplanes; j++, ++it ) + memcpy(ptrs[1], ptrs[0], planesz); + } + if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER) + { + CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle); + cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, + 0, NULL, NULL); + CV_Assert(status == CL_SUCCESS); + clFinish(q); + } } else +#endif { - CV_Assert( clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE, - new_dstofs, new_srcofs, new_sz, new_dststep[0], new_dststep[1], - new_srcstep[0], new_srcstep[1], srcptr, 0, 0, 0) == CL_SUCCESS ); + AlignedDataPtr alignedPtr((uchar*)srcptr, sz[0] * srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT); + if( iscontinuous ) + { + CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle, + CL_TRUE, dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0) >= 0 ); + } + else + { + CV_Assert( clEnqueueWriteBufferRect(q, 
(cl_mem)u->handle, CL_TRUE, + new_dstofs, new_srcofs, new_sz, new_dststep[0], new_dststep[1], + new_srcstep[0], new_srcstep[1], alignedPtr.getAlignedPtr(), 0, 0, 0) >= 0 ); + } } - u->markHostCopyObsolete(true); +#ifdef HAVE_OPENCL_SVM + if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER || + (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM) + { + // nothing + } + else +#endif + { + u->markHostCopyObsolete(true); + } u->markDeviceCopyObsolete(false); } @@ -4198,7 +5000,17 @@ public: { download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep); dst->markHostCopyObsolete(false); - dst->markDeviceCopyObsolete(true); +#ifdef HAVE_OPENCL_SVM + if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER || + (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM) + { + // nothing + } + else +#endif + { + dst->markDeviceCopyObsolete(true); + } return; } @@ -4206,26 +5018,110 @@ public: CV_Assert(dst->refcount == 0); cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); - cl_int retval; - if( iscontinuous ) + cl_int retval = CL_SUCCESS; +#ifdef HAVE_OPENCL_SVM + if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 || + (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) { - CV_Assert( (retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle, - srcrawofs, dstrawofs, total, 0, 0, 0)) == CL_SUCCESS ); + if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 && + (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) + { + Context& ctx = Context::getDefault(); + const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); + CV_DbgAssert(svmFns->isValid()); + + if( iscontinuous ) + { + CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMemcpy: %p <-- %p (%d)\n", + (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs, (int)total); + cl_int status = 
svmFns->fn_clEnqueueSVMMemcpy(q, CL_TRUE, + (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs, + total, 0, NULL, NULL); + CV_Assert(status == CL_SUCCESS); + } + else + { + clFinish(q); + // This code is from MatAllocator::download()/upload() + int isz[CV_MAX_DIM]; + uchar* srcptr = (uchar*)src->handle; + for( int i = 0; i < dims; i++ ) + { + CV_Assert( sz[i] <= (size_t)INT_MAX ); + if( sz[i] == 0 ) + return; + if( srcofs ) + srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1); + isz[i] = (int)sz[i]; + } + Mat m_src(dims, isz, CV_8U, srcptr, srcstep); + + uchar* dstptr = (uchar*)dst->handle; + for( int i = 0; i < dims; i++ ) + { + if( dstofs ) + dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1); + } + Mat m_dst(dims, isz, CV_8U, dstptr, dststep); + + const Mat* arrays[] = { &m_src, &m_dst }; + uchar* ptrs[2]; + NAryMatIterator it(arrays, ptrs, 2); + size_t j, planesz = it.size; + + for( j = 0; j < it.nplanes; j++, ++it ) + memcpy(ptrs[1], ptrs[0], planesz); + } + } + else + { + if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) + { + map(src, ACCESS_READ); + upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep); + unmap(src); + } + else + { + map(dst, ACCESS_WRITE); + download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep); + unmap(dst); + } + } } else +#endif { - CV_Assert( (retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle, - new_srcofs, new_dstofs, new_sz, - new_srcstep[0], new_srcstep[1], - new_dststep[0], new_dststep[1], - 0, 0, 0)) == CL_SUCCESS ); + if( iscontinuous ) + { + CV_Assert( (retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle, + srcrawofs, dstrawofs, total, 0, 0, 0)) == CL_SUCCESS ); + } + else + { + CV_Assert( (retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle, + new_srcofs, new_dstofs, new_sz, + new_srcstep[0], new_srcstep[1], + new_dststep[0], new_dststep[1], + 0, 0, 0)) == CL_SUCCESS ); + } } - if(retval == 
CL_SUCCESS) + if (retval == CL_SUCCESS) { CV_IMPL_ADD(CV_IMPL_OCL) } - dst->markHostCopyObsolete(true); +#ifdef HAVE_OPENCL_SVM + if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER || + (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM) + { + // nothing + } + else +#endif + { + dst->markHostCopyObsolete(true); + } dst->markDeviceCopyObsolete(false); if( _sync ) @@ -4234,7 +5130,23 @@ public: } } - BufferPoolController* getBufferPoolController() const { return &bufferPool; } + BufferPoolController* getBufferPoolController(const char* id) const { +#ifdef HAVE_OPENCL_SVM + if ((svm::checkForceSVMUmatUsage() && (id == NULL || strcmp(id, "OCL") == 0)) || (id != NULL && strcmp(id, "SVM") == 0)) + { + return &bufferPoolSVM; + } +#endif + if (id != NULL && strcmp(id, "HOST_ALLOC") == 0) + { + return &bufferPoolHostPtr; + } + if (id != NULL && strcmp(id, "OCL") != 0) + { + CV_ErrorNoReturn(cv::Error::StsBadArg, "getBufferPoolController(): unknown BufferPool ID\n"); + } + return &bufferPool; + } MatAllocator* matStdAllocator; }; @@ -4818,7 +5730,7 @@ void* Image2D::ptr() const return p ? 
p->handle : 0; } -bool isPerformanceCheckBypassed() +bool internal::isPerformanceCheckBypassed() { static bool initialized = false; static bool value = false; @@ -4830,4 +5742,22 @@ bool isPerformanceCheckBypassed() return value; } +bool internal::isCLBuffer(UMat& u) +{ + void* h = u.handle(ACCESS_RW); + if (!h) + return true; + CV_DbgAssert(u.u->currAllocator == getOpenCLAllocator()); +#if 1 + if ((u.u->allocatorFlags_ & 0xffff0000) != 0) // OpenCL SVM flags are stored here + return false; +#else + cl_mem_object_type type = 0; + cl_int ret = clGetMemObjectInfo((cl_mem)h, CL_MEM_TYPE, sizeof(type), &type, NULL); + if (ret != CL_SUCCESS || type != CL_MEM_OBJECT_BUFFER) + return false; +#endif + return true; +} + }} diff --git a/modules/core/src/opencl/runtime/opencl_core.cpp b/modules/core/src/opencl/runtime/opencl_core.cpp index 93f6aae5de..5dd174709d 100644 --- a/modules/core/src/opencl/runtime/opencl_core.cpp +++ b/modules/core/src/opencl/runtime/opencl_core.cpp @@ -182,6 +182,65 @@ static void* opencl_check_fn(int ID); #define CUSTOM_FUNCTION_ID 1000 +#ifdef HAVE_OPENCL_SVM +#include "opencv2/core/opencl/runtime/opencl_svm_20.hpp" +#define SVM_FUNCTION_ID_START CUSTOM_FUNCTION_ID +#define SVM_FUNCTION_ID_END CUSTOM_FUNCTION_ID + 100 + +enum OPENCL_FN_SVM_ID +{ + OPENCL_FN_clSVMAlloc = SVM_FUNCTION_ID_START, + OPENCL_FN_clSVMFree, + OPENCL_FN_clSetKernelArgSVMPointer, + OPENCL_FN_clSetKernelExecInfo, + OPENCL_FN_clEnqueueSVMFree, + OPENCL_FN_clEnqueueSVMMemcpy, + OPENCL_FN_clEnqueueSVMMemFill, + OPENCL_FN_clEnqueueSVMMap, + OPENCL_FN_clEnqueueSVMUnmap, +}; + +void* (CL_API_CALL *clSVMAlloc)(cl_context context, cl_svm_mem_flags flags, size_t size, unsigned int alignment) = + opencl_fn4::switch_fn; +static const struct DynamicFnEntry _clSVMAlloc_definition = { "clSVMAlloc", (void**)&clSVMAlloc}; +void (CL_API_CALL *clSVMFree)(cl_context context, void* svm_pointer) = + opencl_fn2::switch_fn; +static const struct DynamicFnEntry _clSVMFree_definition = { "clSVMFree", 
(void**)&clSVMFree}; +cl_int (CL_API_CALL *clSetKernelArgSVMPointer)(cl_kernel kernel, cl_uint arg_index, const void* arg_value) = + opencl_fn3::switch_fn; +static const struct DynamicFnEntry _clSetKernelArgSVMPointer_definition = { "clSetKernelArgSVMPointer", (void**)&clSetKernelArgSVMPointer}; +//cl_int (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value) = +// opencl_fn4::switch_fn; +//static const struct DynamicFnEntry _clSetKernelExecInfo_definition = { "clSetKernelExecInfo", (void**)&clSetKernelExecInfo}; +//cl_int (CL_API_CALL *clEnqueueSVMFree)(...) = +// opencl_fn8::switch_fn; +//static const struct DynamicFnEntry _clEnqueueSVMFree_definition = { "clEnqueueSVMFree", (void**)&clEnqueueSVMFree}; +cl_int (CL_API_CALL *clEnqueueSVMMemcpy)(cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, const void* src_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) = + opencl_fn8::switch_fn; +static const struct DynamicFnEntry _clEnqueueSVMMemcpy_definition = { "clEnqueueSVMMemcpy", (void**)&clEnqueueSVMMemcpy}; +cl_int (CL_API_CALL *clEnqueueSVMMemFill)(cl_command_queue command_queue, void* svm_ptr, const void* pattern, size_t pattern_size, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) = + opencl_fn8::switch_fn; +static const struct DynamicFnEntry _clEnqueueSVMMemFill_definition = { "clEnqueueSVMMemFill", (void**)&clEnqueueSVMMemFill}; +cl_int (CL_API_CALL *clEnqueueSVMMap)(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags map_flags, void* svm_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) = + opencl_fn8::switch_fn; +static const struct DynamicFnEntry _clEnqueueSVMMap_definition = { "clEnqueueSVMMap", (void**)&clEnqueueSVMMap}; +cl_int (CL_API_CALL *clEnqueueSVMUnmap)(cl_command_queue command_queue, 
void* svm_ptr, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) = + opencl_fn5::switch_fn; +static const struct DynamicFnEntry _clEnqueueSVMUnmap_definition = { "clEnqueueSVMUnmap", (void**)&clEnqueueSVMUnmap}; + +static const struct DynamicFnEntry* opencl_svm_fn_list[] = { + &_clSVMAlloc_definition, + &_clSVMFree_definition, + &_clSetKernelArgSVMPointer_definition, + NULL/*&_clSetKernelExecInfo_definition*/, + NULL/*&_clEnqueueSVMFree_definition*/, + &_clEnqueueSVMMemcpy_definition, + &_clEnqueueSVMMemFill_definition, + &_clEnqueueSVMMap_definition, + &_clEnqueueSVMUnmap_definition, +}; +#endif // HAVE_OPENCL_SVM + // // END OF CUSTOM FUNCTIONS HERE // @@ -194,6 +253,14 @@ static void* opencl_check_fn(int ID) assert(ID >= 0 && ID < (int)(sizeof(opencl_fn_list)/sizeof(opencl_fn_list[0]))); e = opencl_fn_list[ID]; } +#ifdef HAVE_OPENCL_SVM + else if (ID >= SVM_FUNCTION_ID_START && ID < SVM_FUNCTION_ID_END) + { + ID = ID - SVM_FUNCTION_ID_START; + assert(ID >= 0 && ID < (int)(sizeof(opencl_svm_fn_list)/sizeof(opencl_svm_fn_list[0]))); + e = opencl_svm_fn_list[ID]; + } +#endif else { CV_ErrorNoReturn(cv::Error::StsBadArg, "Invalid function ID"); diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp index ffc20777b5..1b42f1ee1e 100644 --- a/modules/core/src/umatrix.cpp +++ b/modules/core/src/umatrix.cpp @@ -55,7 +55,7 @@ UMatData::UMatData(const MatAllocator* allocator) prevAllocator = currAllocator = allocator; urefcount = refcount = 0; data = origdata = 0; - size = 0; capacity = 0; + size = 0; flags = 0; handle = 0; userdata = 0; @@ -67,7 +67,7 @@ UMatData::~UMatData() prevAllocator = currAllocator = 0; urefcount = refcount = 0; data = origdata = 0; - size = 0; capacity = 0; + size = 0; flags = 0; handle = 0; userdata = 0; @@ -221,7 +221,7 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const temp_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags); temp_u->refcount = 1; } 
- UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags); + UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags); // TODO result is not checked hdr.flags = flags; setSize(hdr, dims, size.p, step.p); finalizeHdr(hdr); @@ -575,7 +575,7 @@ Mat UMat::getMat(int accessFlags) const { if(!u) return Mat(); - u->currAllocator->map(u, accessFlags | ACCESS_READ); + u->currAllocator->map(u, accessFlags | ACCESS_READ); // TODO Support ACCESS_WRITE without unnecessary data transfers CV_Assert(u->data != 0); Mat hdr(dims, size.p, type(), u->data + offset, step.p); hdr.flags = flags;