ocl: OpenCL SVM support

Branch: pull/3603/head
Author: Alexander Alekhin, 10 years ago
Parent: 58ad952b1a
Commit: 0a07d780e0
Changed files (15):

  1. CMakeLists.txt (1 line changed)
  2. cmake/OpenCVDetectOpenCL.cmake (4 lines changed)
  3. cmake/templates/cvconfig.h.in (1 line changed)
  4. modules/core/include/opencv2/core/mat.hpp (4 lines changed)
  5. modules/core/include/opencv2/core/ocl.hpp (20 lines changed)
  6. modules/core/include/opencv2/core/opencl/opencl_svm.hpp (81 lines changed)
  7. modules/core/include/opencv2/core/opencl/runtime/opencl_core.hpp (12 lines changed)
  8. modules/core/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp (52 lines changed)
  9. modules/core/include/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp (42 lines changed)
  10. modules/core/include/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp (166 lines changed)
  11. modules/core/src/matmul.cpp (10 lines changed)
  12. modules/core/src/matrix.cpp (3 lines changed)
  13. modules/core/src/ocl.cpp (1218 lines changed)
  14. modules/core/src/opencl/runtime/opencl_core.cpp (67 lines changed)
  15. modules/core/src/umatrix.cpp (8 lines changed)

CMakeLists.txt:
@@ -162,6 +162,7 @@ OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF
 OCV_OPTION(WITH_XINE "Include Xine support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) )
 OCV_OPTION(WITH_CLP "Include Clp support (EPL)" OFF)
 OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" ON IF (NOT IOS) )
+OCV_OPTION(WITH_OPENCL_SVM "Include OpenCL Shared Virtual Memory support" OFF ) # experimental
 OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" ON IF (NOT ANDROID AND NOT IOS) )
 OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON IF (NOT ANDROID AND NOT IOS) )
 OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF WIN32 )

cmake/OpenCVDetectOpenCL.cmake:
@@ -26,6 +26,10 @@ if(OPENCL_FOUND)
   set(HAVE_OPENCL 1)
+  if(WITH_OPENCL_SVM)
+    set(HAVE_OPENCL_SVM 1)
+  endif()
   if(HAVE_OPENCL_STATIC)
     set(OPENCL_LIBRARIES "${OPENCL_LIBRARY}")
   else()

cmake/templates/cvconfig.h.in:
@@ -122,6 +122,7 @@
 /* OpenCL Support */
 #cmakedefine HAVE_OPENCL
 #cmakedefine HAVE_OPENCL_STATIC
+#cmakedefine HAVE_OPENCL_SVM

 /* OpenEXR codec */
 #cmakedefine HAVE_OPENEXR

modules/core/include/opencv2/core/mat.hpp:
@@ -415,7 +415,7 @@ public:
                       const size_t dstofs[], const size_t dststep[], bool sync) const;
     // default implementation returns DummyBufferPoolController
-    virtual BufferPoolController* getBufferPoolController() const;
+    virtual BufferPoolController* getBufferPoolController(const char* id = NULL) const;
 };
@@ -481,7 +481,7 @@ struct CV_EXPORTS UMatData
     int refcount;
     uchar* data;
     uchar* origdata;
-    size_t size, capacity;
+    size_t size;
     int flags;
     void* handle;

modules/core/include/opencv2/core/ocl.hpp:
@@ -56,6 +56,8 @@ CV_EXPORTS_W bool haveAmdFft();
 CV_EXPORTS_W void setUseOpenCL(bool flag);
 CV_EXPORTS_W void finish();

+CV_EXPORTS bool haveSVM();
+
 class CV_EXPORTS Context;
 class CV_EXPORTS Device;
 class CV_EXPORTS Kernel;
@@ -248,7 +250,10 @@ public:
     void* ptr() const;

     friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);

+    bool useSVM() const;
+    void setUseSVM(bool enabled);
+
 protected:
     struct Impl;
     Impl* p;
 };
@@ -666,8 +671,17 @@ protected:
 CV_EXPORTS MatAllocator* getOpenCLAllocator();

-CV_EXPORTS_W bool isPerformanceCheckBypassed();
-#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::isPerformanceCheckBypassed() || (condition))
+#ifdef __OPENCV_BUILD
+namespace internal {
+
+CV_EXPORTS bool isPerformanceCheckBypassed();
+#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))
+
+CV_EXPORTS bool isCLBuffer(UMat& u);
+
+} // namespace internal
+#endif

 //! @}
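
For context, here is a minimal usage sketch (not part of the patch) showing how application code can probe the new public entry point. Only cv::ocl::haveSVM() comes from this commit; everything else is pre-existing OpenCV API.

#include <opencv2/core.hpp>
#include <opencv2/core/ocl.hpp>
#include <iostream>

int main()
{
    // haveSVM() is the query added by this patch; haveOpenCL() already existed.
    if (cv::ocl::haveOpenCL() && cv::ocl::haveSVM())
        std::cout << "Default OpenCL context reports SVM support" << std::endl;
    else
        std::cout << "SVM is not available (or OpenCL is disabled)" << std::endl;
    return 0;
}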

modules/core/include/opencv2/core/opencl/opencl_svm.hpp (new file):
@@ -0,0 +1,81 @@
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OPENCL_SVM_HPP__
#define __OPENCV_CORE_OPENCL_SVM_HPP__
//
// Internal usage only (binary compatibility is not guaranteed)
//
#ifndef __OPENCV_BUILD
#error Internal header file
#endif
#if defined(HAVE_OPENCL) && defined(HAVE_OPENCL_SVM)
#include "runtime/opencl_core.hpp"
#include "runtime/opencl_svm_20.hpp"
#include "runtime/opencl_svm_hsa_extension.hpp"
namespace cv { namespace ocl { namespace svm {

struct SVMCapabilities
{
    enum Value
    {
        SVM_COARSE_GRAIN_BUFFER = (1 << 0),
        SVM_FINE_GRAIN_BUFFER = (1 << 1),
        SVM_FINE_GRAIN_SYSTEM = (1 << 2),
        SVM_ATOMICS = (1 << 3),
    };
    int value_;

    SVMCapabilities(int capabilities = 0) : value_(capabilities) { }
    operator int() const { return value_; }

    inline bool isNoSVMSupport() const { return value_ == 0; }
    inline bool isSupportCoarseGrainBuffer() const { return (value_ & SVM_COARSE_GRAIN_BUFFER) != 0; }
    inline bool isSupportFineGrainBuffer() const { return (value_ & SVM_FINE_GRAIN_BUFFER) != 0; }
    inline bool isSupportFineGrainSystem() const { return (value_ & SVM_FINE_GRAIN_SYSTEM) != 0; }
    inline bool isSupportAtomics() const { return (value_ & SVM_ATOMICS) != 0; }
};

CV_EXPORTS const SVMCapabilities getSVMCapabilitites(const ocl::Context& context);

struct SVMFunctions
{
    clSVMAllocAMD_fn fn_clSVMAlloc;
    clSVMFreeAMD_fn fn_clSVMFree;
    clSetKernelArgSVMPointerAMD_fn fn_clSetKernelArgSVMPointer;
    //clSetKernelExecInfoAMD_fn fn_clSetKernelExecInfo;
    //clEnqueueSVMFreeAMD_fn fn_clEnqueueSVMFree;
    clEnqueueSVMMemcpyAMD_fn fn_clEnqueueSVMMemcpy;
    clEnqueueSVMMemFillAMD_fn fn_clEnqueueSVMMemFill;
    clEnqueueSVMMapAMD_fn fn_clEnqueueSVMMap;
    clEnqueueSVMUnmapAMD_fn fn_clEnqueueSVMUnmap;

    inline SVMFunctions()
        : fn_clSVMAlloc(NULL), fn_clSVMFree(NULL),
          fn_clSetKernelArgSVMPointer(NULL), /*fn_clSetKernelExecInfo(NULL),*/
          /*fn_clEnqueueSVMFree(NULL),*/ fn_clEnqueueSVMMemcpy(NULL), fn_clEnqueueSVMMemFill(NULL),
          fn_clEnqueueSVMMap(NULL), fn_clEnqueueSVMUnmap(NULL)
    {
        // nothing
    }

    inline bool isValid() const
    {
        return fn_clSVMAlloc != NULL && fn_clSVMFree && fn_clSetKernelArgSVMPointer &&
               /*fn_clSetKernelExecInfo && fn_clEnqueueSVMFree &&*/ fn_clEnqueueSVMMemcpy &&
               fn_clEnqueueSVMMemFill && fn_clEnqueueSVMMap && fn_clEnqueueSVMUnmap;
    }
};

// We should guarantee that SVMFunctions lifetime is not less than context's lifetime
CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context);

CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags);

}}} //namespace cv::ocl::svm
#endif
#endif // __OPENCV_CORE_OPENCL_SVM_HPP__
/* End of file. */
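
A rough sketch of how these internal helpers are meant to be consumed (for example by the OpenCL allocator in ocl.cpp, whose diff is suppressed below): query the capabilities of the current context, and only then fetch the function table. It only compiles inside the OpenCV build, since opencl_svm.hpp is an internal header; the allocateSVM wrapper and its fallback behavior are illustrative assumptions, not code from the patch.

#include "opencv2/core/opencl/opencl_svm.hpp" // internal header, requires __OPENCV_BUILD

static void* allocateSVM(const cv::ocl::Context& ctx, size_t bytes)
{
    using namespace cv::ocl::svm;

    const SVMCapabilities caps = getSVMCapabilitites(ctx); // identifier spelling matches the header above
    if (caps.isNoSVMSupport())
        return NULL; // caller falls back to regular cl_mem buffers

    const SVMFunctions* fn = getSVMFunctions(ctx);
    CV_Assert(fn != NULL && fn->isValid());

    // ctx.ptr() returns the underlying cl_context handle
    return fn->fn_clSVMAlloc((cl_context)ctx.ptr(), CL_MEM_READ_WRITE, bytes, 0);
}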

modules/core/include/opencv2/core/opencl/runtime/opencl_core.hpp:
@@ -62,6 +62,18 @@
 #endif
 #endif

+#ifdef HAVE_OPENCL_SVM
+#define clSVMAlloc clSVMAlloc_
+#define clSVMFree clSVMFree_
+#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_
+#define clSetKernelExecInfo clSetKernelExecInfo_
+#define clEnqueueSVMFree clEnqueueSVMFree_
+#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_
+#define clEnqueueSVMMemFill clEnqueueSVMMemFill_
+#define clEnqueueSVMMap clEnqueueSVMMap_
+#define clEnqueueSVMUnmap clEnqueueSVMUnmap_
+#endif
+
 #include "autogenerated/opencl_core.hpp"

 #endif // HAVE_OPENCL_STATIC

modules/core/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp (new file):
@@ -0,0 +1,52 @@
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
#if defined(HAVE_OPENCL_SVM)
#include "opencl_core.hpp"
#include "opencl_svm_definitions.hpp"
#ifndef HAVE_OPENCL_STATIC
#undef clSVMAlloc
#define clSVMAlloc clSVMAlloc_pfn
#undef clSVMFree
#define clSVMFree clSVMFree_pfn
#undef clSetKernelArgSVMPointer
#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_pfn
#undef clSetKernelExecInfo
//#define clSetKernelExecInfo clSetKernelExecInfo_pfn
#undef clEnqueueSVMFree
//#define clEnqueueSVMFree clEnqueueSVMFree_pfn
#undef clEnqueueSVMMemcpy
#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_pfn
#undef clEnqueueSVMMemFill
#define clEnqueueSVMMemFill clEnqueueSVMMemFill_pfn
#undef clEnqueueSVMMap
#define clEnqueueSVMMap clEnqueueSVMMap_pfn
#undef clEnqueueSVMUnmap
#define clEnqueueSVMUnmap clEnqueueSVMUnmap_pfn
extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSVMAlloc)(cl_context context, cl_svm_mem_flags flags, size_t size, unsigned int alignment);
extern CL_RUNTIME_EXPORT void (CL_API_CALL *clSVMFree)(cl_context context, void* svm_pointer);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clSetKernelArgSVMPointer)(cl_kernel kernel, cl_uint arg_index, const void* arg_value);
//extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value);
//extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMFree)(cl_command_queue command_queue, cl_uint num_svm_pointers, void* svm_pointers[],
// void (CL_CALLBACK *pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void* svm_pointers[], void* user_data), void* user_data,
// cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemcpy)(cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, const void* src_ptr, size_t size,
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemFill)(cl_command_queue command_queue, void* svm_ptr, const void* pattern, size_t pattern_size, size_t size,
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMap)(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags map_flags, void* svm_ptr, size_t size,
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMUnmap)(cl_command_queue command_queue, void* svm_ptr,
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
#endif // HAVE_OPENCL_STATIC
#endif // HAVE_OPENCL_SVM
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
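
To make the role of these function pointers concrete, here is a hedged sketch of the coarse-grain SVM round trip they expose: allocate, map for host access, unmap, free. ctx and queue are placeholder handles, error checking is omitted, and on an OpenCL 2.0 driver the same calls exist as regular entry points; this is not code from the patch.

#include <cstring>

static void svmRoundTrip(cl_context ctx, cl_command_queue queue, size_t bytes)
{
    // Coarse-grain SVM allocation (alignment 0 = default)
    void* p = clSVMAlloc(ctx, CL_MEM_READ_WRITE, bytes, 0);

    // Map before touching the memory on the host, unmap before kernels use it
    clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_WRITE, p, bytes, 0, NULL, NULL);
    memset(p, 0, bytes);
    clEnqueueSVMUnmap(queue, p, 0, NULL, NULL);

    clFinish(queue);   // make sure the unmap has completed before freeing
    clSVMFree(ctx, p); // clSVMFree is immediate, not enqueued
}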

modules/core/include/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp (new file):
@@ -0,0 +1,42 @@
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
#if defined(HAVE_OPENCL_SVM)
#if defined(CL_VERSION_2_0)
// OpenCL 2.0 contains SVM definitions
#else
typedef cl_bitfield cl_device_svm_capabilities;
typedef cl_bitfield cl_svm_mem_flags;
typedef cl_uint cl_kernel_exec_info;
//
// TODO Add real values after OpenCL 2.0 release
//
#ifndef CL_DEVICE_SVM_CAPABILITIES
#define CL_DEVICE_SVM_CAPABILITIES 0x1053
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2)
#define CL_DEVICE_SVM_ATOMICS (1 << 3)
#endif
#ifndef CL_MEM_SVM_FINE_GRAIN_BUFFER
#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10)
#endif
#ifndef CL_MEM_SVM_ATOMICS
#define CL_MEM_SVM_ATOMICS (1 << 11)
#endif
#endif // CL_VERSION_2_0
#endif // HAVE_OPENCL_SVM
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
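
As a usage illustration (an assumption, not part of the patch): with these definitions in place, SVM support can be detected even when building against OpenCL 1.2 headers, using a plain clGetDeviceInfo query; drivers without the feature simply return an error for the unknown parameter.

static cl_device_svm_capabilities querySVMCapabilities(cl_device_id device)
{
    cl_device_svm_capabilities caps = 0;
    cl_int status = clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES,
                                    sizeof(caps), &caps, NULL);
    if (status != CL_SUCCESS)
        return 0; // no SVM support reported by this device/driver
    return caps;  // test bits such as CL_DEVICE_SVM_COARSE_GRAIN_BUFFER
}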

modules/core/include/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp (new file):
@@ -0,0 +1,166 @@
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
#if defined(HAVE_OPENCL_SVM)
#include "opencl_core.hpp"
#ifndef CL_DEVICE_SVM_CAPABILITIES_AMD
//
// Part of the file is an extract from the cl_ext.h file from AMD APP SDK package.
// Below is the original copyright.
//
/*******************************************************************************
* Copyright (c) 2008-2013 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/*******************************************
* Shared Virtual Memory (SVM) extension
*******************************************/
typedef cl_bitfield cl_device_svm_capabilities_amd;
typedef cl_bitfield cl_svm_mem_flags_amd;
typedef cl_uint cl_kernel_exec_info_amd;
/* cl_device_info */
#define CL_DEVICE_SVM_CAPABILITIES_AMD 0x1053
#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT_AMD 0x1054
/* cl_device_svm_capabilities_amd */
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_AMD (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_AMD (1 << 2)
#define CL_DEVICE_SVM_ATOMICS_AMD (1 << 3)
/* cl_svm_mem_flags_amd */
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_AMD (1 << 10)
#define CL_MEM_SVM_ATOMICS_AMD (1 << 11)
/* cl_mem_info */
#define CL_MEM_USES_SVM_POINTER_AMD 0x1109
/* cl_kernel_exec_info_amd */
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_AMD 0x11B6
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_AMD 0x11B7
/* cl_command_type */
#define CL_COMMAND_SVM_FREE_AMD 0x1209
#define CL_COMMAND_SVM_MEMCPY_AMD 0x120A
#define CL_COMMAND_SVM_MEMFILL_AMD 0x120B
#define CL_COMMAND_SVM_MAP_AMD 0x120C
#define CL_COMMAND_SVM_UNMAP_AMD 0x120D
typedef CL_API_ENTRY void*
(CL_API_CALL * clSVMAllocAMD_fn)(
cl_context /* context */,
cl_svm_mem_flags_amd /* flags */,
size_t /* size */,
unsigned int /* alignment */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY void
(CL_API_CALL * clSVMFreeAMD_fn)(
cl_context /* context */,
void* /* svm_pointer */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMFreeAMD_fn)(
cl_command_queue /* command_queue */,
cl_uint /* num_svm_pointers */,
void** /* svm_pointers */,
void (CL_CALLBACK *)( /*pfn_free_func*/
cl_command_queue /* queue */,
cl_uint /* num_svm_pointers */,
void** /* svm_pointers */,
void* /* user_data */),
void* /* user_data */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMMemcpyAMD_fn)(
cl_command_queue /* command_queue */,
cl_bool /* blocking_copy */,
void* /* dst_ptr */,
const void* /* src_ptr */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMMemFillAMD_fn)(
cl_command_queue /* command_queue */,
void* /* svm_ptr */,
const void* /* pattern */,
size_t /* pattern_size */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMMapAMD_fn)(
cl_command_queue /* command_queue */,
cl_bool /* blocking_map */,
cl_map_flags /* map_flags */,
void* /* svm_ptr */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMUnmapAMD_fn)(
cl_command_queue /* command_queue */,
void* /* svm_ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clSetKernelArgSVMPointerAMD_fn)(
cl_kernel /* kernel */,
cl_uint /* arg_index */,
const void * /* arg_value */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clSetKernelExecInfoAMD_fn)(
cl_kernel /* kernel */,
cl_kernel_exec_info_amd /* param_name */,
size_t /* param_value_size */,
const void * /* param_value */
) CL_EXT_SUFFIX__VERSION_1_2;
#endif
#endif // HAVE_OPENCL_SVM
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
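
How the *AMD entry points declared above get resolved is not visible here (that logic lives in the suppressed ocl.cpp diff), but a plausible sketch uses the standard OpenCL 1.2 extension-lookup mechanism. The "clSVMAllocAMD" name string is an assumption based on the cl_amd_svm extension naming convention.

static clSVMAllocAMD_fn loadSVMAllocAMD(cl_platform_id platform)
{
    // Returns NULL when the platform does not expose the AMD SVM extension.
    return (clSVMAllocAMD_fn)clGetExtensionFunctionAddressForPlatform(
        platform, "clSVMAllocAMD");
}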

modules/core/src/matmul.cpp:
@@ -721,6 +721,16 @@ static bool ocl_gemm_amdblas( InputArray matA, InputArray matB, double alpha,
         return false;

     UMat A = matA.getUMat(), B = matB.getUMat(), D = matD.getUMat();
+    if (!ocl::internal::isCLBuffer(A) || !ocl::internal::isCLBuffer(B) || !ocl::internal::isCLBuffer(D))
+    {
+        return false;
+    }
+    if (haveC)
+    {
+        UMat C = matC.getUMat();
+        if (!ocl::internal::isCLBuffer(C))
+            return false;
+    }
     if (haveC)
         ctrans ? transpose(matC, D) : matC.copyTo(D);
     else

modules/core/src/matrix.cpp:
@@ -159,8 +159,9 @@ void MatAllocator::copy(UMatData* usrc, UMatData* udst, int dims, const size_t s
             memcpy(ptrs[1], ptrs[0], planesz);
     }
 }

-BufferPoolController* MatAllocator::getBufferPoolController() const
+BufferPoolController* MatAllocator::getBufferPoolController(const char* id) const
 {
+    (void)id;
     static DummyBufferPoolController dummy;
     return &dummy;
 }

modules/core/src/ocl.cpp (1218 lines changed): diff suppressed because it is too large.

modules/core/src/opencl/runtime/opencl_core.cpp:
@@ -182,6 +182,65 @@ static void* opencl_check_fn(int ID);
 #define CUSTOM_FUNCTION_ID 1000

+#ifdef HAVE_OPENCL_SVM
+#include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
+
+#define SVM_FUNCTION_ID_START CUSTOM_FUNCTION_ID
+#define SVM_FUNCTION_ID_END CUSTOM_FUNCTION_ID + 100
+
+enum OPENCL_FN_SVM_ID
+{
+    OPENCL_FN_clSVMAlloc = SVM_FUNCTION_ID_START,
+    OPENCL_FN_clSVMFree,
+    OPENCL_FN_clSetKernelArgSVMPointer,
+    OPENCL_FN_clSetKernelExecInfo,
+    OPENCL_FN_clEnqueueSVMFree,
+    OPENCL_FN_clEnqueueSVMMemcpy,
+    OPENCL_FN_clEnqueueSVMMemFill,
+    OPENCL_FN_clEnqueueSVMMap,
+    OPENCL_FN_clEnqueueSVMUnmap,
+};
+
+void* (CL_API_CALL *clSVMAlloc)(cl_context context, cl_svm_mem_flags flags, size_t size, unsigned int alignment) =
+        opencl_fn4<OPENCL_FN_clSVMAlloc, void*, cl_context, cl_svm_mem_flags, size_t, unsigned int>::switch_fn;
+static const struct DynamicFnEntry _clSVMAlloc_definition = { "clSVMAlloc", (void**)&clSVMAlloc};
+void (CL_API_CALL *clSVMFree)(cl_context context, void* svm_pointer) =
+        opencl_fn2<OPENCL_FN_clSVMFree, void, cl_context, void*>::switch_fn;
+static const struct DynamicFnEntry _clSVMFree_definition = { "clSVMFree", (void**)&clSVMFree};
+cl_int (CL_API_CALL *clSetKernelArgSVMPointer)(cl_kernel kernel, cl_uint arg_index, const void* arg_value) =
+        opencl_fn3<OPENCL_FN_clSetKernelArgSVMPointer, cl_int, cl_kernel, cl_uint, const void*>::switch_fn;
+static const struct DynamicFnEntry _clSetKernelArgSVMPointer_definition = { "clSetKernelArgSVMPointer", (void**)&clSetKernelArgSVMPointer};
+//void* (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value) =
+//        opencl_fn4<OPENCL_FN_clSetKernelExecInfo, void*, cl_kernel, cl_kernel_exec_info, size_t, const void*>::switch_fn;
+//static const struct DynamicFnEntry _clSetKernelExecInfo_definition = { "clSetKernelExecInfo", (void**)&clSetKernelExecInfo};
+//cl_int (CL_API_CALL *clEnqueueSVMFree)(...) =
+//        opencl_fn8<OPENCL_FN_clEnqueueSVMFree, cl_int, ...>::switch_fn;
+//static const struct DynamicFnEntry _clEnqueueSVMFree_definition = { "clEnqueueSVMFree", (void**)&clEnqueueSVMFree};
+cl_int (CL_API_CALL *clEnqueueSVMMemcpy)(cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, const void* src_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) =
+        opencl_fn8<OPENCL_FN_clEnqueueSVMMemcpy, cl_int, cl_command_queue, cl_bool, void*, const void*, size_t, cl_uint, const cl_event*, cl_event*>::switch_fn;
+static const struct DynamicFnEntry _clEnqueueSVMMemcpy_definition = { "clEnqueueSVMMemcpy", (void**)&clEnqueueSVMMemcpy};
+cl_int (CL_API_CALL *clEnqueueSVMMemFill)(cl_command_queue command_queue, void* svm_ptr, const void* pattern, size_t pattern_size, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) =
+        opencl_fn8<OPENCL_FN_clEnqueueSVMMemFill, cl_int, cl_command_queue, void*, const void*, size_t, size_t, cl_uint, const cl_event*, cl_event*>::switch_fn;
+static const struct DynamicFnEntry _clEnqueueSVMMemFill_definition = { "clEnqueueSVMMemFill", (void**)&clEnqueueSVMMemFill};
+cl_int (CL_API_CALL *clEnqueueSVMMap)(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags map_flags, void* svm_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) =
+        opencl_fn8<OPENCL_FN_clEnqueueSVMMap, cl_int, cl_command_queue, cl_bool, cl_map_flags, void*, size_t, cl_uint, const cl_event*, cl_event*>::switch_fn;
+static const struct DynamicFnEntry _clEnqueueSVMMap_definition = { "clEnqueueSVMMap", (void**)&clEnqueueSVMMap};
+cl_int (CL_API_CALL *clEnqueueSVMUnmap)(cl_command_queue command_queue, void* svm_ptr, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) =
+        opencl_fn5<OPENCL_FN_clEnqueueSVMUnmap, cl_int, cl_command_queue, void*, cl_uint, const cl_event*, cl_event*>::switch_fn;
+static const struct DynamicFnEntry _clEnqueueSVMUnmap_definition = { "clEnqueueSVMUnmap", (void**)&clEnqueueSVMUnmap};
+
+static const struct DynamicFnEntry* opencl_svm_fn_list[] = {
+    &_clSVMAlloc_definition,
+    &_clSVMFree_definition,
+    &_clSetKernelArgSVMPointer_definition,
+    NULL/*&_clSetKernelExecInfo_definition*/,
+    NULL/*&_clEnqueueSVMFree_definition*/,
+    &_clEnqueueSVMMemcpy_definition,
+    &_clEnqueueSVMMemFill_definition,
+    &_clEnqueueSVMMap_definition,
+    &_clEnqueueSVMUnmap_definition,
+};
+
+#endif // HAVE_OPENCL_SVM
+
 //
 // END OF CUSTOM FUNCTIONS HERE
 //
@@ -194,6 +253,14 @@ static void* opencl_check_fn(int ID)
         assert(ID >= 0 && ID < (int)(sizeof(opencl_fn_list)/sizeof(opencl_fn_list[0])));
         e = opencl_fn_list[ID];
     }
+#ifdef HAVE_OPENCL_SVM
+    else if (ID >= SVM_FUNCTION_ID_START && ID < SVM_FUNCTION_ID_END)
+    {
+        ID = ID - SVM_FUNCTION_ID_START;
+        assert(ID >= 0 && ID < (int)(sizeof(opencl_svm_fn_list)/sizeof(opencl_svm_fn_list[0])));
+        e = opencl_svm_fn_list[ID];
+    }
+#endif
     else
     {
         CV_ErrorNoReturn(cv::Error::StsBadArg, "Invalid function ID");
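
The opencl_fnN<>::switch_fn / DynamicFnEntry pattern above is OpenCV's lazy binder: each exported function pointer initially targets a stub, the stub resolves the real symbol through opencl_check_fn(ID), and the table entry is rewritten so later calls skip the stub. The following self-contained toy (all names invented, a single int argument instead of the real OpenCL signatures) sketches that idea; it is an illustration, not the actual OpenCV machinery.

#include <cstdio>

typedef int (*fn1_t)(int);
static int real_impl(int x) { return x + 1; }        // stands in for the symbol loaded from the OpenCL library

struct FnEntrySketch { const char* name; void** ppFn; };

static void* check_fn_sketch(int id);                // analogue of opencl_check_fn(ID)

template <int ID>
struct fn1_sketch
{
    static int switch_fn(int a0)
    { return ((fn1_t)check_fn_sketch(ID))(a0); }     // resolve on first use, then call through
};

int (*clSomething_sketch)(int) = fn1_sketch<0>::switch_fn;
static const FnEntrySketch entry0 = { "clSomething", (void**)&clSomething_sketch };
static const FnEntrySketch* fn_list_sketch[] = { &entry0 };

static void* check_fn_sketch(int id)
{
    const FnEntrySketch* e = fn_list_sketch[id];
    void* addr = (void*)&real_impl;                  // dlsym()/GetProcAddress() in the real code
    *(e->ppFn) = addr;                               // patch the table entry: later calls bypass the stub
    return addr;
}

int main() { std::printf("%d\n", clSomething_sketch(41)); } // prints 42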

modules/core/src/umatrix.cpp:
@@ -55,7 +55,7 @@ UMatData::UMatData(const MatAllocator* allocator)
     prevAllocator = currAllocator = allocator;
     urefcount = refcount = 0;
     data = origdata = 0;
-    size = 0; capacity = 0;
+    size = 0;
     flags = 0;
     handle = 0;
     userdata = 0;
@@ -67,7 +67,7 @@ UMatData::~UMatData()
     prevAllocator = currAllocator = 0;
     urefcount = refcount = 0;
     data = origdata = 0;
-    size = 0; capacity = 0;
+    size = 0;
     flags = 0;
     handle = 0;
     userdata = 0;
@@ -221,7 +221,7 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
         temp_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags);
         temp_u->refcount = 1;
     }
-    UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags);
+    UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags); // TODO result is not checked
     hdr.flags = flags;
     setSize(hdr, dims, size.p, step.p);
     finalizeHdr(hdr);
@@ -575,7 +575,7 @@ Mat UMat::getMat(int accessFlags) const
 {
     if(!u)
         return Mat();
-    u->currAllocator->map(u, accessFlags | ACCESS_READ);
+    u->currAllocator->map(u, accessFlags | ACCESS_READ); // TODO Support ACCESS_WRITE without unnecessary data transfers
     CV_Assert(u->data != 0);
     Mat hdr(dims, size.p, type(), u->data + offset, step.p);
     hdr.flags = flags;
