diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index 73bb1d6a8b..bc676c1acd 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -565,6 +565,7 @@ struct CV_EXPORTS UMatData int allocatorFlags_; int mapcount; UMatData* originalUMatData; + std::shared_ptr allocatorContext; }; CV_ENUM_FLAGS(UMatData::MemoryFlag) diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index 115f5d127f..f5d55198d7 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -229,8 +229,15 @@ public: CV_WRAP static const Device& getDefault(); -protected: + /** + * @param d OpenCL handle (cl_device_id). clRetainDevice() is called on success. + */ + static Device fromHandle(void* d); + struct Impl; + inline Impl* getImpl() const { return (Impl*)p; } + inline bool empty() const { return !p; } +protected: Impl* p; }; @@ -239,33 +246,55 @@ class CV_EXPORTS Context { public: Context(); - explicit Context(int dtype); + explicit Context(int dtype); //!< @deprecated ~Context(); Context(const Context& c); - Context& operator = (const Context& c); + Context& operator= (const Context& c); + /** @deprecated */ bool create(); + /** @deprecated */ bool create(int dtype); + size_t ndevices() const; - const Device& device(size_t idx) const; + Device& device(size_t idx) const; Program getProg(const ProgramSource& prog, const String& buildopt, String& errmsg); void unloadProg(Program& prog); + + /** Get thread-local OpenCL context (initialize if necessary) */ +#if 0 // OpenCV 5.0 + static Context& getDefault(); +#else static Context& getDefault(bool initialize = true); +#endif + + /** @returns cl_context value */ void* ptr() const; - friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device); bool useSVM() const; void setUseSVM(bool enabled); + /** + * @param context OpenCL handle (cl_context). clRetainContext() is called on success + */ + static Context fromHandle(void* context); + static Context fromDevice(const ocl::Device& device); + static Context create(const std::string& configuration); + + void release(); + struct Impl; inline Impl* getImpl() const { return (Impl*)p; } + inline bool empty() const { return !p; } +// TODO OpenCV 5.0 //protected: Impl* p; }; +/** @deprecated */ class CV_EXPORTS Platform { public: @@ -275,11 +304,14 @@ public: Platform& operator = (const Platform& p); void* ptr() const; + + /** @deprecated */ static Platform& getDefault(); - friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device); -protected: struct Impl; + inline Impl* getImpl() const { return (Impl*)p; } + inline bool empty() const { return !p; } +protected: Impl* p; }; @@ -319,6 +351,7 @@ CV_EXPORTS void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, in CV_EXPORTS void convertFromImage(void* cl_mem_image, UMat& dst); // TODO Move to internal header +/// @deprecated void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device); class CV_EXPORTS Queue @@ -340,6 +373,7 @@ public: struct Impl; friend struct Impl; inline Impl* getImpl() const { return p; } + inline bool empty() const { return !p; } protected: Impl* p; }; @@ -490,6 +524,7 @@ public: struct Impl; friend struct Impl; inline Impl* getImpl() const { return (Impl*)p; } + inline bool empty() const { return !p; } protected: Impl* p; public: @@ -571,6 +606,7 @@ public: struct Impl; friend struct Impl; inline Impl* getImpl() const { return (Impl*)p; } + inline bool empty() const { return !p; } protected: Impl* p; }; @@ -579,6 +615,9 @@ class CV_EXPORTS PlatformInfo { public: PlatformInfo(); + /** + * @param id pointer cl_platform_id (cl_platform_id*) + */ explicit PlatformInfo(void* id); ~PlatformInfo(); @@ -591,8 +630,9 @@ public: int deviceNumber() const; void getDevice(Device& device, int d) const; -protected: struct Impl; + bool empty() const { return !p; } +protected: Impl* p; }; @@ -689,6 +729,106 @@ private: CV_EXPORTS MatAllocator* getOpenCLAllocator(); +class CV_EXPORTS_W OpenCLExecutionContext +{ +public: + OpenCLExecutionContext() = default; + ~OpenCLExecutionContext() = default; + + OpenCLExecutionContext(const OpenCLExecutionContext& other) = default; + OpenCLExecutionContext(OpenCLExecutionContext&& other) = default; + + OpenCLExecutionContext& operator=(const OpenCLExecutionContext& other) = default; + OpenCLExecutionContext& operator=(OpenCLExecutionContext&& other) = default; + + /** Get associated ocl::Context */ + Context& getContext() const; + /** Get associated ocl::Device */ + Device& getDevice() const; + /** Get associated ocl::Queue */ + Queue& getQueue() const; + + bool useOpenCL() const; + void setUseOpenCL(bool flag); + + /** Get OpenCL execution context of current thread. + * + * Initialize OpenCL execution context if it is empty + * - create new + * - reuse context of the main thread (threadID = 0) + */ + static OpenCLExecutionContext& getCurrent(); + + /** Get OpenCL execution context of current thread (can be empty) */ + static OpenCLExecutionContext& getCurrentRef(); + + /** Bind this OpenCL execution context to current thread. + * + * Context can't be empty. + * + * @note clFinish is not called for queue of previous execution context + */ + void bind() const; + + /** Creates new execution context with same OpenCV context and device + * + * @param q OpenCL queue + */ + OpenCLExecutionContext cloneWithNewQueue(const ocl::Queue& q) const; + /** @overload */ + OpenCLExecutionContext cloneWithNewQueue() const; + + /** @brief Creates OpenCL execution context + * OpenCV will check if available OpenCL platform has platformName name, then assign context to + * OpenCV and call `clRetainContext` function. The deviceID device will be used as target device and + * new command queue will be created. + * + * @note Lifetime of passed handles is transferred to OpenCV wrappers on success + * + * @param platformName name of OpenCL platform to attach, this string is used to check if platform is available to OpenCV at runtime + * @param platformID ID of platform attached context was created for (cl_platform_id) + * @param context OpenCL context to be attached to OpenCV (cl_context) + * @param deviceID OpenCL device (cl_device_id) + */ + static OpenCLExecutionContext create(const std::string& platformName, void* platformID, void* context, void* deviceID); + + /** @brief Creates OpenCL execution context + * + * @param context non-empty OpenCL context + * @param device non-empty OpenCL device (must be a part of context) + * @param queue non-empty OpenCL queue for provided context and device + */ + static OpenCLExecutionContext create(const Context& context, const Device& device, const ocl::Queue& queue); + /** @overload */ + static OpenCLExecutionContext create(const Context& context, const Device& device); + + struct Impl; + inline bool empty() const { return !p; } + void release(); +protected: + std::shared_ptr p; +}; + +class OpenCLExecutionContextScope +{ + OpenCLExecutionContext ctx_; +public: + inline OpenCLExecutionContextScope(const OpenCLExecutionContext& ctx) + { + CV_Assert(!ctx.empty()); + ctx_ = OpenCLExecutionContext::getCurrentRef(); + ctx.bind(); + } + + inline ~OpenCLExecutionContextScope() + { + if (!ctx_.empty()) + { + ctx_.bind(); + } + } +}; + #ifdef __OPENCV_BUILD namespace internal { diff --git a/modules/core/src/directx.cpp b/modules/core/src/directx.cpp index 56ed26f6f2..c651449d59 100644 --- a/modules/core/src/directx.cpp +++ b/modules/core/src/directx.cpp @@ -458,9 +458,22 @@ Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device) CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for DirectX interop"); } - Context& ctx = Context::getDefault(false); - initializeContextFromHandle(ctx, platforms[found], context, device); - return ctx; + cl_platform_id platform = platforms[found]; + std::string platformName = PlatformInfo(platform).name(); + + OpenCLExecutionContext clExecCtx; + try + { + clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, device); + } + catch (...) + { + clReleaseDevice(device); + clReleaseContext(context); + throw; + } + clExecCtx.bind(); + return const_cast(clExecCtx.getContext()); #endif } @@ -565,10 +578,22 @@ Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device) CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for DirectX interop"); } + cl_platform_id platform = platforms[found]; + std::string platformName = PlatformInfo(platform).name(); - Context& ctx = Context::getDefault(false); - initializeContextFromHandle(ctx, platforms[found], context, device); - return ctx; + OpenCLExecutionContext clExecCtx; + try + { + clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, device); + } + catch (...) + { + clReleaseDevice(device); + clReleaseContext(context); + throw; + } + clExecCtx.bind(); + return const_cast(clExecCtx.getContext()); #endif } @@ -675,10 +700,23 @@ Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDev CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for DirectX interop"); } - Context& ctx = Context::getDefault(false); - initializeContextFromHandle(ctx, platforms[found], context, device); + cl_platform_id platform = platforms[found]; + std::string platformName = PlatformInfo(platform).name(); + + OpenCLExecutionContext clExecCtx; + try + { + clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, device); + } + catch (...) + { + clReleaseDevice(device); + clReleaseContext(context); + throw; + } + clExecCtx.bind(); g_isDirect3DDevice9Ex = true; - return ctx; + return const_cast(clExecCtx.getContext()); #endif } @@ -785,10 +823,23 @@ Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9 CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for DirectX interop"); } - Context& ctx = Context::getDefault(false); - initializeContextFromHandle(ctx, platforms[found], context, device); + cl_platform_id platform = platforms[found]; + std::string platformName = PlatformInfo(platform).name(); + + OpenCLExecutionContext clExecCtx; + try + { + clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, device); + } + catch (...) + { + clReleaseDevice(device); + clReleaseContext(context); + throw; + } + clExecCtx.bind(); g_isDirect3DDevice9Ex = false; - return ctx; + return const_cast(clExecCtx.getContext()); #endif } diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index c1bbd3316c..c5c5b12953 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -829,6 +829,322 @@ public: #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT + +struct OpenCLExecutionContext::Impl +{ + ocl::Context context_; + int device_; // device index in context + ocl::Queue queue_; + int useOpenCL_; + +protected: + Impl() = delete; + + void _init_device(cl_device_id deviceID) + { + CV_Assert(deviceID); + int ndevices = (int)context_.ndevices(); + CV_Assert(ndevices > 0); + bool found = false; + for (int i = 0; i < ndevices; i++) + { + ocl::Device d = context_.device(i); + cl_device_id dhandle = (cl_device_id)d.ptr(); + if (dhandle == deviceID) + { + device_ = i; + found = true; + break; + } + } + CV_Assert(found && "OpenCL device can't work with passed OpenCL context"); + } + + void _init_device(const ocl::Device& device) + { + CV_Assert(device.ptr()); + int ndevices = (int)context_.ndevices(); + CV_Assert(ndevices > 0); + bool found = false; + for (int i = 0; i < ndevices; i++) + { + ocl::Device d = context_.device(i); + if (d.getImpl() == device.getImpl()) + { + device_ = i; + found = true; + break; + } + } + CV_Assert(found && "OpenCL device can't work with passed OpenCL context"); + } + +public: + Impl(cl_platform_id platformID, cl_context context, cl_device_id deviceID) + : device_(0), useOpenCL_(-1) + { + CV_UNUSED(platformID); + CV_Assert(context); + CV_Assert(deviceID); + + context_ = Context::fromHandle(context); + _init_device(deviceID); + queue_ = Queue(context_, context_.device(device_)); + } + + Impl(const ocl::Context& context, const ocl::Device& device, const ocl::Queue& queue) + : device_(0), useOpenCL_(-1) + { + CV_Assert(context.ptr()); + CV_Assert(device.ptr()); + + context_ = context; + _init_device(device); + queue_ = queue; + } + + Impl(const ocl::Context& context, const ocl::Device& device) + : device_(0), useOpenCL_(-1) + { + CV_Assert(context.ptr()); + CV_Assert(device.ptr()); + + context_ = context; + _init_device(device); + queue_ = Queue(context_, context_.device(device_)); + } + + Impl(const ocl::Context& context, const int device, const ocl::Queue& queue) + : context_(context) + , device_(device) + , queue_(queue) + , useOpenCL_(-1) + { + // nothing + } + Impl(const Impl& other) + : context_(other.context_) + , device_(other.device_) + , queue_(other.queue_) + , useOpenCL_(-1) + { + // nothing + } + + inline bool useOpenCL() const { return const_cast(this)->useOpenCL(); } + bool useOpenCL() + { + if (useOpenCL_ < 0) + { + try + { + useOpenCL_ = 0; + if (!context_.empty() && context_.ndevices() > 0) + { + const Device& d = context_.device(device_); + useOpenCL_ = d.available(); + } + } + catch (const cv::Exception&) + { + // nothing + } + if (!useOpenCL_) + CV_LOG_INFO(NULL, "OpenCL: can't use OpenCL execution context"); + } + return useOpenCL_ > 0; + } + + void setUseOpenCL(bool flag) + { + if (!flag) + useOpenCL_ = 0; + else + useOpenCL_ = -1; + } + + static const std::shared_ptr& getInitializedExecutionContext() + { + CV_TRACE_FUNCTION(); + + CV_LOG_INFO(NULL, "OpenCL: initializing thread execution context"); + + static bool initialized = false; + static std::shared_ptr g_primaryExecutionContext; + + if (!initialized) + { + cv::AutoLock lock(getInitializationMutex()); + if (!initialized) + { + CV_LOG_INFO(NULL, "OpenCL: creating new execution context..."); + try + { + Context c = ocl::Context::create(std::string()); + if (c.ndevices()) + { + int deviceId = 0; + auto& d = c.device(deviceId); + if (d.available()) + { + auto q = ocl::Queue(c, d); + if (!q.ptr()) + { + CV_LOG_ERROR(NULL, "OpenCL: Can't create default OpenCL queue"); + } + else + { + g_primaryExecutionContext = std::make_shared(c, deviceId, q); + CV_LOG_INFO(NULL, "OpenCL: device=" << d.name()); + } + } + else + { + CV_LOG_ERROR(NULL, "OpenCL: OpenCL device is not available (CL_DEVICE_AVAILABLE returns false)"); + } + } + else + { + CV_LOG_INFO(NULL, "OpenCL: context is not available/disabled"); + } + } + catch (const std::exception& e) + { + CV_LOG_INFO(NULL, "OpenCL: Can't initialize OpenCL context/device/queue: " << e.what()); + } + catch (...) + { + CV_LOG_WARNING(NULL, "OpenCL: Can't initialize OpenCL context/device/queue: unknown C++ exception"); + } + initialized = true; + } + } + return g_primaryExecutionContext; + } +}; + +Context& OpenCLExecutionContext::getContext() const +{ + CV_Assert(p); + return p->context_; +} +Device& OpenCLExecutionContext::getDevice() const +{ + CV_Assert(p); + return p->context_.device(p->device_); +} +Queue& OpenCLExecutionContext::getQueue() const +{ + CV_Assert(p); + return p->queue_; +} + +bool OpenCLExecutionContext::useOpenCL() const +{ + if (p) + return p->useOpenCL(); + return false; +} +void OpenCLExecutionContext::setUseOpenCL(bool flag) +{ + CV_Assert(p); + p->setUseOpenCL(flag); +} + +/* static */ +OpenCLExecutionContext& OpenCLExecutionContext::getCurrent() +{ + CV_TRACE_FUNCTION(); + CoreTLSData& data = getCoreTlsData(); + OpenCLExecutionContext& c = data.oclExecutionContext; + if (!data.oclExecutionContextInitialized) + { + data.oclExecutionContextInitialized = true; + if (c.empty() && haveOpenCL()) + c.p = Impl::getInitializedExecutionContext(); + } + return c; +} + +/* static */ +OpenCLExecutionContext& OpenCLExecutionContext::getCurrentRef() +{ + CV_TRACE_FUNCTION(); + CoreTLSData& data = getCoreTlsData(); + OpenCLExecutionContext& c = data.oclExecutionContext; + return c; +} + +void OpenCLExecutionContext::bind() const +{ + CV_TRACE_FUNCTION(); + CV_Assert(p); + CoreTLSData& data = getCoreTlsData(); + data.oclExecutionContext = *this; + data.oclExecutionContextInitialized = true; + data.useOpenCL = p->useOpenCL_; // propagate "-1", avoid call useOpenCL() +} + + +OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue() const +{ + CV_TRACE_FUNCTION(); + CV_Assert(p); + const Queue q(getContext(), getDevice()); + return cloneWithNewQueue(q); +} + +OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue(const ocl::Queue& q) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(p); + CV_Assert(q.ptr() != NULL); + OpenCLExecutionContext c; + c.p = std::make_shared(p->context_, p->device_, q); + return c; +} + +/* static */ +OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device, const ocl::Queue& queue) +{ + CV_TRACE_FUNCTION(); + if (!haveOpenCL()) + CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!"); + + CV_Assert(!context.empty()); + CV_Assert(context.ptr()); + CV_Assert(!device.empty()); + CV_Assert(device.ptr()); + OpenCLExecutionContext ctx; + ctx.p = std::make_shared(context, device, queue); + return ctx; + +} + +/* static */ +OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device) +{ + CV_TRACE_FUNCTION(); + if (!haveOpenCL()) + CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!"); + + CV_Assert(!context.empty()); + CV_Assert(context.ptr()); + CV_Assert(!device.empty()); + CV_Assert(device.ptr()); + OpenCLExecutionContext ctx; + ctx.p = std::make_shared(context, device); + return ctx; + +} + +void OpenCLExecutionContext::release() +{ + CV_TRACE_FUNCTION(); + p.reset(); +} + + // true if we have initialized OpenCL subsystem with available platforms static bool g_isOpenCVActivated = false; @@ -848,14 +1164,18 @@ bool haveOpenCL() { g_isOpenCLAvailable = false; g_isOpenCLInitialized = true; + return false; } } + + cv::AutoLock lock(getInitializationMutex()); CV_LOG_INFO(NULL, "Initialize OpenCL runtime..."); try { cl_uint n = 0; g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS; g_isOpenCVActivated = n > 0; + CV_LOG_INFO(NULL, "OpenCL: found " << n << " platforms"); } catch (...) { @@ -873,11 +1193,16 @@ bool useOpenCL() { try { - data.useOpenCL = (int)(haveOpenCL() && Device::getDefault().ptr() && Device::getDefault().available()) ? 1 : 0; + data.useOpenCL = 0; + if (haveOpenCL()) + { + auto c = OpenCLExecutionContext::getCurrent(); + data.useOpenCL = c.useOpenCL(); + } } catch (...) { - data.useOpenCL = 0; + CV_LOG_INFO(NULL, "OpenCL: can't initialize thread OpenCL execution context"); } } return data.useOpenCL > 0; @@ -895,16 +1220,23 @@ void setUseOpenCL(bool flag) CV_TRACE_FUNCTION(); CoreTLSData& data = getCoreTlsData(); - if (!flag) + auto& c = OpenCLExecutionContext::getCurrentRef(); + if (!c.empty()) { - data.useOpenCL = 0; + c.setUseOpenCL(flag); + data.useOpenCL = c.useOpenCL(); } - else if( haveOpenCL() ) + else { - data.useOpenCL = (Device::getDefault().ptr() != NULL) ? 1 : 0; + if (!flag) + data.useOpenCL = 0; + else + data.useOpenCL = -1; // enabled by default (if context is not initialized) } } + + #ifdef HAVE_CLAMDBLAS class AmdBlasHelper @@ -1151,6 +1483,7 @@ void* Platform::ptr() const Platform& Platform::getDefault() { + CV_LOG_ONCE_WARNING(NULL, "OpenCL: Platform::getDefault() is deprecated and will be removed. Use cv::ocl::getPlatfomsInfo() for enumeration of available platforms"); static Platform p; if( !p.p ) { @@ -1187,9 +1520,24 @@ static void parseDeviceVersion(const String &deviceVersion, int &major, int &min struct Device::Impl { Impl(void* d) + : refcount(1) + , handle(0) + { + try + { + cl_device_id device = (cl_device_id)d; + _init(device); + CV_OCL_CHECK(clRetainDevice(device)); // increment reference counter on success only + } + catch (...) + { + throw; + } + } + + void _init(cl_device_id d) { handle = (cl_device_id)d; - refcount = 1; name_ = getStrProp(CL_DEVICE_NAME); version_ = getStrProp(CL_DEVICE_VERSION); @@ -1252,6 +1600,20 @@ struct Device::Impl #endif } + ~Impl() + { +#ifdef _WIN32 + if (!cv::__termination) +#endif + { + if (handle) + { + CV_OCL_CHECK(clReleaseDevice(handle)); + handle = 0; + } + } + } + template _TpOut getProp(cl_device_info prop) const { @@ -1349,6 +1711,16 @@ void Device::set(void* d) if(p) p->release(); p = new Impl(d); + if (p->handle) + { + CV_OCL_CHECK(clReleaseDevice((cl_device_id)d)); + } +} + +Device Device::fromHandle(void* d) +{ + Device device(d); + return device; } void* Device::ptr() const @@ -1611,10 +1983,14 @@ size_t Device::profilingTimerResolution() const const Device& Device::getDefault() { - const Context& ctx = Context::getDefault(); - int idx = getCoreTlsData().device; - const Device& device = ctx.device(idx); - return device; + auto& c = OpenCLExecutionContext::getCurrent(); + if (!c.empty()) + { + return c.getDevice(); + } + + static Device dummy; + return dummy; } ////////////////////////////////////// Context /////////////////////////////////////////////////// @@ -1666,7 +2042,7 @@ static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr, split(configurationStr, ':', parts); if (parts.size() > 3) { - std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl; + CV_LOG_ERROR(NULL, "OpenCL: Invalid configuration string for OpenCL device: " << configurationStr); return false; } if (parts.size() > 2) @@ -1683,17 +2059,20 @@ static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr, } #if defined WINRT || defined _WIN32_WCE -static cl_device_id selectOpenCLDevice() +static cl_device_id selectOpenCLDevice(const char* configuration = NULL) { + CV_UNUSED(configuration) return NULL; } #else -static cl_device_id selectOpenCLDevice() +static cl_device_id selectOpenCLDevice(const char* configuration = NULL) { std::string platform, deviceName; std::vector deviceTypes; - const char* configuration = getenv("OPENCV_OPENCL_DEVICE"); + if (!configuration) + configuration = getenv("OPENCV_OPENCL_DEVICE"); + if (configuration && (strcmp(configuration, "disabled") == 0 || !parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName) @@ -1753,7 +2132,7 @@ static cl_device_id selectOpenCLDevice() } if (selectedPlatform == -1) { - std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl; + CV_LOG_ERROR(NULL, "OpenCL: Can't find OpenCL platform by name: " << platform); goto not_found; } } @@ -1784,7 +2163,7 @@ static cl_device_id selectOpenCLDevice() deviceType = Device::TYPE_ALL; else { - std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl; + CV_LOG_ERROR(NULL, "OpenCL: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t]); goto not_found; } @@ -1835,13 +2214,16 @@ not_found: if (!configuration) return NULL; // suppress messages on stderr - std::cerr << "ERROR: Requested OpenCL device not found, check configuration: " << configuration << std::endl - << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl - << " Device types: "; + std::ostringstream msg; + msg << "ERROR: Requested OpenCL device not found, check configuration: '" << configuration << "'" << std::endl + << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl + << " Device types:"; for (size_t t = 0; t < deviceTypes.size(); t++) - std::cerr << deviceTypes[t] << " "; + msg << ' ' << deviceTypes[t]; + + msg << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName); - std::cerr << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName) << std::endl; + CV_LOG_ERROR(NULL, msg.str()); return NULL; } #endif @@ -1922,131 +2304,252 @@ static size_t getProgramCountLimit() return count; } +static int g_contextId = 0; + +class OpenCLBufferPoolImpl; +class OpenCLSVMBufferPoolImpl; + struct Context::Impl { static Context::Impl* get(Context& context) { return context.p; } - void __init() + typedef std::deque container_t; + static container_t& getGlobalContainer() { - refcount = 1; - handle = 0; + static container_t g_contexts; + return g_contexts; + } + +protected: + Impl(const std::string& configuration_) + : refcount(1) + , contextId(CV_XADD(&g_contextId, 1)) + , configuration(configuration_) + , handle(0) #ifdef HAVE_OPENCL_SVM - svmInitialized = false; + , svmInitialized(false) #endif + { + if (!haveOpenCL()) + CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!"); + + cv::AutoLock lock(cv::getInitializationMutex()); + auto& container = getGlobalContainer(); + container.resize(std::max(container.size(), (size_t)contextId + 1)); + container[contextId] = this; } - Impl() + ~Impl() { - __init(); +#ifdef _WIN32 + if (!cv::__termination) +#endif + { + if (handle) + { + CV_OCL_DBG_CHECK(clReleaseContext(handle)); + handle = NULL; + } + devices.clear(); + } + + { + cv::AutoLock lock(cv::getInitializationMutex()); + auto& container = getGlobalContainer(); + CV_Assert((size_t)contextId < container.size()); + container[contextId] = NULL; + } } - void setDefault() + void init_device_list() { - CV_Assert(handle == NULL); + CV_Assert(handle); - cl_device_id d = selectOpenCLDevice(); + cl_uint ndevices = 0; + CV_OCL_CHECK(clGetContextInfo(handle, CL_CONTEXT_NUM_DEVICES, sizeof(ndevices), &ndevices, NULL)); + CV_Assert(ndevices > 0); - if (d == NULL) - return; + cv::AutoBuffer cl_devices(ndevices); + size_t devices_ret_size = 0; + CV_OCL_CHECK(clGetContextInfo(handle, CL_CONTEXT_DEVICES, cl_devices.size() * sizeof(cl_device_id), &cl_devices[0], &devices_ret_size)); + CV_CheckEQ(devices_ret_size, cl_devices.size() * sizeof(cl_device_id), ""); - cl_platform_id pl = NULL; - CV_OCL_DBG_CHECK(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL)); + devices.clear(); + for (unsigned i = 0; i < ndevices; i++) + { + devices.emplace_back(Device::fromHandle(cl_devices[i])); + } + } - cl_context_properties prop[] = + void __init_buffer_pools(); // w/o synchronization + void _init_buffer_pools() const + { + if (!bufferPool_) { - CL_CONTEXT_PLATFORM, (cl_context_properties)pl, - 0 - }; + cv::AutoLock lock(cv::getInitializationMutex()); + if (!bufferPool_) + { + const_cast(this)->__init_buffer_pools(); + } + } + } +public: + static Impl* findContext(const std::string& configuration) + { + CV_TRACE_FUNCTION(); + cv::AutoLock lock(cv::getInitializationMutex()); + auto& container = getGlobalContainer(); + if (configuration.empty() && !container.empty()) + return container[0]; + for (auto it = container.begin(); it != container.end(); ++it) + { + Impl* i = *it; + if (i && i->configuration == configuration) + { + return i; + } + } + return NULL; + } - // !!! in the current implementation force the number of devices to 1 !!! - cl_uint nd = 1; - cl_int status; + static Impl* findOrCreateContext(const std::string& configuration_) + { + CV_TRACE_FUNCTION(); + std::string configuration = configuration_; + if (configuration_.empty()) + { + const char* c = getenv("OPENCV_OPENCL_DEVICE"); + if (c) + configuration = c; + } + Impl* impl = findContext(configuration); + if (impl) + { + CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration) + return impl; + } - handle = clCreateContext(prop, nd, &d, 0, 0, &status); - CV_OCL_DBG_CHECK_RESULT(status, "clCreateContext"); + cl_device_id d = selectOpenCLDevice(configuration.empty() ? NULL : configuration.c_str()); + if (d == NULL) + return NULL; - bool ok = handle != 0 && status == CL_SUCCESS; - if( ok ) + impl = new Impl(configuration); + try { - devices.resize(nd); - devices[0].set(d); + impl->createFromDevice(d); + if (impl->handle) + return impl; + delete impl; + return NULL; + } + catch (...) + { + delete impl; + throw; } - else - handle = NULL; } - Impl(int dtype0) + static Impl* findOrCreateContext(cl_context h) { - __init(); + CV_TRACE_FUNCTION(); - cl_int retval = 0; - cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr(); - cl_context_properties prop[] = + CV_Assert(h); + + std::string configuration = cv::format("@ctx-%p", (void*)h); + Impl* impl = findContext(configuration); + if (impl) { - CL_CONTEXT_PLATFORM, (cl_context_properties)pl, - 0 - }; + CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration) + impl->addref(); + return impl; + } - cl_uint nd0 = 0; - int dtype = dtype0 & 15; - cl_int status = clGetDeviceIDs(pl, dtype, 0, NULL, &nd0); - if (status != CL_DEVICE_NOT_FOUND) // Not an error if platform has no devices + impl = new Impl(configuration); + try + { + CV_OCL_CHECK(clRetainContext(h)); + impl->handle = h; + impl->init_device_list(); + return impl; + } + catch (...) { - CV_OCL_DBG_CHECK_RESULT(status, - cv::format("clGetDeviceIDs(platform=%p, device_type=%d, num_entries=0, devices=NULL, numDevices=%p)", pl, dtype, &nd0).c_str()); + delete impl; + throw; } + } - if (nd0 == 0) - return; + static Impl* findOrCreateContext(const ocl::Device& device) + { + CV_TRACE_FUNCTION(); - AutoBuffer dlistbuf(nd0*2+1); - cl_device_id* dlist = (cl_device_id*)dlistbuf.data(); - cl_device_id* dlist_new = dlist + nd0; - CV_OCL_DBG_CHECK(clGetDeviceIDs(pl, dtype, nd0, dlist, &nd0)); + CV_Assert(!device.empty()); + cl_device_id d = (cl_device_id)device.ptr(); + CV_Assert(d); - cl_uint i, nd = 0; - String name0; - for(i = 0; i < nd0; i++) + std::string configuration = cv::format("@dev-%p", (void*)d); + Impl* impl = findContext(configuration); + if (impl) { - Device d(dlist[i]); - if( !d.available() || !d.compilerAvailable() ) - continue; - if( dtype0 == Device::TYPE_DGPU && d.hostUnifiedMemory() ) - continue; - if( dtype0 == Device::TYPE_IGPU && !d.hostUnifiedMemory() ) - continue; - String name = d.name(); - if( nd != 0 && name != name0 ) - continue; - name0 = name; - dlist_new[nd++] = dlist[i]; + CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration) + impl->addref(); + return impl; + } + + impl = new Impl(configuration); + try + { + impl->createFromDevice(d); + CV_Assert(impl->handle); + return impl; + } + catch (...) + { + delete impl; + throw; } + } - if(nd == 0) + void setDefault() + { + CV_TRACE_FUNCTION(); + cl_device_id d = selectOpenCLDevice(); + + if (d == NULL) return; + createFromDevice(d); + } + + void createFromDevice(cl_device_id d) + { + CV_TRACE_FUNCTION(); + CV_Assert(handle == NULL); + + cl_platform_id pl = NULL; + CV_OCL_DBG_CHECK(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL)); + + cl_context_properties prop[] = + { + CL_CONTEXT_PLATFORM, (cl_context_properties)pl, + 0 + }; + // !!! in the current implementation force the number of devices to 1 !!! - nd = 1; + cl_uint nd = 1; + cl_int status; + + handle = clCreateContext(prop, nd, &d, 0, 0, &status); + CV_OCL_DBG_CHECK_RESULT(status, "clCreateContext"); - handle = clCreateContext(prop, nd, dlist_new, 0, 0, &retval); - CV_OCL_DBG_CHECK_RESULT(retval, "clCreateContext"); - bool ok = handle != 0 && retval == CL_SUCCESS; + bool ok = handle != 0 && status == CL_SUCCESS; if( ok ) { devices.resize(nd); - for( i = 0; i < nd; i++ ) - devices[i].set(dlist_new[i]); + devices[0].set(d); } - } - - ~Impl() - { - if(handle) - { - CV_OCL_DBG_CHECK(clReleaseContext(handle)); + else handle = NULL; - } - devices.clear(); } Program getProg(const ProgramSource& src, const String& buildflags, String& errmsg); @@ -2124,6 +2627,9 @@ struct Context::Impl IMPLEMENT_REFCOUNTABLE(); + const int contextId; // global unique ID + const std::string configuration; + cl_context handle; std::vector devices; @@ -2136,6 +2642,21 @@ struct Context::Impl typedef std::list CacheList; CacheList cacheList; + std::shared_ptr bufferPool_; + std::shared_ptr bufferPoolHostPtr_; + OpenCLBufferPoolImpl& getBufferPool() const + { + _init_buffer_pools(); + CV_DbgAssert(bufferPool_); + return *bufferPool_.get(); + } + OpenCLBufferPoolImpl& getBufferPoolHostPtr() const + { + _init_buffer_pools(); + CV_DbgAssert(bufferPoolHostPtr_); + return *bufferPoolHostPtr_.get(); + } + #ifdef HAVE_OPENCL_SVM bool svmInitialized; bool svmAvailable; @@ -2271,6 +2792,15 @@ struct Context::Impl svmFunctions.fn_clSVMAlloc = NULL; return; } + + std::shared_ptr bufferPoolSVM_; + + OpenCLSVMBufferPoolImpl& getBufferPoolSVM() const + { + _init_buffer_pools(); + CV_DbgAssert(bufferPoolSVM_); + return *bufferPoolSVM_.get(); + } #endif friend class Program; @@ -2282,49 +2812,66 @@ Context::Context() p = 0; } +Context::~Context() +{ + release(); +} + +// deprecated Context::Context(int dtype) { p = 0; create(dtype); } -bool Context::create() +void Context::release() { - if( !haveOpenCL() ) - return false; - if(p) - p->release(); - p = new Impl(); - if(!p->handle) + if (p) { - delete p; - p = 0; + p->release(); + p = NULL; } - return p != 0; } -bool Context::create(int dtype0) +bool Context::create() { - if( !haveOpenCL() ) + release(); + if (!haveOpenCL()) return false; - if(p) - p->release(); - p = new Impl(dtype0); - if(!p->handle) - { - delete p; - p = 0; - } - return p != 0; + p = Impl::findOrCreateContext(std::string()); + if (p->handle) + return true; + release(); + return false; } -Context::~Context() +// deprecated +bool Context::create(int dtype) { - if (p) + if( !haveOpenCL() ) + return false; + release(); + if (dtype == CL_DEVICE_TYPE_DEFAULT || (unsigned)dtype == (unsigned)CL_DEVICE_TYPE_ALL) { - p->release(); - p = NULL; + p = Impl::findOrCreateContext(""); + } + else if (dtype == CL_DEVICE_TYPE_GPU) + { + p = Impl::findOrCreateContext(":GPU:"); } + else if (dtype == CL_DEVICE_TYPE_CPU) + { + p = Impl::findOrCreateContext(":CPU:"); + } + else + { + CV_LOG_ERROR(NULL, "OpenCL: Can't recognize OpenCV device type=" << dtype); + } + if (p && !p->handle) + { + release(); + } + return p != 0; } Context::Context(const Context& c) @@ -2355,7 +2902,7 @@ size_t Context::ndevices() const return p ? p->devices.size() : 0; } -const Device& Context::device(size_t idx) const +Device& Context::device(size_t idx) const { static Device dummy; return !p || idx >= p->devices.size() ? dummy : p->devices[idx]; @@ -2363,23 +2910,16 @@ const Device& Context::device(size_t idx) const Context& Context::getDefault(bool initialize) { - static Context* ctx = new Context(); - if(!ctx->p && haveOpenCL()) + auto& c = OpenCLExecutionContext::getCurrent(); + if (!c.empty()) { - if (!ctx->p) - ctx->p = new Impl(); - if (initialize) - { - // do not create new Context right away. - // First, try to retrieve existing context of the same type. - // In its turn, Platform::getContext() may call Context::create() - // if there is no such context. - if (ctx->p->handle == NULL) - ctx->p->setDefault(); - } + auto& ctx = c.getContext(); + return ctx; } - return *ctx; + CV_UNUSED(initialize); + static Context dummy; + return dummy; } Program Context::getProg(const ProgramSource& prog, @@ -2394,6 +2934,30 @@ void Context::unloadProg(Program& prog) p->unloadProg(prog); } +/* static */ +Context Context::fromHandle(void* context) +{ + Context ctx; + ctx.p = Impl::findOrCreateContext((cl_context)context); + return ctx; +} + +/* static */ +Context Context::fromDevice(const ocl::Device& device) +{ + Context ctx; + ctx.p = Impl::findOrCreateContext(device); + return ctx; +} + +/* static */ +Context Context::create(const std::string& configuration) +{ + Context ctx; + ctx.p = Impl::findOrCreateContext(configuration); + return ctx; +} + #ifdef HAVE_OPENCL_SVM bool Context::useSVM() const { @@ -2477,12 +3041,23 @@ static void get_platform_name(cl_platform_id id, String& name) */ void attachContext(const String& platformName, void* platformID, void* context, void* deviceID) { - cl_uint cnt = 0; + auto ctx = OpenCLExecutionContext::create(platformName, platformID, context, deviceID); + ctx.bind(); +} +/* static */ +OpenCLExecutionContext OpenCLExecutionContext::create( + const std::string& platformName, void* platformID, void* context, void* deviceID +) +{ + if (!haveOpenCL()) + CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!"); + + cl_uint cnt = 0; CV_OCL_CHECK(clGetPlatformIDs(0, 0, &cnt)); if (cnt == 0) - CV_Error(cv::Error::OpenCLApiCallError, "no OpenCL platform available!"); + CV_Error(cv::Error::OpenCLApiCallError, "No OpenCL platform available!"); std::vector platforms(cnt); @@ -2512,44 +3087,25 @@ void attachContext(const String& platformName, void* platformID, void* context, if (platformName != actualPlatformName) CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!"); - // do not initialize OpenCL context - Context ctx = Context::getDefault(false); - - // attach supplied context to OpenCV - initializeContextFromHandle(ctx, platformID, context, deviceID); - - CV_OCL_CHECK(clRetainContext((cl_context)context)); - - // clear command queue, if any - CoreTLSData& data = getCoreTlsData(); - data.oclQueue.finish(); - Queue q; - data.oclQueue = q; - - return; -} // attachContext() - + OpenCLExecutionContext ctx; + ctx.p = std::make_shared((cl_platform_id)platformID, (cl_context)context, (cl_device_id)deviceID); + CV_OCL_CHECK(clReleaseContext((cl_context)context)); + CV_OCL_CHECK(clReleaseDevice((cl_device_id)deviceID)); + return ctx; +} -void initializeContextFromHandle(Context& ctx, void* platform, void* _context, void* _device) +void initializeContextFromHandle(Context& ctx, void* _platform, void* _context, void* _device) { + // internal call, less checks + cl_platform_id platformID = (cl_platform_id)_platform; cl_context context = (cl_context)_context; - cl_device_id device = (cl_device_id)_device; - - // cleanup old context - Context::Impl * impl = ctx.p; - if (impl->handle) - { - CV_OCL_DBG_CHECK(clReleaseContext(impl->handle)); - } - impl->devices.clear(); + cl_device_id deviceID = (cl_device_id)_device; - impl->handle = context; - impl->devices.resize(1); - impl->devices[0].set(device); + std::string platformName = PlatformInfo(platformID).name(); - Platform& p = Platform::getDefault(); - Platform::Impl* pImpl = p.p; - pImpl->handle = (cl_platform_id)platform; + auto clExecCtx = OpenCLExecutionContext::create(platformName, platformID, context, deviceID); + CV_Assert(!clExecCtx.empty()); + ctx = clExecCtx.getContext(); } /////////////////////////////////////////// Queue ///////////////////////////////////////////// @@ -2712,10 +3268,14 @@ void* Queue::ptr() const Queue& Queue::getDefault() { - Queue& q = getCoreTlsData().oclQueue; - if( !q.p && haveOpenCL() ) - q.create(Context::getDefault()); - return q; + auto& c = OpenCLExecutionContext::getCurrent(); + if (!c.empty()) + { + auto& q = c.getQueue(); + return q; + } + static Queue dummy; + return dummy; } static cl_command_queue getQueue(const Queue& q) @@ -4478,14 +5038,32 @@ private: #define CV_OPENCL_DATA_PTR_ALIGNMENT 16 #endif -class OpenCLAllocator CV_FINAL : public MatAllocator + +void Context::Impl::__init_buffer_pools() { - mutable OpenCLBufferPoolImpl bufferPool; - mutable OpenCLBufferPoolImpl bufferPoolHostPtr; -#ifdef HAVE_OPENCL_SVM - mutable OpenCLSVMBufferPoolImpl bufferPoolSVM; + bufferPool_ = std::make_shared(0); + OpenCLBufferPoolImpl& bufferPool = *bufferPool_.get(); + bufferPoolHostPtr_ = std::make_shared(CL_MEM_ALLOC_HOST_PTR); + OpenCLBufferPoolImpl& bufferPoolHostPtr = *bufferPoolHostPtr_.get(); + + size_t defaultPoolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0; + size_t poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize); + bufferPool.setMaxReservedSize(poolSize); + size_t poolSizeHostPtr = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize); + bufferPoolHostPtr.setMaxReservedSize(poolSizeHostPtr); + +#ifdef HAVE_OPENCL_SVM + bufferPoolSVM_ = std::make_shared(); + OpenCLSVMBufferPoolImpl& bufferPoolSVM = *bufferPoolSVM_.get(); + size_t poolSizeSVM = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize); + bufferPoolSVM.setMaxReservedSize(poolSizeSVM); #endif + CV_LOG_INFO(NULL, "OpenCL: Initializing buffer pool for context@" << contextId << " with max capacity: poolSize=" << poolSize << " poolSizeHostPtr=" << poolSizeHostPtr); +} + +class OpenCLAllocator CV_FINAL : public MatAllocator +{ public: enum AllocatorFlags { @@ -4498,20 +5076,7 @@ public: }; OpenCLAllocator() - : bufferPool(0), - bufferPoolHostPtr(CL_MEM_ALLOC_HOST_PTR) - { - size_t defaultPoolSize, poolSize; - defaultPoolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0; - poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize); - bufferPool.setMaxReservedSize(poolSize); - poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize); - bufferPoolHostPtr.setMaxReservedSize(poolSize); -#ifdef HAVE_OPENCL_SVM - poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize); - bufferPoolSVM.setMaxReservedSize(poolSize); -#endif - + { matStdAllocator = Mat::getDefaultAllocator(); } ~OpenCLAllocator() @@ -4563,6 +5128,9 @@ public: { if(!useOpenCL()) return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags); + + flushCleanupQueue(); + CV_Assert(data == 0); size_t total = CV_ELEM_SIZE(type); for( int i = dims-1; i >= 0; i-- ) @@ -4573,7 +5141,9 @@ public: } Context& ctx = Context::getDefault(); - flushCleanupQueue(); + if (!ctx.getImpl()) + return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags); + Context::Impl& ctxImpl = *ctx.getImpl(); int createFlags = 0; UMatData::MemoryFlag flags0 = static_cast(0); @@ -4587,7 +5157,7 @@ public: if (ctx.useSVM() && svm::useSVM(usageFlags) && !svmCaps.isNoSVMSupport()) { allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED; - handle = bufferPoolSVM.allocate(total); + handle = ctxImpl.getBufferPoolSVM().allocate(total); // this property is constant, so single buffer pool can be used here bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer(); @@ -4598,12 +5168,12 @@ public: if (createFlags == 0) { allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED; - handle = bufferPool.allocate(total); + handle = ctxImpl.getBufferPool().allocate(total); } else if (createFlags == CL_MEM_ALLOC_HOST_PTR) { allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED; - handle = bufferPoolHostPtr.allocate(total); + handle = ctxImpl.getBufferPoolHostPtr().allocate(total); } else { @@ -4619,6 +5189,7 @@ public: u->handle = handle; u->flags = flags0; u->allocatorFlags_ = allocatorFlags; + u->allocatorContext = std::static_pointer_cast(std::make_shared(ctx)); CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency in deallocate() u->markHostCopyObsolete(true); opencl_allocator_stats.onAllocate(u->size); @@ -4931,15 +5502,26 @@ public: } if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED) { - bufferPool.release((cl_mem)u->handle); + std::shared_ptr pCtx = std::static_pointer_cast(u->allocatorContext); + CV_Assert(pCtx); + ocl::Context& ctx = *pCtx.get(); + CV_Assert(ctx.getImpl()); + ctx.getImpl()->getBufferPool().release((cl_mem)u->handle); } else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED) { - bufferPoolHostPtr.release((cl_mem)u->handle); + std::shared_ptr pCtx = std::static_pointer_cast(u->allocatorContext); + CV_Assert(pCtx); + ocl::Context& ctx = *pCtx.get(); + CV_Assert(ctx.getImpl()); + ctx.getImpl()->getBufferPoolHostPtr().release((cl_mem)u->handle); } #ifdef HAVE_OPENCL_SVM else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED) { + std::shared_ptr pCtx = std::static_pointer_cast(u->allocatorContext); + CV_Assert(pCtx); + ocl::Context& ctx = *pCtx.get(); if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM) { //nothing @@ -4947,7 +5529,6 @@ public: else if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER || (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER) { - Context& ctx = Context::getDefault(); const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); CV_DbgAssert(svmFns->isValid()); cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); @@ -4959,7 +5540,8 @@ public: CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()"); } } - bufferPoolSVM.release((void*)u->handle); + CV_Assert(ctx.getImpl()); + ctx.getImpl()->getBufferPoolSVM().release((void*)u->handle); } #endif else @@ -5675,22 +6257,26 @@ public: } } - BufferPoolController* getBufferPoolController(const char* id) const CV_OVERRIDE { + BufferPoolController* getBufferPoolController(const char* id) const CV_OVERRIDE + { + ocl::Context ctx = Context::getDefault(); + if (ctx.empty()) + return NULL; #ifdef HAVE_OPENCL_SVM if ((svm::checkForceSVMUmatUsage() && (id == NULL || strcmp(id, "OCL") == 0)) || (id != NULL && strcmp(id, "SVM") == 0)) { - return &bufferPoolSVM; + return &ctx.getImpl()->getBufferPoolSVM(); } #endif if (id != NULL && strcmp(id, "HOST_ALLOC") == 0) { - return &bufferPoolHostPtr; + return &ctx.getImpl()->getBufferPoolHostPtr(); } if (id != NULL && strcmp(id, "OCL") != 0) { CV_Error(cv::Error::StsBadArg, "getBufferPoolController(): unknown BufferPool ID\n"); } - return &bufferPool; + return &ctx.getImpl()->getBufferPool(); } MatAllocator* matStdAllocator; diff --git a/modules/core/src/ocl_disabled.impl.hpp b/modules/core/src/ocl_disabled.impl.hpp index a0516476bf..97c3856b37 100644 --- a/modules/core/src/ocl_disabled.impl.hpp +++ b/modules/core/src/ocl_disabled.impl.hpp @@ -144,6 +144,8 @@ const Device& Device::getDefault() return dummy; } +/* static */ Device Device::fromHandle(void* d) { OCL_NOT_AVAILABLE(); } + Context::Context() : p(NULL) { } Context::Context(int dtype) : p(NULL) { } @@ -154,7 +156,7 @@ Context& Context::operator=(const Context& c) { return *this; } bool Context::create() { return false; } bool Context::create(int dtype) { return false; } size_t Context::ndevices() const { return 0; } -const Device& Context::device(size_t idx) const { OCL_NOT_AVAILABLE(); } +Device& Context::device(size_t idx) const { OCL_NOT_AVAILABLE(); } Program Context::getProg(const ProgramSource& prog, const String& buildopt, String& errmsg) { OCL_NOT_AVAILABLE(); } void Context::unloadProg(Program& prog) { } @@ -169,6 +171,13 @@ void* Context::ptr() const { return NULL; } bool Context::useSVM() const { return false; } void Context::setUseSVM(bool enabled) { } +/* static */ Context Context::fromHandle(void* context) { OCL_NOT_AVAILABLE(); } +/* static */ Context Context::fromDevice(const ocl::Device& device) { OCL_NOT_AVAILABLE(); } +/* static */ Context Context::create(const std::string& configuration) { OCL_NOT_AVAILABLE(); } + +void Context::release() { } + + Platform::Platform() : p(NULL) { } Platform::~Platform() { } Platform::Platform(const Platform&) : p(NULL) { } @@ -355,6 +364,43 @@ MatAllocator* getOpenCLAllocator() { return NULL; } internal::ProgramEntry::operator ProgramSource&() const { OCL_NOT_AVAILABLE(); } + +struct OpenCLExecutionContext::Impl +{ + Impl() = default; +}; + +Context& OpenCLExecutionContext::getContext() const { OCL_NOT_AVAILABLE(); } +Device& OpenCLExecutionContext::getDevice() const { OCL_NOT_AVAILABLE(); } +Queue& OpenCLExecutionContext::getQueue() const { OCL_NOT_AVAILABLE(); } + +bool OpenCLExecutionContext::useOpenCL() const { return false; } +void OpenCLExecutionContext::setUseOpenCL(bool flag) { } + +static +OpenCLExecutionContext& getDummyOpenCLExecutionContext() +{ + static OpenCLExecutionContext dummy; + return dummy; +} + +/* static */ +OpenCLExecutionContext& OpenCLExecutionContext::getCurrent() { return getDummyOpenCLExecutionContext(); } + +/* static */ +OpenCLExecutionContext& OpenCLExecutionContext::getCurrentRef() { return getDummyOpenCLExecutionContext(); } + +void OpenCLExecutionContext::bind() const { OCL_NOT_AVAILABLE(); } + +OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue(const ocl::Queue& q) const { OCL_NOT_AVAILABLE(); } +OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue() const { OCL_NOT_AVAILABLE(); } + +/* static */ OpenCLExecutionContext OpenCLExecutionContext::create(const std::string& platformName, void* platformID, void* context, void* deviceID) { OCL_NOT_AVAILABLE(); } +/* static */ OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device, const ocl::Queue& queue) { OCL_NOT_AVAILABLE(); } +/* static */ OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device) { OCL_NOT_AVAILABLE(); } + +void OpenCLExecutionContext::release() { } + }} #if defined(_MSC_VER) diff --git a/modules/core/src/opengl.cpp b/modules/core/src/opengl.cpp index fc042b9151..37ab8623d8 100644 --- a/modules/core/src/opengl.cpp +++ b/modules/core/src/opengl.cpp @@ -1689,9 +1689,14 @@ Context& initializeContextFromGL() if (found < 0) CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for OpenGL interop"); - Context& ctx = Context::getDefault(false); - initializeContextFromHandle(ctx, platforms[found], context, device); - return ctx; + cl_platform_id platform = platforms[found]; + std::string platformName = PlatformInfo(platform).name(); + + OpenCLExecutionContext clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, deviceID); + clReleaseDevice(device); + clReleaseContext(context); + clExecCtx.bind(); + return const_cast(clExecCtx.getContext()); #endif } diff --git a/modules/core/src/precomp.hpp b/modules/core/src/precomp.hpp index 836488f439..21e281c007 100644 --- a/modules/core/src/precomp.hpp +++ b/modules/core/src/precomp.hpp @@ -322,7 +322,7 @@ struct CoreTLSData { CoreTLSData() : //#ifdef HAVE_OPENCL - device(0), useOpenCL(-1), + oclExecutionContextInitialized(false), useOpenCL(-1), //#endif useIPP(-1), useIPP_NE(-1) @@ -333,8 +333,8 @@ struct CoreTLSData RNG rng; //#ifdef HAVE_OPENCL - int device; // device index of an array of devices in a context, see also Device::getDefault - ocl::Queue oclQueue; // the queue used for running a kernel, see also getQueue, Kernel::run + ocl::OpenCLExecutionContext oclExecutionContext; + bool oclExecutionContextInitialized; int useOpenCL; // 1 - use, 0 - do not use, -1 - auto/not initialized //#endif int useIPP; // 1 - use, 0 - do not use, -1 - auto/not initialized diff --git a/modules/core/src/va_intel.cpp b/modules/core/src/va_intel.cpp index c571b90b5f..42948dc457 100644 --- a/modules/core/src/va_intel.cpp +++ b/modules/core/src/va_intel.cpp @@ -106,7 +106,7 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop) CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, 0, NULL, &numDevices); if ((status != CL_SUCCESS) || !(numDevices > 0)) continue; - numDevices = 1; // initializeContextFromHandle() expects only 1 device + numDevices = 1; // OpenCV expects only 1 device status = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(platforms[i], CL_VA_API_DISPLAY_INTEL, display, CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, numDevices, &device, NULL); if (status != CL_SUCCESS) @@ -135,9 +135,23 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop) if (found >= 0) { contextInitialized = true; - Context& ctx = Context::getDefault(false); - initializeContextFromHandle(ctx, platforms[found], context, device); - return ctx; + + cl_platform_id platform = platforms[found]; + std::string platformName = PlatformInfo(platform).name(); + + OpenCLExecutionContext clExecCtx; + try + { + clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, device); + } + catch (...) + { + clReleaseDevice(device); + clReleaseContext(context); + throw; + } + clExecCtx.bind(); + return const_cast(clExecCtx.getContext()); } } # endif // HAVE_VA_INTEL && HAVE_OPENCL diff --git a/modules/core/test/test_opencl.cpp b/modules/core/test/test_opencl.cpp new file mode 100644 index 0000000000..f4f195ea6e --- /dev/null +++ b/modules/core/test/test_opencl.cpp @@ -0,0 +1,191 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "test_precomp.hpp" +#include "opencv2/ts/ocl_test.hpp" + +namespace opencv_test { +namespace ocl { + +static void executeUMatCall(bool requireOpenCL = true) +{ + UMat a(100, 100, CV_8UC1, Scalar::all(0)); + UMat b; + cv::add(a, Scalar::all(1), b); + Mat b_cpu = b.getMat(ACCESS_READ); + EXPECT_EQ(0, cv::norm(b_cpu - 1, NORM_INF)); + + if (requireOpenCL) + { + EXPECT_TRUE(cv::ocl::useOpenCL()); + } +} + +TEST(OCL_Context, createFromDevice) +{ + bool useOCL = cv::ocl::useOpenCL(); + + OpenCLExecutionContext ctx = OpenCLExecutionContext::getCurrent(); + + if (!useOCL) + { + ASSERT_TRUE(ctx.empty()); // Other tests should not broke global state + throw SkipTestException("OpenCL is not available / disabled"); + } + + ASSERT_FALSE(ctx.empty()); + + ocl::Device device = ctx.getDevice(); + ASSERT_FALSE(device.empty()); + + ocl::Context context = ocl::Context::fromDevice(device); + ocl::Context context2 = ocl::Context::fromDevice(device); + + EXPECT_TRUE(context.getImpl() == context2.getImpl()) << "Broken cache for OpenCL context (device)"; +} + +TEST(OCL_OpenCLExecutionContext, basic) +{ + bool useOCL = cv::ocl::useOpenCL(); + + OpenCLExecutionContext ctx = OpenCLExecutionContext::getCurrent(); + + if (!useOCL) + { + ASSERT_TRUE(ctx.empty()); // Other tests should not broke global state + throw SkipTestException("OpenCL is not available / disabled"); + } + + ASSERT_FALSE(ctx.empty()); + + ocl::Context context = ctx.getContext(); + ocl::Context context2 = ocl::Context::getDefault(); + EXPECT_TRUE(context.getImpl() == context2.getImpl()); + + ocl::Device device = ctx.getDevice(); + ocl::Device device2 = ocl::Device::getDefault(); + EXPECT_TRUE(device.getImpl() == device2.getImpl()); + + ocl::Queue queue = ctx.getQueue(); + ocl::Queue queue2 = ocl::Queue::getDefault(); + EXPECT_TRUE(queue.getImpl() == queue2.getImpl()); +} + +TEST(OCL_OpenCLExecutionContext, createAndBind) +{ + bool useOCL = cv::ocl::useOpenCL(); + + OpenCLExecutionContext ctx = OpenCLExecutionContext::getCurrent(); + + if (!useOCL) + { + ASSERT_TRUE(ctx.empty()); // Other tests should not broke global state + throw SkipTestException("OpenCL is not available / disabled"); + } + + ASSERT_FALSE(ctx.empty()); + + ocl::Context context = ctx.getContext(); + ocl::Device device = ctx.getDevice(); + + OpenCLExecutionContext ctx2 = OpenCLExecutionContext::create(context, device); + ASSERT_FALSE(ctx2.empty()); + + try + { + ctx2.bind(); + executeUMatCall(); + ctx.bind(); + executeUMatCall(); + } + catch (...) + { + ctx.bind(); // restore + throw; + } +} + +TEST(OCL_OpenCLExecutionContext, createGPU) +{ + bool useOCL = cv::ocl::useOpenCL(); + + OpenCLExecutionContext ctx = OpenCLExecutionContext::getCurrent(); + + if (!useOCL) + { + ASSERT_TRUE(ctx.empty()); // Other tests should not broke global state + throw SkipTestException("OpenCL is not available / disabled"); + } + + ASSERT_FALSE(ctx.empty()); + + ocl::Context context = ocl::Context::create(":GPU:1"); + if (context.empty()) + { + context = ocl::Context::create(":CPU:"); + if (context.empty()) + throw SkipTestException("OpenCL GPU1/CPU devices are not available"); + } + + ocl::Device device = context.device(0); + + OpenCLExecutionContext ctx2 = OpenCLExecutionContext::create(context, device); + ASSERT_FALSE(ctx2.empty()); + + try + { + ctx2.bind(); + executeUMatCall(); + ctx.bind(); + executeUMatCall(); + } + catch (...) + { + ctx.bind(); // restore + throw; + } +} + +TEST(OCL_OpenCLExecutionContext, ScopeTest) +{ + bool useOCL = cv::ocl::useOpenCL(); + + OpenCLExecutionContext ctx = OpenCLExecutionContext::getCurrent(); + + if (!useOCL) + { + ASSERT_TRUE(ctx.empty()); // Other tests should not broke global state + throw SkipTestException("OpenCL is not available / disabled"); + } + + ASSERT_FALSE(ctx.empty()); + + ocl::Context context = ocl::Context::create(":GPU:1"); + if (context.empty()) + { + context = ocl::Context::create(":CPU:"); + if (context.empty()) + context = ctx.getContext(); + } + + ocl::Device device = context.device(0); + + OpenCLExecutionContext ctx2 = OpenCLExecutionContext::create(context, device); + ASSERT_FALSE(ctx2.empty()); + + try + { + OpenCLExecutionContextScope ctx_scope(ctx2); + executeUMatCall(); + } + catch (...) + { + ctx.bind(); // restore + throw; + } + + executeUMatCall(); +} + +} } // namespace opencv_test::ocl diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index aa751cff29..a542b2fb53 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -26,6 +26,7 @@ add_subdirectory(dnn) add_subdirectory(gpu) add_subdirectory(tapi) add_subdirectory(opencl) +add_subdirectory(sycl) if(WIN32 AND HAVE_DIRECTX) add_subdirectory(directx) endif() @@ -122,6 +123,7 @@ endif() add_subdirectory(dnn) # add_subdirectory(gpu) add_subdirectory(opencl) +add_subdirectory(sycl) # add_subdirectory(opengl) # add_subdirectory(openvx) add_subdirectory(tapi) diff --git a/samples/sycl/CMakeLists.txt b/samples/sycl/CMakeLists.txt new file mode 100644 index 0000000000..093ed9f3b6 --- /dev/null +++ b/samples/sycl/CMakeLists.txt @@ -0,0 +1,80 @@ +if(OPENCV_SKIP_SAMPLES_SYCL) + return() +endif() + +ocv_install_example_src(opencl *.cpp *.hpp CMakeLists.txt) + +set(OPENCV_SYCL_SAMPLES_REQUIRED_DEPS + opencv_core + opencv_imgproc + opencv_imgcodecs + opencv_videoio + opencv_highgui) +ocv_check_dependencies(${OPENCV_SYCL_SAMPLES_REQUIRED_DEPS}) + +if(NOT BUILD_EXAMPLES OR NOT OCV_DEPENDENCIES_FOUND OR OPENCV_SKIP_SAMPLES_BUILD_SYCL) + return() +endif() + +if(CMAKE_VERSION VERSION_LESS "3.5") + message(STATUS "SYCL samples require CMake 3.5+") + return() +endif() + +cmake_policy(VERSION 3.5) + +find_package(SYCL QUIET) # will oneAPI support this straightforward way? + +if(NOT SYCL_FOUND AND NOT OPENCV_SKIP_SAMPLES_SYCL_ONEDNN) + # lets try scripts from oneAPI:oneDNN component + if(NOT DEFINED DNNLROOT AND DEFINED ENV{DNNLROOT}) + set(DNNLROOT "$ENV{DNNLROOT}") + endif() + # Some verions of called script violate CMake policy and may emit unrecoverable CMake errors + # Use OPENCV_SKIP_SAMPLES_SYCL=1 / OPENCV_SKIP_SAMPLES_SYCL_ONEDNN to bypass this + find_package(dnnl CONFIG QUIET HINTS "${DNNLROOT}") +endif() + +if(NOT SYCL_FOUND AND NOT OPENCV_SKIP_SAMPLES_SYCL_COMPUTECPP) + # lets try this SYCL SDK too: https://github.com/codeplaysoftware/computecpp-sdk + find_package(ComputeCpp QUIET) + if(ComputeCpp_FOUND) + set(SYCL_TARGET ComputeCpp::ComputeCpp) + set(SYCL_FLAGS ${ComputeCpp_FLAGS}) + set(SYCL_INCLUDE_DIRS ${ComputeCpp_INCLUDE_DIRS}) + set(SYCL_LIBRARIES ${ComputeCpp_LIBRARIES}) + endif() +endif() + +if(OPENCV_CMAKE_DEBUG_SYCL) + ocv_cmake_dump_vars("SYCL") # OpenCV source tree is required +endif() + +if(NOT SYCL_TARGET) + message(STATUS "SYCL/OpenCL samples are skipped: SYCL SDK is required") + message(STATUS " - check configuration of SYCL_DIR/SYCL_ROOT/CMAKE_MODULE_PATH") + message(STATUS " - ensure that right compiler is selected from SYCL SDK (e.g, clang++): CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}") + return() +endif() + +project(sycl_samples) + +if(SYCL_FLAGS) # "target_link_libraries(... ${SYCL_TARGET})" is not enough. Hacking... + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SYCL_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${SYCL_FLAGS}") +endif() + +ocv_include_modules_recurse(${OPENCV_SYCL_SAMPLES_REQUIRED_DEPS}) +ocv_include_directories(${OpenCL_INCLUDE_DIR}) +file(GLOB all_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) +foreach(sample_filename ${all_samples}) + ocv_define_sample(tgt ${sample_filename} sycl) + ocv_target_link_libraries(${tgt} PRIVATE + ${OPENCV_LINKER_LIBS} + ${OPENCV_SYCL_SAMPLES_REQUIRED_DEPS} + ${SYCL_TARGET}) + + if(COMMAND add_sycl_to_target) # ComputeCpp + add_sycl_to_target(TARGET ${tgt} SOURCES ${sample_filename}) + endif() +endforeach() diff --git a/samples/sycl/sycl-opencv-interop.cpp b/samples/sycl/sycl-opencv-interop.cpp new file mode 100644 index 0000000000..ccb8eaf412 --- /dev/null +++ b/samples/sycl/sycl-opencv-interop.cpp @@ -0,0 +1,351 @@ +/* + * The example of interoperability between SYCL/OpenCL and OpenCV. + * - SYCL: https://www.khronos.org/sycl/ + * - SYCL runtime parameters: https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md + */ +#include + +#include +#include +#include +#include + +#include + + +class sycl_inverse_kernel; // can be omitted - modern SYCL versions doesn't require this + +using namespace cv; + + +class App +{ +public: + App(const CommandLineParser& cmd); + ~App(); + + void initVideoSource(); + + void initSYCL(); + + void process_frame(cv::Mat& frame); + + /// to check result with CPU-only reference code + Mat process_frame_reference(const cv::Mat& frame); + + int run(); + + bool isRunning() { return m_running; } + bool doProcess() { return m_process; } + + void setRunning(bool running) { m_running = running; } + void setDoProcess(bool process) { m_process = process; } + +protected: + void handleKey(char key); + +private: + bool m_running; + bool m_process; + bool m_show_ui; + + int64 m_t0; + int64 m_t1; + float m_time; + float m_frequency; + + std::string m_file_name; + int m_camera_id; + cv::VideoCapture m_cap; + cv::Mat m_frame; + + cl::sycl::queue sycl_queue; +}; + + +App::App(const CommandLineParser& cmd) +{ + m_camera_id = cmd.get("camera"); + m_file_name = cmd.get("video"); + + m_running = false; + m_process = false; +} // ctor + + +App::~App() +{ + // nothing +} + + +void App::initSYCL() +{ + using namespace cl::sycl; + + // Configuration details: https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md + cl::sycl::default_selector selector; + + sycl_queue = cl::sycl::queue(selector, [](cl::sycl::exception_list l) + { + // exception_handler + for (auto ep : l) + { + try + { + std::rethrow_exception(ep); + } + catch (const cl::sycl::exception& e) + { + std::cerr << "SYCL exception: " << e.what() << std::endl; + } + } + }); + + auto device = sycl_queue.get_device(); + auto platform = device.get_platform(); + std::cout << "SYCL device: " << device.get_info() + << " @ " << device.get_info() + << " (platform: " << platform.get_info() << ")" << std::endl; + + if (device.is_host()) + { + std::cerr << "SYCL can't select OpenCL device. Host is used for computations, interoperability is not available" << std::endl; + } + else + { + // bind OpenCL context/device/queue from SYCL to OpenCV + try + { + auto ctx = cv::ocl::OpenCLExecutionContext::create( + platform.get_info(), + platform.get(), + sycl_queue.get_context().get(), + device.get() + ); + ctx.bind(); + } + catch (const cv::Exception& e) + { + std::cerr << "OpenCV: Can't bind SYCL OpenCL context/device/queue: " << e.what() << std::endl; + } + std::cout << "OpenCV uses OpenCL: " << (cv::ocl::useOpenCL() ? "True" : "False") << std::endl; + } +} // initSYCL() + + +void App::initVideoSource() +{ + if (!m_file_name.empty() && m_camera_id == -1) + { + m_cap.open(samples::findFileOrKeep(m_file_name)); + if (!m_cap.isOpened()) + throw std::runtime_error(std::string("can't open video stream: ") + m_file_name); + } + else if (m_camera_id != -1) + { + m_cap.open(m_camera_id); + if (!m_cap.isOpened()) + throw std::runtime_error(std::string("can't open camera: ") + std::to_string(m_camera_id)); + } + else + throw std::runtime_error(std::string("specify video source")); +} // initVideoSource() + + +void App::process_frame(cv::Mat& frame) +{ + using namespace cl::sycl; + + // cv::Mat => cl::sycl::buffer + { + CV_Assert(frame.isContinuous()); + CV_CheckTypeEQ(frame.type(), CV_8UC1, ""); + + buffer frame_buffer(frame.data, range<2>(frame.rows, frame.cols)); + + // done automatically: frame_buffer.set_write_back(true); + + sycl_queue.submit([&](handler& cgh) { + auto pixels = frame_buffer.get_access(cgh); + + cgh.parallel_for(range<2>(frame.rows, frame.cols), [=](item<2> item) { + uint8_t v = pixels[item]; + pixels[item] = ~v; + }); + }); + + sycl_queue.wait_and_throw(); + } + + // No way to extract cl_mem from cl::sycl::buffer (ref: 3.6.11 "Interfacing with OpenCL" of SYCL 1.2.1) + // We just reusing OpenCL context/device/queue from SYCL here (see initSYCL() bind part) and call UMat processing + { + UMat blurResult; + { + UMat umat_buffer = frame.getUMat(ACCESS_RW); + cv::blur(umat_buffer, blurResult, Size(3, 3)); // UMat doesn't support inplace + } + Mat result; + blurResult.copyTo(result); + swap(result, frame); + } +} + +Mat App::process_frame_reference(const cv::Mat& frame) +{ + Mat result; + cv::bitwise_not(frame, result); + Mat blurResult; + cv::blur(result, blurResult, Size(3, 3)); // avoid inplace + blurResult.copyTo(result); + return result; +} + +int App::run() +{ + std::cout << "Initializing..." << std::endl; + + initSYCL(); + initVideoSource(); + + std::cout << "Press ESC to exit" << std::endl; + std::cout << " 'p' to toggle ON/OFF processing" << std::endl; + + m_running = true; + m_process = true; + m_show_ui = true; + + int processedFrames = 0; + + cv::TickMeter timer; + + // Iterate over all frames + while (isRunning() && m_cap.read(m_frame)) + { + Mat m_frameGray; + cvtColor(m_frame, m_frameGray, COLOR_BGR2GRAY); + + bool checkWithReference = (processedFrames == 0); + Mat reference_result; + if (checkWithReference) + { + reference_result = process_frame_reference(m_frameGray); + } + + timer.reset(); + timer.start(); + + if (m_process) + { + process_frame(m_frameGray); + } + + timer.stop(); + + if (checkWithReference) + { + double diffInf = cv::norm(reference_result, m_frameGray, NORM_INF); + if (diffInf > 0) + { + std::cerr << "Result is not accurate. diffInf=" << diffInf << std::endl; + imwrite("reference.png", reference_result); + imwrite("actual.png", m_frameGray); + } + } + + Mat img_to_show = m_frameGray; + + std::ostringstream msg; + msg << "Frame " << processedFrames << " (" << m_frame.size + << ") Time: " << cv::format("%.2f", timer.getTimeMilli()) << " msec" + << " (process: " << (m_process ? "True" : "False") << ")"; + std::cout << msg.str() << std::endl; + putText(img_to_show, msg.str(), Point(5, 150), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); + + if (m_show_ui) + { + try + { + imshow("sycl_interop", img_to_show); + int key = waitKey(1); + switch (key) + { + case 27: // ESC + m_running = false; + break; + + case 'p': // fallthru + case 'P': + m_process = !m_process; + break; + + default: + break; + } + } + catch (const std::exception& e) + { + std::cerr << "ERROR(OpenCV UI): " << e.what() << std::endl; + if (processedFrames > 0) + throw; + m_show_ui = false; // UI is not available + } + } + + processedFrames++; + + if (!m_show_ui) + { + if (processedFrames > 100) + m_running = false; + } + } + + return 0; +} + + +int main(int argc, char** argv) +{ + const char* keys = + "{ help h ? | | print help message }" + "{ camera c | -1 | use camera as input }" + "{ video v | | use video as input }"; + + CommandLineParser cmd(argc, argv, keys); + if (cmd.has("help")) + { + cmd.printMessage(); + return EXIT_SUCCESS; + } + + try + { + App app(cmd); + if (!cmd.check()) + { + cmd.printErrors(); + return 1; + } + app.run(); + } + catch (const cv::Exception& e) + { + std::cout << "FATAL: OpenCV error: " << e.what() << std::endl; + return 1; + } + catch (const std::exception& e) + { + std::cout << "FATAL: C++ error: " << e.what() << std::endl; + return 1; + } + + catch (...) + { + std::cout << "FATAL: unknown C++ exception" << std::endl; + return 1; + } + + return EXIT_SUCCESS; +} // main()