diff --git a/modules/calib3d/src/stereobm.cpp b/modules/calib3d/src/stereobm.cpp index f6708c47d9..2501934f4b 100644 --- a/modules/calib3d/src/stereobm.cpp +++ b/modules/calib3d/src/stereobm.cpp @@ -1108,7 +1108,7 @@ public: int FILTERED = (params.minDisparity - 1) << disp_shift; #ifdef HAVE_OPENCL - if(ocl::useOpenCL() && disparr.isUMat() && params.textureThreshold == 0) + if(ocl::isOpenCLActivated() && disparr.isUMat() && params.textureThreshold == 0) { UMat left, right; if(ocl_prefiltering(leftarr, rightarr, left, right, ¶ms)) diff --git a/modules/core/include/opencv2/core/opencl/ocl_defs.hpp b/modules/core/include/opencv2/core/opencl/ocl_defs.hpp index 3a26b46ef9..605a65f8d8 100644 --- a/modules/core/include/opencv2/core/opencl/ocl_defs.hpp +++ b/modules/core/include/opencv2/core/opencl/ocl_defs.hpp @@ -5,7 +5,22 @@ // Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. +#ifndef OPENCV_CORE_OPENCL_DEFS_HPP +#define OPENCV_CORE_OPENCL_DEFS_HPP + #include "opencv2/core/utility.hpp" +#include "cvconfig.h" + +namespace cv { namespace ocl { +#ifdef HAVE_OPENCL +/// Call is similar to useOpenCL() but doesn't try to load OpenCL runtime or create OpenCL context +CV_EXPORTS bool isOpenCLActivated(); +#else +static inline bool isOpenCLActivated() { return false; } +#endif +}} // namespace + + //#define CV_OPENCL_RUN_ASSERT #ifdef HAVE_OPENCL @@ -13,7 +28,7 @@ #ifdef CV_OPENCL_RUN_VERBOSE #define CV_OCL_RUN_(condition, func, ...) \ { \ - if (cv::ocl::useOpenCL() && (condition) && func) \ + if (cv::ocl::isOpenCLActivated() && (condition) && func) \ { \ printf("%s: OpenCL implementation is running\n", CV_Func); \ fflush(stdout); \ @@ -29,7 +44,7 @@ #elif defined CV_OPENCL_RUN_ASSERT #define CV_OCL_RUN_(condition, func, ...) \ { \ - if (cv::ocl::useOpenCL() && (condition)) \ + if (cv::ocl::isOpenCLActivated() && (condition)) \ { \ if(func) \ { \ @@ -44,7 +59,7 @@ } #else #define CV_OCL_RUN_(condition, func, ...) \ - if (cv::ocl::useOpenCL() && (condition) && func) \ + if (cv::ocl::isOpenCLActivated() && (condition) && func) \ { \ CV_IMPL_ADD(CV_IMPL_OCL); \ return __VA_ARGS__; \ @@ -56,3 +71,5 @@ #endif #define CV_OCL_RUN(condition, func) CV_OCL_RUN_(condition, func) + +#endif // OPENCV_CORE_OPENCL_DEFS_HPP diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp index 827612ee82..dfc2f2dbde 100644 --- a/modules/core/src/convert.cpp +++ b/modules/core/src/convert.cpp @@ -44,6 +44,7 @@ #include "precomp.hpp" #include "opencl_kernels_core.hpp" + #include "convert.hpp" #include "opencv2/core/openvx/ovx_defs.hpp" @@ -897,7 +898,8 @@ void cv::extractChannel(InputArray _src, OutputArray _dst, int coi) CV_Assert( 0 <= coi && coi < cn ); int ch[] = { coi, 0 }; - if (ocl::useOpenCL() && _src.dims() <= 2 && _dst.isUMat()) +#ifdef HAVE_OPENCL + if (ocl::isOpenCLActivated() && _src.dims() <= 2 && _dst.isUMat()) { UMat src = _src.getUMat(); _dst.create(src.dims, &src.size[0], depth); @@ -905,6 +907,7 @@ void cv::extractChannel(InputArray _src, OutputArray _dst, int coi) mixChannels(std::vector(1, src), std::vector(1, dst), ch, 1); return; } +#endif Mat src = _src.getMat(); _dst.create(src.dims, &src.size[0], depth); @@ -925,12 +928,14 @@ void cv::insertChannel(InputArray _src, InputOutputArray _dst, int coi) CV_Assert( 0 <= coi && coi < dcn && scn == 1 ); int ch[] = { 0, coi }; - if (ocl::useOpenCL() && _src.dims() <= 2 && _dst.isUMat()) +#ifdef HAVE_OPENCL + if (ocl::isOpenCLActivated() && _src.dims() <= 2 && _dst.isUMat()) { UMat src = _src.getUMat(), dst = _dst.getUMat(); mixChannels(std::vector(1, src), std::vector(1, dst), ch, 1); return; } +#endif Mat src = _src.getMat(), dst = _dst.getMat(); diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 94bf7959f9..b368fc61b0 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -423,7 +423,7 @@ struct OpenCLBinaryCacheConfigurator { CV_LOG_WARNING(NULL, "- " << remove_entries[i]); } - CV_LOG_WARNING(NULL,"Note: You can disable this behavior via this option: CV_OPENCL_CACHE_CLEANUP=0"); + CV_LOG_WARNING(NULL, "Note: You can disable this behavior via this option: OPENCV_OPENCL_CACHE_CLEANUP=0"); for (size_t i = 0; i < remove_entries.size(); i++) { @@ -781,18 +781,34 @@ public: #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT +// true if we have initialized OpenCL subsystem with available platforms +static bool g_isOpenCVActivated = false; + bool haveOpenCL() { + CV_TRACE_FUNCTION(); #ifdef HAVE_OPENCL static bool g_isOpenCLInitialized = false; static bool g_isOpenCLAvailable = false; if (!g_isOpenCLInitialized) { + CV_TRACE_REGION("Init_OpenCL_Runtime"); + const char* envPath = getenv("OPENCV_OPENCL_RUNTIME"); + if (envPath) + { + if (cv::String(envPath) == "disabled") + { + g_isOpenCLAvailable = false; + g_isOpenCLInitialized = true; + } + } + CV_LOG_INFO(NULL, "Initialize OpenCL runtime..."); try { cl_uint n = 0; g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS; + g_isOpenCVActivated = n > 0; } catch (...) { @@ -813,7 +829,7 @@ bool useOpenCL() { try { - data->useOpenCL = (int)haveOpenCL() && Device::getDefault().ptr() && Device::getDefault().available(); + data->useOpenCL = (int)(haveOpenCL() && Device::getDefault().ptr() && Device::getDefault().available()) ? 1 : 0; } catch (...) { @@ -823,12 +839,27 @@ bool useOpenCL() return data->useOpenCL > 0; } +#ifdef HAVE_OPENCL +bool isOpenCLActivated() +{ + if (!g_isOpenCVActivated) + return false; // prevent unnecessary OpenCL activation via useOpenCL()->haveOpenCL() calls + return useOpenCL(); +} +#endif + void setUseOpenCL(bool flag) { - if( haveOpenCL() ) + CV_TRACE_FUNCTION(); + + CoreTLSData* data = getCoreTlsData().get(); + if (!flag) { - CoreTLSData* data = getCoreTlsData().get(); - data->useOpenCL = (flag && Device::getDefault().ptr() != NULL) ? 1 : 0; + data->useOpenCL = 0; + } + else if( haveOpenCL() ) + { + data->useOpenCL = (Device::getDefault().ptr() != NULL) ? 1 : 0; } } @@ -5289,9 +5320,15 @@ public: } }; +static OpenCLAllocator* getOpenCLAllocator_() // call once guarantee +{ + static OpenCLAllocator* g_allocator = new OpenCLAllocator(); // avoid destrutor call (using of this object is too wide) + g_isOpenCVActivated = true; + return g_allocator; +} MatAllocator* getOpenCLAllocator() { - CV_SINGLETON_LAZY_INIT(MatAllocator, new OpenCLAllocator()) + CV_SINGLETON_LAZY_INIT(MatAllocator, getOpenCLAllocator_()) } }} // namespace cv::ocl diff --git a/modules/core/src/trace.cpp b/modules/core/src/trace.cpp index 230510625e..e22f997a70 100644 --- a/modules/core/src/trace.cpp +++ b/modules/core/src/trace.cpp @@ -8,6 +8,8 @@ #include #include +#include + #include // va_start #include @@ -596,7 +598,7 @@ void Region::destroy() #endif #ifdef HAVE_OPENCL case REGION_FLAG_IMPL_OPENCL: - if (param_synchronizeOpenCL && cv::ocl::useOpenCL()) + if (param_synchronizeOpenCL && cv::ocl::isOpenCLActivated()) cv::ocl::finish(); myCodePath = Impl::CODE_PATH_OPENCL; break; diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp index 84cebdba09..6e4cd349d0 100644 --- a/modules/core/src/umatrix.cpp +++ b/modules/core/src/umatrix.cpp @@ -141,7 +141,7 @@ void UMatData::unlock() MatAllocator* UMat::getStdAllocator() { #ifdef HAVE_OPENCL - if( ocl::haveOpenCL() && ocl::useOpenCL() ) + if (ocl::useOpenCL()) return ocl::getOpenCLAllocator(); #endif return Mat::getDefaultAllocator(); diff --git a/modules/features2d/src/fast.cpp b/modules/features2d/src/fast.cpp index 8607f34a2e..c87ea69e13 100644 --- a/modules/features2d/src/fast.cpp +++ b/modules/features2d/src/fast.cpp @@ -422,33 +422,27 @@ void FAST(InputArray _img, std::vector& keypoints, int threshold, bool { CV_INSTRUMENT_REGION() -#ifdef HAVE_OPENCL - if( ocl::useOpenCL() && _img.isUMat() && type == FastFeatureDetector::TYPE_9_16 && - ocl_FAST(_img, keypoints, threshold, nonmax_suppression, 10000)) - { - CV_IMPL_ADD(CV_IMPL_OCL); - return; - } -#endif + CV_OCL_RUN(_img.isUMat() && type == FastFeatureDetector::TYPE_9_16, + ocl_FAST(_img, keypoints, threshold, nonmax_suppression, 10000)); CV_OVX_RUN(true, openvx_FAST(_img, keypoints, threshold, nonmax_suppression, type)) - switch(type) { + switch(type) { case FastFeatureDetector::TYPE_5_8: - FAST_t<8>(_img, keypoints, threshold, nonmax_suppression); - break; + FAST_t<8>(_img, keypoints, threshold, nonmax_suppression); + break; case FastFeatureDetector::TYPE_7_12: - FAST_t<12>(_img, keypoints, threshold, nonmax_suppression); - break; + FAST_t<12>(_img, keypoints, threshold, nonmax_suppression); + break; case FastFeatureDetector::TYPE_9_16: #ifdef HAVE_TEGRA_OPTIMIZATION - if(tegra::useTegra() && tegra::FAST(_img, keypoints, threshold, nonmax_suppression)) - break; + if(tegra::useTegra() && tegra::FAST(_img, keypoints, threshold, nonmax_suppression)) + break; #endif - FAST_t<16>(_img, keypoints, threshold, nonmax_suppression); - break; - } + FAST_t<16>(_img, keypoints, threshold, nonmax_suppression); + break; + } } diff --git a/modules/features2d/src/kaze/AKAZEFeatures.cpp b/modules/features2d/src/kaze/AKAZEFeatures.cpp index eda14e3db5..66acc8ada7 100644 --- a/modules/features2d/src/kaze/AKAZEFeatures.cpp +++ b/modules/features2d/src/kaze/AKAZEFeatures.cpp @@ -520,7 +520,7 @@ convertScalePyramid(const std::vector >& src, std::vector< */ void AKAZEFeatures::Create_Nonlinear_Scale_Space(InputArray image) { - if (ocl::useOpenCL() && image.isUMat()) { + if (ocl::isOpenCLActivated() && image.isUMat()) { // will run OCL version of scale space pyramid UMatPyramid uPyr; // init UMat pyramid with sizes diff --git a/modules/features2d/src/matchers.cpp b/modules/features2d/src/matchers.cpp index b5963e2e61..6cb7670c69 100644 --- a/modules/features2d/src/matchers.cpp +++ b/modules/features2d/src/matchers.cpp @@ -771,7 +771,7 @@ void BFMatcher::knnMatchImpl( InputArray _queryDescriptors, std::vector ksize.height && _src.cols() > ksize.width); diff --git a/modules/imgproc/src/sumpixels.cpp b/modules/imgproc/src/sumpixels.cpp index d19ef3b0a9..40918da527 100755 --- a/modules/imgproc/src/sumpixels.cpp +++ b/modules/imgproc/src/sumpixels.cpp @@ -503,17 +503,8 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output sqdepth = CV_64F; sdepth = CV_MAT_DEPTH(sdepth), sqdepth = CV_MAT_DEPTH(sqdepth); -#ifdef HAVE_OPENCL - if (ocl::useOpenCL() && _sum.isUMat() && !_tilted.needed()) - { - if (!_sqsum.needed()) - { - CV_OCL_RUN(ocl::useOpenCL(), ocl_integral(_src, _sum, sdepth)) - } - else if (_sqsum.isUMat()) - CV_OCL_RUN(ocl::useOpenCL(), ocl_integral(_src, _sum, _sqsum, sdepth, sqdepth)) - } -#endif + CV_OCL_RUN(_sum.isUMat() && !_tilted.needed(), + (_sqsum.needed() ? ocl_integral(_src, _sum, _sqsum, sdepth, sqdepth) : ocl_integral(_src, _sum, sdepth))); Size ssize = _src.size(), isize(ssize.width + 1, ssize.height + 1); _sum.create( isize, CV_MAKETYPE(sdepth, cn) ); diff --git a/modules/objdetect/src/cascadedetect.cpp b/modules/objdetect/src/cascadedetect.cpp index 242fac3f2b..0758fb7a53 100644 --- a/modules/objdetect/src/cascadedetect.cpp +++ b/modules/objdetect/src/cascadedetect.cpp @@ -607,7 +607,7 @@ bool HaarEvaluator::read(const FileNode& node, Size _origWinSize) normrect = Rect(1, 1, origWinSize.width - 2, origWinSize.height - 2); localSize = lbufSize = Size(0, 0); - if (ocl::haveOpenCL()) + if (ocl::isOpenCLActivated()) { if (ocl::Device::getDefault().isAMD() || ocl::Device::getDefault().isIntel() || ocl::Device::getDefault().isNVidia()) { @@ -802,7 +802,7 @@ bool LBPEvaluator::read( const FileNode& node, Size _origWinSize ) } nchannels = 1; localSize = lbufSize = Size(0, 0); - if (ocl::haveOpenCL()) + if (ocl::isOpenCLActivated()) localSize = Size(8, 8); return true; @@ -1306,7 +1306,7 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std:: levelWeights.clear(); #ifdef HAVE_OPENCL - bool use_ocl = tryOpenCL && ocl::useOpenCL() && + bool use_ocl = tryOpenCL && ocl::isOpenCLActivated() && OCL_FORCE_CHECK(_image.isUMat()) && featureEvaluator->getLocalSize().area() > 0 && (data.minNodesPerTree == data.maxNodesPerTree) && diff --git a/modules/stitching/src/blenders.cpp b/modules/stitching/src/blenders.cpp index 858c1508f8..ee665b348d 100644 --- a/modules/stitching/src/blenders.cpp +++ b/modules/stitching/src/blenders.cpp @@ -478,7 +478,7 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) { Rect rc(x_tl, y_tl, x_br - x_tl, y_br - y_tl); #ifdef HAVE_OPENCL - if ( !cv::ocl::useOpenCL() || + if ( !cv::ocl::isOpenCLActivated() || !ocl_MultiBandBlender_feed(src_pyr_laplace[i], weight_pyr_gauss[i], dst_pyr_laplace_[i](rc), dst_band_weights_[i](rc)) ) #endif @@ -633,7 +633,7 @@ void normalizeUsingWeightMap(InputArray _weight, InputOutputArray _src) #endif #ifdef HAVE_OPENCL - if ( !cv::ocl::useOpenCL() || + if ( !cv::ocl::isOpenCLActivated() || !ocl_normalizeUsingWeightMap(_weight, _src) ) #endif { diff --git a/modules/stitching/src/matchers.cpp b/modules/stitching/src/matchers.cpp index 4fb2d491c0..4cb382c68d 100644 --- a/modules/stitching/src/matchers.cpp +++ b/modules/stitching/src/matchers.cpp @@ -42,6 +42,8 @@ #include "precomp.hpp" +#include "opencv2/core/opencl/ocl_defs.hpp" + using namespace cv; using namespace cv::detail; using namespace cv::cuda; @@ -194,7 +196,7 @@ void CpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat Ptr matcher; #if 0 // TODO check this - if (ocl::useOpenCL()) + if (ocl::isOpenCLActivated()) { matcher = makePtr((int)NORM_L2); } @@ -390,10 +392,12 @@ void FeaturesFinder::operator ()(InputArrayOfArrays images, std::vector(this)) { return true; diff --git a/modules/stitching/src/warpers.cpp b/modules/stitching/src/warpers.cpp index 96fe7f7cb5..e39384e875 100644 --- a/modules/stitching/src/warpers.cpp +++ b/modules/stitching/src/warpers.cpp @@ -111,7 +111,7 @@ Rect PlaneWarper::buildMaps(Size src_size, InputArray K, InputArray R, InputArra _ymap.create(dsize, CV_32FC1); #ifdef HAVE_OPENCL - if (ocl::useOpenCL()) + if (ocl::isOpenCLActivated()) { ocl::Kernel k("buildWarpPlaneMaps", ocl::stitching::warpers_oclsrc); if (!k.empty()) @@ -365,7 +365,7 @@ void SphericalPortraitWarper::detectResultRoi(Size src_size, Point &dst_tl, Poin Rect SphericalWarper::buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap) { #ifdef HAVE_OPENCL - if (ocl::useOpenCL()) + if (ocl::isOpenCLActivated()) { ocl::Kernel k("buildWarpSphericalMaps", ocl::stitching::warpers_oclsrc); if (!k.empty()) @@ -414,7 +414,7 @@ Point SphericalWarper::warp(InputArray src, InputArray K, InputArray R, int inte Rect CylindricalWarper::buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap) { #ifdef HAVE_OPENCL - if (ocl::useOpenCL()) + if (ocl::isOpenCLActivated()) { ocl::Kernel k("buildWarpCylindricalMaps", ocl::stitching::warpers_oclsrc); if (!k.empty()) diff --git a/modules/video/src/bgfg_gaussmix2.cpp b/modules/video/src/bgfg_gaussmix2.cpp index 51ec9f99b6..7103bea4de 100644 --- a/modules/video/src/bgfg_gaussmix2.cpp +++ b/modules/video/src/bgfg_gaussmix2.cpp @@ -193,7 +193,7 @@ public: CV_Assert( nmixtures <= 255); #ifdef HAVE_OPENCL - if (ocl::useOpenCL() && opencl_ON) + if (ocl::isOpenCLActivated() && opencl_ON) { create_ocl_apply_kernel(); diff --git a/modules/video/src/lkpyramid.cpp b/modules/video/src/lkpyramid.cpp index 581dfb7b40..c7c61455a8 100644 --- a/modules/video/src/lkpyramid.cpp +++ b/modules/video/src/lkpyramid.cpp @@ -1226,7 +1226,7 @@ void SparsePyrLKOpticalFlowImpl::calc( InputArray _prevImg, InputArray _nextImg, { CV_INSTRUMENT_REGION() - CV_OCL_RUN(ocl::useOpenCL() && + CV_OCL_RUN(ocl::isOpenCLActivated() && (_prevImg.isUMat() || _nextImg.isUMat()) && ocl::Image2D::isFormatSupported(CV_32F, 1, false), ocl_calcOpticalFlowPyrLK(_prevImg, _nextImg, _prevPts, _nextPts, _status, _err))