Merge pull request #10180 from alalek:ocl_avoid_unnecessary_initialization

pull/10193/head
Vadim Pisarevsky 7 years ago
commit f5dba12762
  1. 2
      modules/calib3d/src/stereobm.cpp
  2. 23
      modules/core/include/opencv2/core/opencl/ocl_defs.hpp
  3. 9
      modules/core/src/convert.cpp
  4. 49
      modules/core/src/ocl.cpp
  5. 4
      modules/core/src/trace.cpp
  6. 2
      modules/core/src/umatrix.cpp
  7. 30
      modules/features2d/src/fast.cpp
  8. 2
      modules/features2d/src/kaze/AKAZEFeatures.cpp
  9. 4
      modules/features2d/src/matchers.cpp
  10. 2
      modules/features2d/src/orb.cpp
  11. 2
      modules/imgproc/src/clahe.cpp
  12. 6
      modules/imgproc/src/deriv.cpp
  13. 2
      modules/imgproc/src/smooth.cpp
  14. 13
      modules/imgproc/src/sumpixels.cpp
  15. 6
      modules/objdetect/src/cascadedetect.cpp
  16. 4
      modules/stitching/src/blenders.cpp
  17. 8
      modules/stitching/src/matchers.cpp
  18. 6
      modules/stitching/src/warpers.cpp
  19. 2
      modules/video/src/bgfg_gaussmix2.cpp
  20. 2
      modules/video/src/lkpyramid.cpp

@ -1108,7 +1108,7 @@ public:
int FILTERED = (params.minDisparity - 1) << disp_shift;
#ifdef HAVE_OPENCL
if(ocl::useOpenCL() && disparr.isUMat() && params.textureThreshold == 0)
if(ocl::isOpenCLActivated() && disparr.isUMat() && params.textureThreshold == 0)
{
UMat left, right;
if(ocl_prefiltering(leftarr, rightarr, left, right, &params))

@ -5,7 +5,22 @@
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
#ifndef OPENCV_CORE_OPENCL_DEFS_HPP
#define OPENCV_CORE_OPENCL_DEFS_HPP
#include "opencv2/core/utility.hpp"
#include "cvconfig.h"
namespace cv { namespace ocl {
#ifdef HAVE_OPENCL
/// Lightweight counterpart of useOpenCL(): the call is similar, but it doesn't
/// try to load the OpenCL runtime or create an OpenCL context.
/// @return bool flag; see the implementation for the exact conditions.
CV_EXPORTS bool isOpenCLActivated();
#else
// Built without HAVE_OPENCL: header-only stub that always reports false.
static inline bool isOpenCLActivated() { return false; }
#endif
}} // namespace cv::ocl
//#define CV_OPENCL_RUN_ASSERT
#ifdef HAVE_OPENCL
@ -13,7 +28,7 @@
#ifdef CV_OPENCL_RUN_VERBOSE
#define CV_OCL_RUN_(condition, func, ...) \
{ \
if (cv::ocl::useOpenCL() && (condition) && func) \
if (cv::ocl::isOpenCLActivated() && (condition) && func) \
{ \
printf("%s: OpenCL implementation is running\n", CV_Func); \
fflush(stdout); \
@ -29,7 +44,7 @@
#elif defined CV_OPENCL_RUN_ASSERT
#define CV_OCL_RUN_(condition, func, ...) \
{ \
if (cv::ocl::useOpenCL() && (condition)) \
if (cv::ocl::isOpenCLActivated() && (condition)) \
{ \
if(func) \
{ \
@ -44,7 +59,7 @@
}
#else
#define CV_OCL_RUN_(condition, func, ...) \
if (cv::ocl::useOpenCL() && (condition) && func) \
if (cv::ocl::isOpenCLActivated() && (condition) && func) \
{ \
CV_IMPL_ADD(CV_IMPL_OCL); \
return __VA_ARGS__; \
@ -56,3 +71,5 @@
#endif
#define CV_OCL_RUN(condition, func) CV_OCL_RUN_(condition, func)
#endif // OPENCV_CORE_OPENCL_DEFS_HPP

@ -44,6 +44,7 @@
#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include "convert.hpp"
#include "opencv2/core/openvx/ovx_defs.hpp"
@ -897,7 +898,8 @@ void cv::extractChannel(InputArray _src, OutputArray _dst, int coi)
CV_Assert( 0 <= coi && coi < cn );
int ch[] = { coi, 0 };
if (ocl::useOpenCL() && _src.dims() <= 2 && _dst.isUMat())
#ifdef HAVE_OPENCL
if (ocl::isOpenCLActivated() && _src.dims() <= 2 && _dst.isUMat())
{
UMat src = _src.getUMat();
_dst.create(src.dims, &src.size[0], depth);
@ -905,6 +907,7 @@ void cv::extractChannel(InputArray _src, OutputArray _dst, int coi)
mixChannels(std::vector<UMat>(1, src), std::vector<UMat>(1, dst), ch, 1);
return;
}
#endif
Mat src = _src.getMat();
_dst.create(src.dims, &src.size[0], depth);
@ -925,12 +928,14 @@ void cv::insertChannel(InputArray _src, InputOutputArray _dst, int coi)
CV_Assert( 0 <= coi && coi < dcn && scn == 1 );
int ch[] = { 0, coi };
if (ocl::useOpenCL() && _src.dims() <= 2 && _dst.isUMat())
#ifdef HAVE_OPENCL
if (ocl::isOpenCLActivated() && _src.dims() <= 2 && _dst.isUMat())
{
UMat src = _src.getUMat(), dst = _dst.getUMat();
mixChannels(std::vector<UMat>(1, src), std::vector<UMat>(1, dst), ch, 1);
return;
}
#endif
Mat src = _src.getMat(), dst = _dst.getMat();

@ -423,7 +423,7 @@ struct OpenCLBinaryCacheConfigurator
{
CV_LOG_WARNING(NULL, "- " << remove_entries[i]);
}
CV_LOG_WARNING(NULL,"Note: You can disable this behavior via this option: CV_OPENCL_CACHE_CLEANUP=0");
CV_LOG_WARNING(NULL, "Note: You can disable this behavior via this option: OPENCV_OPENCL_CACHE_CLEANUP=0");
for (size_t i = 0; i < remove_entries.size(); i++)
{
@ -781,18 +781,34 @@ public:
#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
// true if we have initialized OpenCL subsystem with available platforms
static bool g_isOpenCVActivated = false;
bool haveOpenCL()
{
CV_TRACE_FUNCTION();
#ifdef HAVE_OPENCL
static bool g_isOpenCLInitialized = false;
static bool g_isOpenCLAvailable = false;
if (!g_isOpenCLInitialized)
{
CV_TRACE_REGION("Init_OpenCL_Runtime");
const char* envPath = getenv("OPENCV_OPENCL_RUNTIME");
if (envPath)
{
if (cv::String(envPath) == "disabled")
{
g_isOpenCLAvailable = false;
g_isOpenCLInitialized = true;
}
}
CV_LOG_INFO(NULL, "Initialize OpenCL runtime...");
try
{
cl_uint n = 0;
g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS;
g_isOpenCVActivated = n > 0;
}
catch (...)
{
@ -813,7 +829,7 @@ bool useOpenCL()
{
try
{
data->useOpenCL = (int)haveOpenCL() && Device::getDefault().ptr() && Device::getDefault().available();
data->useOpenCL = (int)(haveOpenCL() && Device::getDefault().ptr() && Device::getDefault().available()) ? 1 : 0;
}
catch (...)
{
@ -823,12 +839,27 @@ bool useOpenCL()
return data->useOpenCL > 0;
}
#ifdef HAVE_OPENCL
bool isOpenCLActivated()
{
    // Test the activation flag first: while it is unset we must not reach
    // useOpenCL(), because useOpenCL()->haveOpenCL() would cause the
    // unnecessary OpenCL activation that this function exists to prevent.
    // Once the flag is set, the answer is whatever useOpenCL() reports.
    return g_isOpenCVActivated && useOpenCL();
}
#endif
void setUseOpenCL(bool flag)
{
if( haveOpenCL() )
CV_TRACE_FUNCTION();
CoreTLSData* data = getCoreTlsData().get();
if (!flag)
{
CoreTLSData* data = getCoreTlsData().get();
data->useOpenCL = (flag && Device::getDefault().ptr() != NULL) ? 1 : 0;
data->useOpenCL = 0;
}
else if( haveOpenCL() )
{
data->useOpenCL = (Device::getDefault().ptr() != NULL) ? 1 : 0;
}
}
@ -5289,9 +5320,15 @@ public:
}
};
// Creates the OpenCLAllocator instance on the first call, marks OpenCL as
// activated, and returns the instance.
// NOTE(review): the original remark here was "call once guarantee" - presumably
// callers are expected to invoke this only once; confirm at the call site.
static OpenCLAllocator* getOpenCLAllocator_()
{
// The allocator is created with new and never deleted on purpose: this avoids
// the destructor call (this object is used too widely).
static OpenCLAllocator* g_allocator = new OpenCLAllocator();
// Raise the activation flag only after the allocator has been requested.
g_isOpenCVActivated = true;
return g_allocator;
}
MatAllocator* getOpenCLAllocator()
{
CV_SINGLETON_LAZY_INIT(MatAllocator, new OpenCLAllocator())
CV_SINGLETON_LAZY_INIT(MatAllocator, getOpenCLAllocator_())
}
}} // namespace cv::ocl

@ -8,6 +8,8 @@
#include <opencv2/core/utils/trace.private.hpp>
#include <opencv2/core/utils/configuration.private.hpp>
#include <opencv2/core/opencl/ocl_defs.hpp>
#include <cstdarg> // va_start
#include <sstream>
@ -596,7 +598,7 @@ void Region::destroy()
#endif
#ifdef HAVE_OPENCL
case REGION_FLAG_IMPL_OPENCL:
if (param_synchronizeOpenCL && cv::ocl::useOpenCL())
if (param_synchronizeOpenCL && cv::ocl::isOpenCLActivated())
cv::ocl::finish();
myCodePath = Impl::CODE_PATH_OPENCL;
break;

@ -141,7 +141,7 @@ void UMatData::unlock()
MatAllocator* UMat::getStdAllocator()
{
#ifdef HAVE_OPENCL
if( ocl::haveOpenCL() && ocl::useOpenCL() )
if (ocl::useOpenCL())
return ocl::getOpenCLAllocator();
#endif
return Mat::getDefaultAllocator();

@ -422,33 +422,27 @@ void FAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool
{
CV_INSTRUMENT_REGION()
#ifdef HAVE_OPENCL
if( ocl::useOpenCL() && _img.isUMat() && type == FastFeatureDetector::TYPE_9_16 &&
ocl_FAST(_img, keypoints, threshold, nonmax_suppression, 10000))
{
CV_IMPL_ADD(CV_IMPL_OCL);
return;
}
#endif
CV_OCL_RUN(_img.isUMat() && type == FastFeatureDetector::TYPE_9_16,
ocl_FAST(_img, keypoints, threshold, nonmax_suppression, 10000));
CV_OVX_RUN(true,
openvx_FAST(_img, keypoints, threshold, nonmax_suppression, type))
switch(type) {
switch(type) {
case FastFeatureDetector::TYPE_5_8:
FAST_t<8>(_img, keypoints, threshold, nonmax_suppression);
break;
FAST_t<8>(_img, keypoints, threshold, nonmax_suppression);
break;
case FastFeatureDetector::TYPE_7_12:
FAST_t<12>(_img, keypoints, threshold, nonmax_suppression);
break;
FAST_t<12>(_img, keypoints, threshold, nonmax_suppression);
break;
case FastFeatureDetector::TYPE_9_16:
#ifdef HAVE_TEGRA_OPTIMIZATION
if(tegra::useTegra() && tegra::FAST(_img, keypoints, threshold, nonmax_suppression))
break;
if(tegra::useTegra() && tegra::FAST(_img, keypoints, threshold, nonmax_suppression))
break;
#endif
FAST_t<16>(_img, keypoints, threshold, nonmax_suppression);
break;
}
FAST_t<16>(_img, keypoints, threshold, nonmax_suppression);
break;
}
}

@ -520,7 +520,7 @@ convertScalePyramid(const std::vector<Evolution<MatTypeSrc> >& src, std::vector<
*/
void AKAZEFeatures::Create_Nonlinear_Scale_Space(InputArray image)
{
if (ocl::useOpenCL() && image.isUMat()) {
if (ocl::isOpenCLActivated() && image.isUMat()) {
// will run OCL version of scale space pyramid
UMatPyramid uPyr;
// init UMat pyramid with sizes

@ -771,7 +771,7 @@ void BFMatcher::knnMatchImpl( InputArray _queryDescriptors, std::vector<std::vec
Size trainDescSize = trainDescCollection.empty() ? utrainDescCollection[0].size() : trainDescCollection[0].size();
int trainDescOffset = trainDescCollection.empty() ? (int)utrainDescCollection[0].offset : 0;
if ( ocl::useOpenCL() && _queryDescriptors.isUMat() && _queryDescriptors.dims()<=2 && trainDescVectorSize == 1 &&
if ( ocl::isOpenCLActivated() && _queryDescriptors.isUMat() && _queryDescriptors.dims()<=2 && trainDescVectorSize == 1 &&
_queryDescriptors.type() == CV_32FC1 && _queryDescriptors.offset() == 0 && trainDescOffset == 0 &&
trainDescSize.width == _queryDescriptors.size().width && masks.size() == 1 && masks[0].total() == 0 )
{
@ -919,7 +919,7 @@ void BFMatcher::radiusMatchImpl( InputArray _queryDescriptors, std::vector<std::
Size trainDescSize = trainDescCollection.empty() ? utrainDescCollection[0].size() : trainDescCollection[0].size();
int trainDescOffset = trainDescCollection.empty() ? (int)utrainDescCollection[0].offset : 0;
if ( ocl::useOpenCL() && _queryDescriptors.isUMat() && _queryDescriptors.dims()<=2 && trainDescVectorSize == 1 &&
if ( ocl::isOpenCLActivated() && _queryDescriptors.isUMat() && _queryDescriptors.dims()<=2 && trainDescVectorSize == 1 &&
_queryDescriptors.type() == CV_32FC1 && _queryDescriptors.offset() == 0 && trainDescOffset == 0 &&
trainDescSize.width == _queryDescriptors.size().width && masks.size() == 1 && masks[0].total() == 0 )
{

@ -974,7 +974,7 @@ void ORB_Impl::detectAndCompute( InputArray _image, InputArray _mask,
int descPatchSize = cvCeil(halfPatchSize*sqrt(2.0));
int border = std::max(edgeThreshold, std::max(descPatchSize, HARRIS_BLOCK_SIZE/2))+1;
bool useOCL = ocl::useOpenCL() && OCL_FORCE_CHECK(_image.isUMat() || _descriptors.isUMat());
bool useOCL = ocl::isOpenCLActivated() && OCL_FORCE_CHECK(_image.isUMat() || _descriptors.isUMat());
Mat image = _image.getMat(), mask = _mask.getMat();
if( image.type() != CV_8UC1 )

@ -360,7 +360,7 @@ namespace
CV_Assert( _src.type() == CV_8UC1 || _src.type() == CV_16UC1 );
#ifdef HAVE_OPENCL
bool useOpenCL = cv::ocl::useOpenCL() && _src.isUMat() && _src.dims()<=2 && _src.type() == CV_8UC1;
bool useOpenCL = cv::ocl::isOpenCLActivated() && _src.isUMat() && _src.dims()<=2 && _src.type() == CV_8UC1;
#endif
int histSize = _src.type() == CV_8UC1 ? 256 : 65536;

@ -435,7 +435,7 @@ void cv::Sobel( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
CV_OVX_RUN(true,
openvx_sobel(_src, _dst, dx, dy, ksize, scale, delta, borderType))
CV_IPP_RUN(!(ocl::useOpenCL() && _dst.isUMat()), ipp_Deriv(_src, _dst, dx, dy, ksize, scale, delta, borderType));
CV_IPP_RUN(!(ocl::isOpenCLActivated() && _dst.isUMat()), ipp_Deriv(_src, _dst, dx, dy, ksize, scale, delta, borderType));
int ktype = std::max(CV_32F, std::max(ddepth, sdepth));
@ -479,7 +479,7 @@ void cv::Scharr( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
}
#endif
CV_IPP_RUN(!(ocl::useOpenCL() && _dst.isUMat()), ipp_Deriv(_src, _dst, dx, dy, 0, scale, delta, borderType));
CV_IPP_RUN(!(ocl::isOpenCLActivated() && _dst.isUMat()), ipp_Deriv(_src, _dst, dx, dy, 0, scale, delta, borderType));
int ktype = std::max(CV_32F, std::max(ddepth, sdepth));
@ -795,7 +795,7 @@ void cv::Laplacian( InputArray _src, OutputArray _dst, int ddepth, int ksize,
ocl_Laplacian3_8UC1(_src, _dst, ddepth, kernel, delta, borderType));
}
CV_IPP_RUN(!(cv::ocl::useOpenCL() && _dst.isUMat()), ipp_Laplacian(_src, _dst, ksize, scale, delta, borderType));
CV_IPP_RUN(!(cv::ocl::isOpenCLActivated() && _dst.isUMat()), ipp_Laplacian(_src, _dst, ksize, scale, delta, borderType));
#ifdef HAVE_TEGRA_OPTIMIZATION

@ -2102,7 +2102,7 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
if(sigma1 == 0 && sigma2 == 0 && tegra::useTegra() && tegra::gaussian(src, dst, ksize, borderType))
return;
#endif
bool useOpenCL = (ocl::useOpenCL() && _dst.isUMat() && _src.dims() <= 2 &&
bool useOpenCL = (ocl::isOpenCLActivated() && _dst.isUMat() && _src.dims() <= 2 &&
((ksize.width == 3 && ksize.height == 3) ||
(ksize.width == 5 && ksize.height == 5)) &&
_src.rows() > ksize.height && _src.cols() > ksize.width);

@ -503,17 +503,8 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output
sqdepth = CV_64F;
sdepth = CV_MAT_DEPTH(sdepth), sqdepth = CV_MAT_DEPTH(sqdepth);
#ifdef HAVE_OPENCL
if (ocl::useOpenCL() && _sum.isUMat() && !_tilted.needed())
{
if (!_sqsum.needed())
{
CV_OCL_RUN(ocl::useOpenCL(), ocl_integral(_src, _sum, sdepth))
}
else if (_sqsum.isUMat())
CV_OCL_RUN(ocl::useOpenCL(), ocl_integral(_src, _sum, _sqsum, sdepth, sqdepth))
}
#endif
CV_OCL_RUN(_sum.isUMat() && !_tilted.needed(),
(_sqsum.needed() ? ocl_integral(_src, _sum, _sqsum, sdepth, sqdepth) : ocl_integral(_src, _sum, sdepth)));
Size ssize = _src.size(), isize(ssize.width + 1, ssize.height + 1);
_sum.create( isize, CV_MAKETYPE(sdepth, cn) );

@ -607,7 +607,7 @@ bool HaarEvaluator::read(const FileNode& node, Size _origWinSize)
normrect = Rect(1, 1, origWinSize.width - 2, origWinSize.height - 2);
localSize = lbufSize = Size(0, 0);
if (ocl::haveOpenCL())
if (ocl::isOpenCLActivated())
{
if (ocl::Device::getDefault().isAMD() || ocl::Device::getDefault().isIntel() || ocl::Device::getDefault().isNVidia())
{
@ -802,7 +802,7 @@ bool LBPEvaluator::read( const FileNode& node, Size _origWinSize )
}
nchannels = 1;
localSize = lbufSize = Size(0, 0);
if (ocl::haveOpenCL())
if (ocl::isOpenCLActivated())
localSize = Size(8, 8);
return true;
@ -1306,7 +1306,7 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std::
levelWeights.clear();
#ifdef HAVE_OPENCL
bool use_ocl = tryOpenCL && ocl::useOpenCL() &&
bool use_ocl = tryOpenCL && ocl::isOpenCLActivated() &&
OCL_FORCE_CHECK(_image.isUMat()) &&
featureEvaluator->getLocalSize().area() > 0 &&
(data.minNodesPerTree == data.maxNodesPerTree) &&

@ -478,7 +478,7 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
{
Rect rc(x_tl, y_tl, x_br - x_tl, y_br - y_tl);
#ifdef HAVE_OPENCL
if ( !cv::ocl::useOpenCL() ||
if ( !cv::ocl::isOpenCLActivated() ||
!ocl_MultiBandBlender_feed(src_pyr_laplace[i], weight_pyr_gauss[i],
dst_pyr_laplace_[i](rc), dst_band_weights_[i](rc)) )
#endif
@ -633,7 +633,7 @@ void normalizeUsingWeightMap(InputArray _weight, InputOutputArray _src)
#endif
#ifdef HAVE_OPENCL
if ( !cv::ocl::useOpenCL() ||
if ( !cv::ocl::isOpenCLActivated() ||
!ocl_normalizeUsingWeightMap(_weight, _src) )
#endif
{

@ -42,6 +42,8 @@
#include "precomp.hpp"
#include "opencv2/core/opencl/ocl_defs.hpp"
using namespace cv;
using namespace cv::detail;
using namespace cv::cuda;
@ -194,7 +196,7 @@ void CpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat
Ptr<cv::DescriptorMatcher> matcher;
#if 0 // TODO check this
if (ocl::useOpenCL())
if (ocl::isOpenCLActivated())
{
matcher = makePtr<BFMatcher>((int)NORM_L2);
}
@ -390,10 +392,12 @@ void FeaturesFinder::operator ()(InputArrayOfArrays images, std::vector<ImageFea
bool FeaturesFinder::isThreadSafe() const
{
if (ocl::useOpenCL())
#ifdef HAVE_OPENCL
if (ocl::isOpenCLActivated())
{
return false;
}
#endif
if (dynamic_cast<const SurfFeaturesFinder*>(this))
{
return true;

@ -111,7 +111,7 @@ Rect PlaneWarper::buildMaps(Size src_size, InputArray K, InputArray R, InputArra
_ymap.create(dsize, CV_32FC1);
#ifdef HAVE_OPENCL
if (ocl::useOpenCL())
if (ocl::isOpenCLActivated())
{
ocl::Kernel k("buildWarpPlaneMaps", ocl::stitching::warpers_oclsrc);
if (!k.empty())
@ -365,7 +365,7 @@ void SphericalPortraitWarper::detectResultRoi(Size src_size, Point &dst_tl, Poin
Rect SphericalWarper::buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap)
{
#ifdef HAVE_OPENCL
if (ocl::useOpenCL())
if (ocl::isOpenCLActivated())
{
ocl::Kernel k("buildWarpSphericalMaps", ocl::stitching::warpers_oclsrc);
if (!k.empty())
@ -414,7 +414,7 @@ Point SphericalWarper::warp(InputArray src, InputArray K, InputArray R, int inte
Rect CylindricalWarper::buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap)
{
#ifdef HAVE_OPENCL
if (ocl::useOpenCL())
if (ocl::isOpenCLActivated())
{
ocl::Kernel k("buildWarpCylindricalMaps", ocl::stitching::warpers_oclsrc);
if (!k.empty())

@ -193,7 +193,7 @@ public:
CV_Assert( nmixtures <= 255);
#ifdef HAVE_OPENCL
if (ocl::useOpenCL() && opencl_ON)
if (ocl::isOpenCLActivated() && opencl_ON)
{
create_ocl_apply_kernel();

@ -1226,7 +1226,7 @@ void SparsePyrLKOpticalFlowImpl::calc( InputArray _prevImg, InputArray _nextImg,
{
CV_INSTRUMENT_REGION()
CV_OCL_RUN(ocl::useOpenCL() &&
CV_OCL_RUN(ocl::isOpenCLActivated() &&
(_prevImg.isUMat() || _nextImg.isUMat()) &&
ocl::Image2D::isFormatSupported(CV_32F, 1, false),
ocl_calcOpticalFlowPyrLK(_prevImg, _nextImg, _prevPts, _nextPts, _status, _err))

Loading…
Cancel
Save