Merge pull request #16608 from vpisarev:fix_mac_ocl_tests

* fixed several problems when running tests on Mac:
* OCL_pyrUp
* OCL_flip
* some basic UMat tests
* histogram badarg test (out of range access)

* retained the storepix fix in ocl_flip only for 16U/16S datatype, where the OpenCL compiler on Mac generates incorrect code

* moved deletion of ACCESS_FAST flag to non-SVM branch (where SVM is shared virtual memory (in OpenCL 2.x), not support vector machine)

* force OpenCL to use read/write for GPU<=>CPU memory transfers on machines with discrete video only on Macs. On Windows/Linux the drivers are seemingly smart enough to implement map/unmap properly (and maybe more efficiently than explicit read/write)
pull/16641/head
Vadim Pisarevsky 5 years ago committed by GitHub
parent 150c29356a
commit 07b475062f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      modules/core/src/copy.cpp
  2. 13
      modules/core/src/ocl.cpp
  3. 24
      modules/core/src/opencl/flip.cl
  4. 23
      modules/imgproc/src/pyramids.cpp
  5. 2
      modules/imgproc/test/test_histograms.cpp

@ -916,9 +916,9 @@ static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
kercn = (cn!=3 || flipType == FLIP_ROWS) ? std::max(kercn, cn) : cn;
ocl::Kernel k(kernelName, ocl::core::flip_oclsrc,
format( "-D T=%s -D T1=%s -D cn=%d -D PIX_PER_WI_Y=%d -D kercn=%d",
format( "-D T=%s -D T1=%s -D DEPTH=%d -D cn=%d -D PIX_PER_WI_Y=%d -D kercn=%d",
kercn != cn ? ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)) : ocl::vecopTypeToStr(CV_MAKE_TYPE(depth, kercn)),
kercn != cn ? ocl::typeToStr(depth) : ocl::vecopTypeToStr(depth), cn, pxPerWIy, kercn));
kercn != cn ? ocl::typeToStr(depth) : ocl::vecopTypeToStr(depth), depth, cn, pxPerWIy, kercn));
if (k.empty())
return false;

@ -4705,6 +4705,8 @@ public:
int createFlags = 0, flags0 = 0;
getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);
bool copyOnMap = (flags0 & UMatData::COPY_ON_MAP) != 0;
cl_context ctx_handle = (cl_context)ctx.ptr();
int allocatorFlags = 0;
int tempUMatFlags = 0;
@ -4764,8 +4766,15 @@ public:
else
#endif
{
if( copyOnMap )
accessFlags &= ~ACCESS_FAST;
tempUMatFlags = UMatData::TEMP_UMAT;
if (CV_OPENCL_ENABLE_MEM_USE_HOST_PTR
if (
#ifdef __APPLE__
!copyOnMap &&
#endif
CV_OPENCL_ENABLE_MEM_USE_HOST_PTR
// There are OpenCL runtime issues for less aligned data
&& (CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR != 0
&& u->origdata == cv::alignPtr(u->origdata, (int)CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR))
@ -4793,7 +4802,7 @@ public:
u->handle = handle;
u->prevAllocator = u->currAllocator;
u->currAllocator = this;
u->flags |= tempUMatFlags;
u->flags |= tempUMatFlags | flags0;
u->allocatorFlags_ = allocatorFlags;
}
if(accessFlags & ACCESS_WRITE)

@ -42,10 +42,25 @@
#if kercn != 3
#define loadpix(addr) *(__global const T *)(addr)
#define storepix(val, addr) *(__global T *)(addr) = val
#define storepix_2(val0, val1, addr0, addr1) \
*(__global T *)(addr0) = val0; *(__global T *)(addr1) = val1
#define TSIZE (int)sizeof(T)
#else
#define loadpix(addr) vload3(0, (__global const T1 *)(addr))
#define storepix(val, addr) vstore3(val, 0, (__global T1 *)(addr))
#if DEPTH == 2 || DEPTH == 3
#define storepix_2(val0, val1, addr0, addr1) \
((__global T1 *)(addr0))[0] = val0.x; \
((__global T1 *)(addr1))[0] = val1.x; \
((__global T1 *)(addr0))[1] = val0.y; \
((__global T1 *)(addr1))[1] = val1.y; \
((__global T1 *)(addr0))[2] = val0.z; \
((__global T1 *)(addr1))[2] = val1.z
#else
#define storepix_2(val0, val1, addr0, addr1) \
storepix(val0, addr0); \
storepix(val1, addr1)
#endif
#define TSIZE ((int)sizeof(T1)*3)
#endif
@ -69,8 +84,7 @@ __kernel void arithm_flip_rows(__global const uchar * srcptr, int src_step, int
T src0 = loadpix(srcptr + src_index0);
T src1 = loadpix(srcptr + src_index1);
storepix(src1, dstptr + dst_index0);
storepix(src0, dstptr + dst_index1);
storepix_2(src1, src0, dstptr + dst_index0, dstptr + dst_index1);
src_index0 += src_step;
src_index1 -= src_step;
@ -115,8 +129,7 @@ __kernel void arithm_flip_rows_cols(__global const uchar * srcptr, int src_step,
#endif
#endif
storepix(src1, dstptr + dst_index0);
storepix(src0, dstptr + dst_index1);
storepix_2(src1, src0, dstptr + dst_index0, dstptr + dst_index1);
src_index0 += src_step;
src_index1 -= src_step;
@ -161,8 +174,7 @@ __kernel void arithm_flip_cols(__global const uchar * srcptr, int src_step, int
#endif
#endif
storepix(src1, dstptr + dst_index0);
storepix(src0, dstptr + dst_index1);
storepix_2(src1, src0, dstptr + dst_index0, dstptr + dst_index1);
src_index0 += src_step;
src_index1 += src_step;

@ -1078,7 +1078,7 @@ static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int
UMat dst = _dst.getUMat();
int float_depth = depth == CV_64F ? CV_64F : CV_32F;
const int local_size = 16;
const int local_size = channels == 1 ? 16 : 8;
char cvt[2][50];
String buildOptions = format(
"-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
@ -1092,22 +1092,17 @@ static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int
size_t globalThreads[2] = { (size_t)dst.cols, (size_t)dst.rows };
size_t localThreads[2] = { (size_t)local_size, (size_t)local_size };
ocl::Kernel k;
if (ocl::Device::getDefault().isIntel() && channels == 1)
if (type == CV_8UC1 && src.cols % 2 == 0)
{
if (type == CV_8UC1 && src.cols % 2 == 0)
{
buildOptions.clear();
k.create("pyrUp_cols2", ocl::imgproc::pyramid_up_oclsrc, buildOptions);
globalThreads[0] = dst.cols/4; globalThreads[1] = dst.rows/2;
}
else
{
k.create("pyrUp_unrolled", ocl::imgproc::pyr_up_oclsrc, buildOptions);
globalThreads[0] = dst.cols/2; globalThreads[1] = dst.rows/2;
}
buildOptions.clear();
k.create("pyrUp_cols2", ocl::imgproc::pyramid_up_oclsrc, buildOptions);
globalThreads[0] = dst.cols/4; globalThreads[1] = dst.rows/2;
}
else
k.create("pyrUp", ocl::imgproc::pyr_up_oclsrc, buildOptions);
{
k.create("pyrUp_unrolled", ocl::imgproc::pyr_up_oclsrc, buildOptions);
globalThreads[0] = dst.cols/2; globalThreads[1] = dst.rows/2;
}
if (k.empty())
return false;

@ -1966,7 +1966,7 @@ TEST(Imgproc_Hist_Calc, badarg)
Mat img = cv::Mat::zeros(10, 10, CV_8UC1);
Mat imgInt = cv::Mat::zeros(10, 10, CV_32SC1);
Mat hist;
const int hist_size[] = { 100 };
const int hist_size[] = { 100, 100 };
// base run
EXPECT_NO_THROW(cv::calcHist(&img, 1, channels, noArray(), hist, 1, hist_size, ranges, true));
// bad parameters

Loading…
Cancel
Save