Merge pull request #10223 from vpisarev:ocl_mac_fixes

* fixed OpenCL functions on Mac, so that the tests pass

* fixed compile warnings; temporarily disabled the OCL branch of TV-L1 optical flow on macOS

* fixed a few other warnings on macOS
pull/10230/head
Vadim Pisarevsky 7 years ago committed by Alexander Alekhin
parent a3ec2ac3c5
commit 5ce38e516e
  1. 87
      modules/core/src/ocl.cpp
  2. 5
      modules/core/src/stat.cpp
  3. 14
      modules/imgproc/src/morph.cpp
  4. 2
      modules/video/src/tvl1flow.cpp

@ -4172,13 +4172,13 @@ protected:
size_t step_;
public:
AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment)
AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment, size_t extrabytes=0)
: size_(rows*step), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL), rows_(rows), cols_(cols), step_(step)
{
CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
if (((size_t)ptr_ & (alignment - 1)) != 0)
if (ptr == 0 || ((size_t)ptr_ & (alignment - 1)) != 0)
{
allocatedPtr_ = new uchar[size_ + alignment - 1];
allocatedPtr_ = new uchar[size_ + extrabytes + alignment - 1];
ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
if (readAccess)
{
@ -4978,6 +4978,25 @@ public:
CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0));
}
#ifdef __APPLE__
else
{
const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
size_t new_srcrawofs = srcrawofs & ~(padding-1);
size_t membuf_ofs = srcrawofs - new_srcrawofs;
AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_srcstep[0], new_srcstep[0],
CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
uchar* ptr = alignedPtr.getAlignedPtr();
CV_Assert(new_srcstep[0] >= new_sz[0]);
total = alignSize(new_srcstep[0]*new_sz[1] + membuf_ofs, padding);
total = std::min(total, u->size - new_srcrawofs);
CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
new_srcrawofs, total, ptr, 0, 0, 0));
for( size_t i = 0; i < new_sz[1]; i++ )
memcpy( (uchar*)dstptr + i*new_dststep[0], ptr + i*new_srcstep[0] + membuf_ofs, new_sz[0]);
}
#else
else
{
AlignedDataPtr2D<false, true> alignedPtr((uchar*)dstptr, new_sz[1], new_sz[0], new_dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
@ -4989,6 +5008,7 @@ public:
new_dststep[0], 0,
ptr, 0, 0, 0));
}
#endif
}
}
@ -5095,6 +5115,30 @@ public:
CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0));
}
#ifdef __APPLE__
else
{
const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
size_t new_dstrawofs = dstrawofs & ~(padding-1);
size_t membuf_ofs = dstrawofs - new_dstrawofs;
AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_dststep[0], new_dststep[0],
CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
uchar* ptr = alignedPtr.getAlignedPtr();
CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
total = alignSize(new_dststep[0]*new_sz[1] + membuf_ofs, padding);
total = std::min(total, u->size - new_dstrawofs);
/*printf("new_sz0=%d, new_sz1=%d, membuf_ofs=%d, total=%d (%08x), new_dstrawofs=%d (%08x)\n",
(int)new_sz[0], (int)new_sz[1], (int)membuf_ofs,
(int)total, (int)total, (int)new_dstrawofs, (int)new_dstrawofs);*/
CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
new_dstrawofs, total, ptr, 0, 0, 0));
for( size_t i = 0; i < new_sz[1]; i++ )
memcpy( ptr + i*new_dststep[0] + membuf_ofs, (uchar*)srcptr + i*new_srcstep[0], new_sz[0]);
CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
new_dstrawofs, total, ptr, 0, 0, 0));
}
#else
else
{
AlignedDataPtr2D<true, false> alignedPtr((uchar*)srcptr, new_sz[1], new_sz[0], new_srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
@ -5106,6 +5150,7 @@ public:
new_srcstep[0], 0,
ptr, 0, 0, 0));
}
#endif
}
u->markHostCopyObsolete(true);
#ifdef HAVE_OPENCL_SVM
@ -5247,6 +5292,41 @@ public:
CV_OCL_CHECK(retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
srcrawofs, dstrawofs, total, 0, 0, 0));
}
#ifdef __APPLE__
else
{
const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
size_t new_srcrawofs = srcrawofs & ~(padding-1);
size_t srcmembuf_ofs = srcrawofs - new_srcrawofs;
size_t new_dstrawofs = dstrawofs & ~(padding-1);
size_t dstmembuf_ofs = dstrawofs - new_dstrawofs;
AlignedDataPtr2D<false, false> srcBuf(0, new_sz[1], new_srcstep[0], new_srcstep[0],
CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
AlignedDataPtr2D<false, false> dstBuf(0, new_sz[1], new_dststep[0], new_dststep[0],
CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
uchar* srcptr = srcBuf.getAlignedPtr();
uchar* dstptr = dstBuf.getAlignedPtr();
CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
size_t src_total = alignSize(new_srcstep[0]*new_sz[1] + srcmembuf_ofs, padding);
src_total = std::min(src_total, src->size - new_srcrawofs);
size_t dst_total = alignSize(new_dststep[0]*new_sz[1] + dstmembuf_ofs, padding);
dst_total = std::min(dst_total, dst->size - new_dstrawofs);
CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)src->handle, CL_TRUE,
new_srcrawofs, src_total, srcptr, 0, 0, 0));
CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)dst->handle, CL_TRUE,
new_dstrawofs, dst_total, dstptr, 0, 0, 0));
for( size_t i = 0; i < new_sz[1]; i++ )
memcpy( dstptr + dstmembuf_ofs + i*new_dststep[0],
srcptr + srcmembuf_ofs + i*new_srcstep[0], new_sz[0]);
CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)dst->handle, CL_TRUE,
new_dstrawofs, dst_total, dstptr, 0, 0, 0));
}
#else
else
{
CV_OCL_CHECK(retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
@ -5255,6 +5335,7 @@ public:
new_dststep[0], 0,
0, 0, 0));
}
#endif
}
if (retval == CL_SUCCESS)
{

@ -3359,6 +3359,11 @@ static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArr
normType &= ~NORM_RELATIVE;
bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR;
#ifdef __APPLE__
if(normType == NORM_L1 && type == CV_16UC3 && !_mask.empty())
return false;
#endif
if (normsum)
{
if (!ocl_sum(_src1, sc1, normType == NORM_L2 || normType == NORM_L2SQR ?

@ -1403,6 +1403,7 @@ void morph(int op, int src_type, int dst_type,
// Rounds the integer sz up to the nearest multiple of n (n must be non-zero).
// Note: both arguments are evaluated more than once, so avoid side effects.
#define ROUNDUP(sz, n) ((((sz) + (n) - 1) / (n)) * (n))
#ifndef __APPLE__
static bool ocl_morph3x3_8UC1( InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor,
int op, int actual_op = -1, InputArray _extraMat = noArray())
{
@ -1628,16 +1629,15 @@ static bool ocl_morphSmall( InputArray _src, OutputArray _dst, InputArray _kerne
}
return kernel.run(2, globalsize, NULL, false);
}
#endif
static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel,
Point anchor, int iterations, int op, int borderType,
const Scalar &, int actual_op = -1, InputArray _extraMat = noArray())
{
const ocl::Device & dev = ocl::Device::getDefault();
int type = _src.type(), depth = CV_MAT_DEPTH(type),
cn = CV_MAT_CN(type), esz = CV_ELEM_SIZE(type);
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
Mat kernel = _kernel.getMat();
Size ksize = !kernel.empty() ? kernel.size() : Size(3, 3), ssize = _src.size();
@ -1664,14 +1664,13 @@ static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel,
iterations = 1;
}
#ifndef __APPLE__
int esz = CV_ELEM_SIZE(type);
// try to use OpenCL kernel adopted for small morph kernel
if (dev.isIntel() && !(dev.type() & ocl::Device::TYPE_CPU) &&
if (dev.isIntel() &&
((ksize.width < 5 && ksize.height < 5 && esz <= 4) ||
(ksize.width == 5 && ksize.height == 5 && cn == 1)) &&
(iterations == 1)
#if defined __APPLE__
&& cn == 1
#endif
)
{
if (ocl_morph3x3_8UC1(_src, _dst, kernel, anchor, op, actual_op, _extraMat))
@ -1680,6 +1679,7 @@ static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel,
if (ocl_morphSmall(_src, _dst, kernel, anchor, borderType, op, actual_op, _extraMat))
return true;
}
#endif
if (iterations == 0 || kernel.rows*kernel.cols == 1)
{

@ -392,9 +392,11 @@ void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray
{
CV_INSTRUMENT_REGION()
#ifndef __APPLE__
CV_OCL_RUN(_flow.isUMat() &&
ocl::Image2D::isFormatSupported(CV_32F, 1, false),
calc_ocl(_I0, _I1, _flow))
#endif
Mat I0 = _I0.getMat();
Mat I1 = _I1.getMat();

Loading…
Cancel
Save