diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index e85bf6fa5b..3133876e44 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -496,6 +496,8 @@ struct CV_EXPORTS UMatData void* handle; void* userdata; int allocatorFlags_; + int mapcount; + UMatData* originalUMatData; }; diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index c0ad8c4ecc..9d9dda3296 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -208,17 +208,14 @@ public: if(!u) return; - CV_Assert(u->urefcount >= 0); - CV_Assert(u->refcount >= 0); - if(u->refcount == 0) + CV_Assert(u->urefcount == 0); + CV_Assert(u->refcount == 0); + if( !(u->flags & UMatData::USER_ALLOCATED) ) { - if( !(u->flags & UMatData::USER_ALLOCATED) ) - { - fastFree(u->origdata); - u->origdata = 0; - } - delete u; + fastFree(u->origdata); + u->origdata = 0; } + delete u; } }; diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 1074f9b011..1345c52e3b 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -4453,8 +4453,11 @@ public: #endif { tempUMatFlags = UMatData::TEMP_UMAT; - handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags, - u->size, u->origdata, &retval); + if (u->origdata == cv::alignPtr(u->origdata, 4)) // There are OpenCL runtime issues for less aligned data + { + handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags, + u->size, u->origdata, &retval); + } if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST)) { handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, @@ -4510,16 +4513,17 @@ public: if(!u) return; - CV_Assert(u->urefcount >= 0); - CV_Assert(u->refcount >= 0); + CV_Assert(u->urefcount == 0); + CV_Assert(u->refcount == 0 && "UMat deallocation error: some derived Mat is still alive"); - CV_Assert(u->handle != 0 && u->urefcount == 0); + CV_Assert(u->handle != 0); + CV_Assert(u->mapcount == 0); if(u->tempUMat()) { CV_Assert(u->origdata); // UMatDataAutoLock lock(u); - if( u->hostCopyObsolete() && u->refcount > 0 ) + if (u->hostCopyObsolete()) { #ifdef HAVE_OPENCL_SVM if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) @@ -4572,16 +4576,29 @@ public: else { cl_int retval = 0; - void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE, - (CL_MAP_READ | CL_MAP_WRITE), - 0, u->size, 0, 0, 0, &retval); - CV_OclDbgAssert(retval == CL_SUCCESS); - CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS); - CV_OclDbgAssert(clFinish(q) == CL_SUCCESS); + if (u->tempUMat()) + { + CV_Assert(u->mapcount == 0); + void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE, + (CL_MAP_READ | CL_MAP_WRITE), + 0, u->size, 0, 0, 0, &retval); + CV_Assert(u->origdata == data); + CV_OclDbgAssert(retval == CL_SUCCESS); + if (u->originalUMatData) + { + CV_Assert(u->originalUMatData->data == data); + } + CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS); + CV_OclDbgAssert(clFinish(q) == CL_SUCCESS); + } } } u->markHostCopyObsolete(false); } + else + { + // nothing + } #ifdef HAVE_OPENCL_SVM if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) { @@ -4607,16 +4624,12 @@ public: if(u->data && u->copyOnMap() && u->data != u->origdata) fastFree(u->data); u->data = u->origdata; - if(u->refcount == 0) - { - u->currAllocator->deallocate(u); - u = NULL; - } + u->currAllocator->deallocate(u); + u = NULL; } else { CV_Assert(u->origdata == NULL); - CV_Assert(u->refcount == 0); if(u->data && u->copyOnMap() && u->data != u->origdata) { fastFree(u->data); @@ -4665,17 +4678,13 @@ public: delete u; u = NULL; } - CV_Assert(u == NULL || u->refcount); + CV_Assert(u == NULL); } + // synchronized call (external UMatDataAutoLock, see UMat::getMat) void map(UMatData* u, int accessFlags) const { - if(!u) - return; - - CV_Assert( u->handle != 0 ); - - UMatDataAutoLock autolock(u); + CV_Assert(u && u->handle); if(accessFlags & ACCESS_WRITE) u->markDeviceCopyObsolete(true); @@ -4715,11 +4724,16 @@ public: } #endif - cl_int retval = 0; - u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE, - (CL_MAP_READ | CL_MAP_WRITE), - 0, u->size, 0, 0, 0, &retval); - if(u->data && retval == CL_SUCCESS) + cl_int retval = CL_SUCCESS; + if (!u->deviceMemMapped()) + { + CV_Assert(u->refcount == 1); + CV_Assert(u->mapcount++ == 0); + u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE, + (CL_MAP_READ | CL_MAP_WRITE), + 0, u->size, 0, 0, 0, &retval); + } + if (u->data && retval == CL_SUCCESS) { u->markHostCopyObsolete(false); u->markDeviceMemMapped(true); @@ -4765,7 +4779,6 @@ public: if( !u->copyOnMap() && u->deviceMemMapped() ) { CV_Assert(u->data != NULL); - u->markDeviceMemMapped(false); #ifdef HAVE_OPENCL_SVM if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) { @@ -4792,16 +4805,21 @@ public: return; } #endif - CV_Assert( (retval = clEnqueueUnmapMemObject(q, - (cl_mem)u->handle, u->data, 0, 0, 0)) == CL_SUCCESS ); - if (Device::getDefault().isAMD()) - { - // required for multithreaded applications (see stitching test) - CV_OclDbgAssert(clFinish(q) == CL_SUCCESS); - } - if (u->refcount == 0) + { + CV_Assert(u->mapcount-- == 1); + CV_Assert((retval = clEnqueueUnmapMemObject(q, + (cl_mem)u->handle, u->data, 0, 0, 0)) == CL_SUCCESS); + if (Device::getDefault().isAMD()) + { + // required for multithreaded applications (see stitching test) + CV_OclDbgAssert(clFinish(q) == CL_SUCCESS); + } + u->markDeviceMemMapped(false); u->data = 0; + u->markDeviceCopyObsolete(false); + u->markHostCopyObsolete(true); + } } else if( u->copyOnMap() && u->deviceCopyObsolete() ) { @@ -4811,9 +4829,9 @@ public: #endif CV_Assert( (retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0)) == CL_SUCCESS ); + u->markDeviceCopyObsolete(false); + u->markHostCopyObsolete(true); } - u->markDeviceCopyObsolete(false); - u->markHostCopyObsolete(true); } bool checkContinuous(int dims, const size_t sz[], diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp index ad7efae16d..7bfeff1df1 100644 --- a/modules/core/src/umatrix.cpp +++ b/modules/core/src/umatrix.cpp @@ -60,25 +60,71 @@ static Mutex umatLocks[UMAT_NLOCKS]; UMatData::UMatData(const MatAllocator* allocator) { prevAllocator = currAllocator = allocator; - urefcount = refcount = 0; + urefcount = refcount = mapcount = 0; data = origdata = 0; size = 0; flags = 0; handle = 0; userdata = 0; allocatorFlags_ = 0; + originalUMatData = NULL; } UMatData::~UMatData() { prevAllocator = currAllocator = 0; urefcount = refcount = 0; + CV_Assert(mapcount == 0); data = origdata = 0; size = 0; flags = 0; handle = 0; userdata = 0; allocatorFlags_ = 0; + if (originalUMatData) + { + UMatData* u = originalUMatData; + CV_XADD(&(u->urefcount), -1); + CV_XADD(&(u->refcount), -1); + bool showWarn = false; + if (u->refcount == 0) + { + if (u->urefcount > 0) + showWarn = true; + // simulate Mat::deallocate + if (u->mapcount != 0) + { + (u->currAllocator ? u->currAllocator : /* TODO allocator ? allocator :*/ Mat::getStdAllocator())->unmap(u); + } + else + { + // we don't do "map", so we can't do "unmap" + } + } + if (u->refcount == 0 && u->urefcount == 0) // oops, we need to free resources + { + showWarn = true; + // simulate UMat::deallocate + u->currAllocator->deallocate(u); + } +#ifndef NDEBUG + if (showWarn) + { + static int warn_message_showed = 0; + if (warn_message_showed++ < 100) + { + fflush(stdout); + fprintf(stderr, "\n! OPENCV warning: getUMat()/getMat() call chain possible problem." + "\n! Base object is dead, while nested/derived object is still alive or processed." + "\n! Please check lifetime of UMat/Mat objects!\n"); + fflush(stderr); + } + } +#else + (void)showWarn; +#endif + originalUMatData = NULL; + } } void UMatData::lock() @@ -221,19 +267,34 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const UMat hdr; if(!data) return hdr; + Size wholeSize; + Point ofs; + locateROI(wholeSize, ofs); + Size sz(cols, rows); + if (ofs.x != 0 || ofs.y != 0) + { + Mat src = *this; + int dtop = ofs.y; + int dbottom = wholeSize.height - src.rows - ofs.y; + int dleft = ofs.x; + int dright = wholeSize.width - src.cols - ofs.x; + src.adjustROI(dtop, dbottom, dleft, dright); + return src.getUMat(accessFlags, usageFlags)(cv::Rect(ofs.x, ofs.y, sz.width, sz.height)); + } + CV_Assert(data == datastart); + accessFlags |= ACCESS_RW; - UMatData* temp_u = u; - if(!temp_u) + UMatData* new_u = NULL; { MatAllocator *a = allocator, *a0 = getStdAllocator(); if(!a) a = a0; - temp_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags); + new_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags); } bool allocated = false; try { - allocated = UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags); + allocated = UMat::getStdAllocator()->allocate(new_u, accessFlags, usageFlags); } catch (const cv::Exception& e) { @@ -241,14 +302,26 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const } if (!allocated) { - allocated = getStdAllocator()->allocate(temp_u, accessFlags, usageFlags); + allocated = getStdAllocator()->allocate(new_u, accessFlags, usageFlags); CV_Assert(allocated); } + if (u != NULL) + { +#ifdef HAVE_OPENCL + if (ocl::useOpenCL() && new_u->currAllocator == ocl::getOpenCLAllocator()) + { + CV_Assert(new_u->tempUMat()); + } +#endif + new_u->originalUMatData = u; + CV_XADD(&(u->refcount), 1); + CV_XADD(&(u->urefcount), 1); + } hdr.flags = flags; setSize(hdr, dims, size.p, step.p); finalizeHdr(hdr); - hdr.u = temp_u; - hdr.offset = data - datastart; + hdr.u = new_u; + hdr.offset = 0; //data - datastart; hdr.addref(); return hdr; } @@ -639,16 +712,25 @@ Mat UMat::getMat(int accessFlags) const return Mat(); // TODO Support ACCESS_READ (ACCESS_WRITE) without unnecessary data transfers accessFlags |= ACCESS_RW; - u->currAllocator->map(u, accessFlags); - CV_Assert(u->data != 0); - Mat hdr(dims, size.p, type(), u->data + offset, step.p); - hdr.flags = flags; - hdr.u = u; - hdr.datastart = u->data; - hdr.data = u->data + offset; - hdr.datalimit = hdr.dataend = u->data + u->size; - CV_XADD(&hdr.u->refcount, 1); - return hdr; + UMatDataAutoLock autolock(u); + if(CV_XADD(&u->refcount, 1) == 0) + u->currAllocator->map(u, accessFlags); + if (u->data != 0) + { + Mat hdr(dims, size.p, type(), u->data + offset, step.p); + hdr.flags = flags; + hdr.u = u; + hdr.datastart = u->data; + hdr.data = u->data + offset; + hdr.datalimit = hdr.dataend = u->data + u->size; + return hdr; + } + else + { + CV_XADD(&u->refcount, -1); + CV_Assert(u->data != 0 && "Error mapping of UMat to host memory."); + return Mat(); + } } void* UMat::handle(int accessFlags) const @@ -656,10 +738,10 @@ void* UMat::handle(int accessFlags) const if( !u ) return 0; - // check flags: if CPU copy is newer, copy it back to GPU. - if( u->deviceCopyObsolete() ) + CV_Assert(u->refcount == 0); + CV_Assert(!u->deviceCopyObsolete() || u->copyOnMap()); + if (u->deviceCopyObsolete()) { - CV_Assert(u->refcount == 0 || u->origdata); u->currAllocator->unmap(u); } diff --git a/modules/core/test/test_umat.cpp b/modules/core/test/test_umat.cpp index 56a7a18172..bdd38a0a12 100644 --- a/modules/core/test/test_umat.cpp +++ b/modules/core/test/test_umat.cpp @@ -243,9 +243,11 @@ TEST_P(UMatBasicTests, GetUMat) EXPECT_MAT_NEAR(ub, ua, 0); } { - Mat b; - b = a.getUMat(ACCESS_RW).getMat(ACCESS_RW); - EXPECT_MAT_NEAR(b, a, 0); + UMat u = a.getUMat(ACCESS_RW); + { + Mat b = u.getMat(ACCESS_RW); + EXPECT_MAT_NEAR(b, a, 0); + } } { Mat b; @@ -253,13 +255,15 @@ TEST_P(UMatBasicTests, GetUMat) EXPECT_MAT_NEAR(b, a, 0); } { - UMat ub; - ub = ua.getMat(ACCESS_RW).getUMat(ACCESS_RW); - EXPECT_MAT_NEAR(ub, ua, 0); + Mat m = ua.getMat(ACCESS_RW); + { + UMat ub = m.getUMat(ACCESS_RW); + EXPECT_MAT_NEAR(ub, ua, 0); + } } } -INSTANTIATE_TEST_CASE_P(UMat, UMatBasicTests, Combine(testing::Values(CV_8U), testing::Values(1, 2), +INSTANTIATE_TEST_CASE_P(UMat, UMatBasicTests, Combine(testing::Values(CV_8U, CV_64F), testing::Values(1, 2), testing::Values(cv::Size(1, 1), cv::Size(1, 128), cv::Size(128, 1), cv::Size(128, 128), cv::Size(640, 480)), Bool())); //////////////////////////////////////////////////////////////// Reshape //////////////////////////////////////////////////////////////////////// @@ -1080,7 +1084,7 @@ TEST(UMat, unmap_in_class) Mat dst; m.convertTo(dst, CV_32FC1); // some additional CPU-based per-pixel processing into dst - intermediateResult = dst.getUMat(ACCESS_READ); + intermediateResult = dst.getUMat(ACCESS_READ); // this violates lifetime of base(dst) / derived (intermediateResult) objects. Use copyTo? std::cout << "data processed..." << std::endl; } // problem is here: dst::~Mat() std::cout << "leave ProcessData()" << std::endl; @@ -1268,5 +1272,61 @@ TEST(UMat, DISABLED_Test_same_behaviour_write_and_write) ASSERT_TRUE(exceptionDetected); // data race } +TEST(UMat, mat_umat_sync) +{ + UMat u(10, 10, CV_8UC1, Scalar(1)); + { + Mat m = u.getMat(ACCESS_RW).reshape(1); + m.setTo(Scalar(255)); + } + + UMat uDiff; + compare(u, 255, uDiff, CMP_NE); + ASSERT_EQ(0, countNonZero(uDiff)); +} + +TEST(UMat, testTempObjects_UMat) +{ + UMat u(10, 10, CV_8UC1, Scalar(1)); + { + UMat u2 = u.getMat(ACCESS_RW).getUMat(ACCESS_RW); + u2.setTo(Scalar(255)); + } + + UMat uDiff; + compare(u, 255, uDiff, CMP_NE); + ASSERT_EQ(0, countNonZero(uDiff)); +} + +TEST(UMat, testTempObjects_Mat) +{ + Mat m(10, 10, CV_8UC1, Scalar(1)); + { + Mat m2; + ASSERT_ANY_THROW(m2 = m.getUMat(ACCESS_RW).getMat(ACCESS_RW)); + } +} + +TEST(UMat, testWrongLifetime_UMat) +{ + UMat u(10, 10, CV_8UC1, Scalar(1)); + { + UMat u2 = u.getMat(ACCESS_RW).getUMat(ACCESS_RW); + u.release(); // base object + u2.release(); // derived object, should show warning message + } +} + +TEST(UMat, testWrongLifetime_Mat) +{ + Mat m(10, 10, CV_8UC1, Scalar(1)); + { + UMat u = m.getUMat(ACCESS_RW); + Mat m2 = u.getMat(ACCESS_RW); + m.release(); // base object + m2.release(); // map of derived object + u.release(); // derived object, should show warning message + } +} } } // namespace cvtest::ocl diff --git a/modules/video/src/lkpyramid.cpp b/modules/video/src/lkpyramid.cpp index 43dde7df78..a34e73d519 100644 --- a/modules/video/src/lkpyramid.cpp +++ b/modules/video/src/lkpyramid.cpp @@ -1009,7 +1009,7 @@ namespace cv idxArg = kernel.set(idxArg, (int)winSize.height); // int c_winSize_y idxArg = kernel.set(idxArg, (int)iters); // int c_iters idxArg = kernel.set(idxArg, (char)calcErr); //char calcErr - return kernel.run(2, globalThreads, localThreads, false); + return kernel.run(2, globalThreads, localThreads, true); // sync=true because ocl::Image2D lifetime is not handled well for temp UMat } private: inline static bool isDeviceCPU()