From 54063c40de54d3dc63e4ef6183dd25be51e56812 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 2 Apr 2020 10:10:57 +0000 Subject: [PATCH] core(ocl): options to control buffer access flags - control use of clEnqueueMapBuffer or clEnqueueReadBuffer[Rect] - added benchmarks with OpenCL buffer access use cases --- modules/core/perf/opencl/perf_matop.cpp | 135 ++++++++++++++++++++++++ modules/core/src/ocl.cpp | 21 +++- 2 files changed, 155 insertions(+), 1 deletion(-) diff --git a/modules/core/perf/opencl/perf_matop.cpp b/modules/core/perf/opencl/perf_matop.cpp index 97a881b872..5be1f431f6 100644 --- a/modules/core/perf/opencl/perf_matop.cpp +++ b/modules/core/perf/opencl/perf_matop.cpp @@ -146,6 +146,141 @@ OCL_PERF_TEST_P(CopyToFixture, CopyToWithMaskUninit, SANITY_CHECK(dst); } + + +enum ROIType { + ROI_FULL, + ROI_2_RECT, + ROI_2_TOP, // contiguous memory block + ROI_2_LEFT, + ROI_4, + ROI_16, +}; +static Rect getROI(enum ROIType t, const Size& sz) +{ + switch (t) + { + case ROI_FULL: return Rect(0, 0, sz.width, sz.height); + case ROI_2_RECT: return Rect(0, 0, sz.width * 71 / 100, sz.height * 71 / 100); // 71 = sqrt(1/2) * 100 + case ROI_2_TOP: return Rect(0, 0, sz.width, sz.height / 2); // top half: full-width rows + case ROI_2_LEFT: return Rect(0, 0, sz.width / 2, sz.height); // left half: half-width rows + case ROI_4: return Rect(0, 0, sz.width / 2, sz.height / 2); + case ROI_16: return Rect(0, 0, sz.width / 4, sz.height / 4); + } + CV_Assert(false); +} + +typedef TestBaseWithParam< tuple<cv::Size, int, ROIType> > OpenCLBuffer; + +static inline void PrintTo(const tuple<cv::Size, int, ROIType>& v, std::ostream* os) +{ + *os << "(" << get<0>(v) << ", " << typeToString(get<1>(v)) << ", "; + enum ROIType roiType = get<2>(v); + if (roiType == ROI_FULL) + *os << "ROI_100_FULL"; + else if (roiType == ROI_2_RECT) + *os << "ROI_050_RECT_HALF"; + else if (roiType == ROI_2_TOP) + *os << "ROI_050_TOP_HALF"; + else if (roiType == ROI_2_LEFT) + *os << "ROI_050_LEFT_HALF"; + else if (roiType == ROI_4) + *os << 
"ROI_025_1/4"; + else + *os << "ROI_012_1/16"; + *os << ")"; +} + +PERF_TEST_P_(OpenCLBuffer, cpu_write) +{ + const Size srcSize = get<0>(GetParam()); + const int type = get<1>(GetParam()); + const Rect roi = getROI(get<2>(GetParam()), srcSize); + + checkDeviceMaxMemoryAllocSize(srcSize, type); + + UMat src(srcSize, type); + declare.in(src(roi), WARMUP_NONE); + + OCL_TEST_CYCLE() + { + Mat m = src(roi).getMat(ACCESS_WRITE); + m.setTo(Scalar(1, 2, 3, 4)); + } + + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P_(OpenCLBuffer, cpu_read) +{ + const Size srcSize = get<0>(GetParam()); + const int type = get<1>(GetParam()); + const Rect roi = getROI(get<2>(GetParam()), srcSize); + + checkDeviceMaxMemoryAllocSize(srcSize, type); + + UMat src(srcSize, type, Scalar(1, 2, 3, 4)); + declare.in(src(roi), WARMUP_NONE); + + OCL_TEST_CYCLE() + { + unsigned counter = 0; + Mat m = src(roi).getMat(ACCESS_READ); + for (int y = 0; y < m.rows; y++) + { + uchar* ptr = m.ptr(y); + size_t width_bytes = m.cols * m.elemSize(); + for (size_t x_bytes = 0; x_bytes < width_bytes; x_bytes++) + counter += (unsigned)(ptr[x_bytes]); + } + } + + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P_(OpenCLBuffer, cpu_update) +{ + const Size srcSize = get<0>(GetParam()); + const int type = get<1>(GetParam()); + const Rect roi = getROI(get<2>(GetParam()), srcSize); + + checkDeviceMaxMemoryAllocSize(srcSize, type); + + UMat src(srcSize, type, Scalar(1, 2, 3, 4)); + declare.in(src(roi), WARMUP_NONE); + + OCL_TEST_CYCLE() + { + Mat m = src(roi).getMat(ACCESS_READ | ACCESS_WRITE); + for (int y = 0; y < m.rows; y++) + { + uchar* ptr = m.ptr(y); + size_t width_bytes = m.cols * m.elemSize(); + for (size_t x_bytes = 0; x_bytes < width_bytes; x_bytes++) + ptr[x_bytes] += 1; + } + } + + SANITY_CHECK_NOTHING(); +} + +INSTANTIATE_TEST_CASE_P(/*FULL*/, OpenCLBuffer, + testing::Combine( + testing::Values(szVGA, sz720p, sz1080p, sz2160p), + testing::Values(CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4), + testing::Values(ROI_FULL) + ) +); + 
+INSTANTIATE_TEST_CASE_P(ROI, OpenCLBuffer, + testing::Combine( + testing::Values(sz1080p, sz2160p), + testing::Values(CV_8UC1), + testing::Values(ROI_16, ROI_4, ROI_2_RECT, ROI_2_LEFT, ROI_2_TOP, ROI_FULL) + ) +); + + } } // namespace opencv_test::ocl #endif // HAVE_OPENCL diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 52533484a0..c6b6e2f0f0 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -4607,6 +4607,17 @@ public: return u; } + static bool isOpenCLMapForced() // force clEnqueueMapBuffer / clEnqueueUnmapMemObject OpenCL API + { + static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_MAPPING", false); + return value; + } + static bool isOpenCLCopyingForced() // force clEnqueueReadBuffer[Rect] / clEnqueueWriteBuffer[Rect] OpenCL API + { + static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_COPYING", false); + return value; + } + void getBestFlags(const Context& ctx, int /*flags*/, UMatUsageFlags usageFlags, int& createFlags, int& flags0) const { const Device& dev = ctx.device(0); @@ -4614,7 +4625,15 @@ public: if ((usageFlags & USAGE_ALLOCATE_HOST_MEMORY) != 0) createFlags |= CL_MEM_ALLOC_HOST_PTR; - if( dev.hostUnifiedMemory() ) + if (!isOpenCLCopyingForced() && + (isOpenCLMapForced() || + (dev.hostUnifiedMemory() +#ifndef __APPLE__ + || dev.isIntel() +#endif + ) + ) + ) flags0 = 0; else flags0 = UMatData::COPY_ON_MAP;