From 6a3d925a47d54945eab7b50a769531703cde99a2 Mon Sep 17 00:00:00 2001 From: Joe Howse Date: Mon, 21 Jun 2021 00:46:32 -0300 Subject: [PATCH] OpenCL: core support for FP16, more channel orders * Support cl_image conversion for CL_HALF_FLOAT (float16) * Support cl_image conversion for additional channel orders: CL_A, CL_INTENSITY, CL_LUMINANCE, CL_RG, CL_RA * Comment on why cl_image conversion is unsupported for CL_RGB * Predict optimal vector width for float16 * ocl::kernelToStr: support float16 * ocl::Device::halfFPConfig: drop artificial requirement for OpenCL version >= 1.2. Even OpenCL 1.0 supports the underlying config property, CL_DEVICE_HALF_FP_CONFIG. * dumpOpenCLInformation: provide info on OpenCL half-float support and preferred half-float vector width * randu: support default range [-1.0, 1.0] for float16 * TestBase::warmup: support float16 --- .../opencv2/core/opencl/opencl_info.hpp | 7 +++ modules/core/src/ocl.cpp | 45 ++++++++++++++----- modules/ts/src/ocl_perf.cpp | 2 +- modules/ts/src/ts_perf.cpp | 2 +- 4 files changed, 44 insertions(+), 12 deletions(-) diff --git a/modules/core/include/opencv2/core/opencl/opencl_info.hpp b/modules/core/include/opencv2/core/opencl/opencl_info.hpp index 5e5c846ad0..3ead76e5c4 100644 --- a/modules/core/include/opencv2/core/opencl/opencl_info.hpp +++ b/modules/core/include/opencv2/core/opencl/opencl_info.hpp @@ -144,6 +144,10 @@ static void dumpOpenCLInformation() DUMP_MESSAGE_STDOUT(" Double support = " << doubleSupportStr); DUMP_CONFIG_PROPERTY("cv_ocl_current_haveDoubleSupport", device.doubleFPConfig() > 0); + const char* halfSupportStr = device.halfFPConfig() > 0 ? "Yes" : "No"; + DUMP_MESSAGE_STDOUT(" Half support = " << halfSupportStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_haveHalfSupport", device.halfFPConfig() > 0); + const char* isUnifiedMemoryStr = device.hostUnifiedMemory() ? "Yes" : "No"; DUMP_MESSAGE_STDOUT(" Host unified memory = " << isUnifiedMemoryStr); DUMP_CONFIG_PROPERTY("cv_ocl_current_hostUnifiedMemory", device.hostUnifiedMemory()); @@ -191,6 +195,9 @@ static void dumpOpenCLInformation() DUMP_MESSAGE_STDOUT(" Preferred vector width double = " << device.preferredVectorWidthDouble()); DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthDouble", device.preferredVectorWidthDouble()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width half = " << device.preferredVectorWidthHalf()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthHalf", device.preferredVectorWidthHalf()); } catch (...) { diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 0e97cf52fe..46185446f7 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -1566,6 +1566,7 @@ struct Device::Impl version_ = getStrProp(CL_DEVICE_VERSION); extensions_ = getStrProp(CL_DEVICE_EXTENSIONS); doubleFPConfig_ = getProp(CL_DEVICE_DOUBLE_FP_CONFIG); + halfFPConfig_ = getProp(CL_DEVICE_HALF_FP_CONFIG); hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY); maxComputeUnits_ = getProp(CL_DEVICE_MAX_COMPUTE_UNITS); maxWorkGroupSize_ = getProp(CL_DEVICE_MAX_WORK_GROUP_SIZE); @@ -1678,6 +1679,7 @@ struct Device::Impl String version_; std::string extensions_; int doubleFPConfig_; + int halfFPConfig_; bool hostUnifiedMemory_; int maxComputeUnits_; size_t maxWorkGroupSize_; @@ -1827,11 +1829,7 @@ int Device::singleFPConfig() const { return p ? p->getProp(CL_DEVICE_SINGLE_FP_CONFIG) : 0; } int Device::halfFPConfig() const -#ifdef CL_VERSION_1_2 -{ return p ? p->getProp(CL_DEVICE_HALF_FP_CONFIG) : 0; } -#else -{ CV_REQUIRE_OPENCL_1_2_ERROR; } -#endif +{ return p ? p->halfFPConfig_ : 0; } bool Device::endianLittle() const { return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; } @@ -6668,6 +6666,10 @@ void convertFromImage(void* cl_mem_image, UMat& dst) depth = CV_32F; break; + case CL_HALF_FLOAT: + depth = CV_16F; + break; + default: CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_data_type"); } @@ -6676,9 +6678,23 @@ void convertFromImage(void* cl_mem_image, UMat& dst) switch (fmt.image_channel_order) { case CL_R: + case CL_A: + case CL_INTENSITY: + case CL_LUMINANCE: type = CV_MAKE_TYPE(depth, 1); break; + case CL_RG: + case CL_RA: + type = CV_MAKE_TYPE(depth, 2); + break; + + // CL_RGB has no mappings to OpenCV types because CL_RGB can only be used with + // CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, or CL_UNORM_INT_101010. + /*case CL_RGB: + type = CV_MAKE_TYPE(depth, 3); + break;*/ + case CL_RGBA: case CL_BGRA: case CL_ARGB: @@ -7068,6 +7084,13 @@ static std::string kerToStr(const Mat & k) stream << "DIG(" << data[i] << "f)"; stream << "DIG(" << data[width] << "f)"; } + else if (depth == CV_16F) + { + stream.setf(std::ios_base::showpoint); + for (int i = 0; i < width; ++i) + stream << "DIG(" << (float)data[i] << "h)"; + stream << "DIG(" << (float)data[width] << "h)"; + } else { for (int i = 0; i < width; ++i) @@ -7091,7 +7114,7 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name) typedef std::string (* func_t)(const Mat &); static const func_t funcs[] = { kerToStr, kerToStr, kerToStr, kerToStr, - kerToStr, kerToStr, kerToStr, 0 }; + kerToStr, kerToStr, kerToStr, kerToStr }; const func_t func = funcs[ddepth]; CV_Assert(func != 0); @@ -7130,14 +7153,14 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(), d.preferredVectorWidthShort(), d.preferredVectorWidthShort(), d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(), - d.preferredVectorWidthDouble(), -1 }; + d.preferredVectorWidthDouble(), d.preferredVectorWidthHalf() }; // if the device says don't use vectors if (vectorWidths[0] == 1) { // it's heuristic vectorWidths[CV_8U] = vectorWidths[CV_8S] = 4; - vectorWidths[CV_16U] = vectorWidths[CV_16S] = 2; + vectorWidths[CV_16U] = vectorWidths[CV_16S] = vectorWidths[CV_16F] = 2; vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1; } @@ -7225,10 +7248,12 @@ struct Image2D::Impl { cl_image_format format; static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16, - CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, -1 }; + CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, CL_HALF_FLOAT }; static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16, CL_SNORM_INT16, -1, -1, -1, -1 }; - static const int channelOrders[] = { -1, CL_R, CL_RG, -1, CL_RGBA }; + // CL_RGB has no mappings to OpenCV types because CL_RGB can only be used with + // CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, or CL_UNORM_INT_101010. + static const int channelOrders[] = { -1, CL_R, CL_RG, /*CL_RGB*/ -1, CL_RGBA }; int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth]; int channelOrder = channelOrders[cn]; diff --git a/modules/ts/src/ocl_perf.cpp b/modules/ts/src/ocl_perf.cpp index 8dacf219f6..fe521f2c00 100644 --- a/modules/ts/src/ocl_perf.cpp +++ b/modules/ts/src/ocl_perf.cpp @@ -70,7 +70,7 @@ void randu(InputOutputArray dst) cv::randu(dst, -128, 128); else if (dst.depth() == CV_16U) cv::randu(dst, 0, 1024); - else if (dst.depth() == CV_32F || dst.depth() == CV_64F) + else if (dst.depth() == CV_32F || dst.depth() == CV_64F || dst.depth() == CV_16F) cv::randu(dst, -1.0, 1.0); else if (dst.depth() == CV_16S || dst.depth() == CV_32S) cv::randu(dst, -4096, 4096); diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp index 2a9169fd13..5a42ca01cd 100644 --- a/modules/ts/src/ts_perf.cpp +++ b/modules/ts/src/ts_perf.cpp @@ -1297,7 +1297,7 @@ void TestBase::warmup(cv::InputOutputArray a, WarmUpType wtype) cv::randu(a, -128, 128); else if (depth == CV_16U) cv::randu(a, 0, 1024); - else if (depth == CV_32F || depth == CV_64F) + else if (depth == CV_32F || depth == CV_64F || depth == CV_16F) cv::randu(a, -1.0, 1.0); else if (depth == CV_16S || depth == CV_32S) cv::randu(a, -4096, 4096);