From 8c2dd5fb9a4e568466f92db5d954d5ddc351bc9d Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 8 Oct 2021 00:12:14 +0000 Subject: [PATCH] dnn(ocl4dnn): cleanup dead code, improve logging --- modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp | 2 - .../src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp | 100 ++++++------------ modules/dnn/src/opencl/conv_layer_spatial.cl | 39 ++----- 3 files changed, 42 insertions(+), 99 deletions(-) diff --git a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp index d6fb83becb..bf5fba71a1 100644 --- a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp +++ b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp @@ -222,8 +222,6 @@ class OCL4DNNConvSpatial bool createDWConvKernel(int32_t blockWidth, int32_t blockHeight, int32_t blockDepth); - void CreateSubBuffer(const UMat& buffer, UMat& sub_buffer, - int32_t offset, int32_t size, bool write_only); bool convolve(const UMat &bottom, UMat &top, const UMat &weight, const UMat &bias, int32_t numImages, diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp index 45bd249e5d..6c468fc7da 100644 --- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp +++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp @@ -218,14 +218,7 @@ OCL4DNNConvSpatial::OCL4DNNConvSpatial(OCL4DNNConvConfig config) #endif if (!use_cache_path_) { - static int warn_ = 0; - if (!warn_) - { - std::cerr - << "OpenCV(ocl4dnn): Kernel configuration cache directory doesn't exist: " << cache_path_ << std::endl - << std::endl; - warn_ = true; - } + CV_LOG_ONCE_ERROR(NULL, "OpenCV(ocl4dnn): Kernel configuration cache directory doesn't exist: " << cache_path_); } } @@ -418,7 +411,6 @@ void OCL4DNNConvSpatial::setupKernelDetails(int32_t kernelType, addDef("CHANNELS", channels_ / group_); addDef("APPLY_BIAS", bias_term_); addDef("OUTPUT_Z", M_); - addDef("ZPAR", 1); setFusionDefine(fused_activ_, fused_eltwise_); src_ = cv::ocl::dnn::conv_layer_spatial_oclsrc; @@ -672,8 +664,7 @@ void interleaveMatrix(Dtype* mem_dst, const Dtype *mem, int r, int c, int interleavedRows, int nonInterleavedRows, int blockWidth, int rowAlignment ) { - CHECK_EQ(interleavedRows % 2, 0) << - "interleaveMatrix only supports even values for interleavedRows."; + CV_Check(interleavedRows, interleavedRows % 2 == 0, "interleaveMatrix only supports even values for interleavedRows."); size_t memSize = r * c * sizeof(float); size_t dstSize = memSize * @@ -685,9 +676,12 @@ void interleaveMatrix(Dtype* mem_dst, const Dtype *mem, const int yStride = c * 2; const Dtype *pSrc = mem; Dtype* pDst = mem_dst; - for (int y = 0; y < r;) { - for (int rows = 0; rows < interleavedRows; rows += 2) { - if ( y >= r ) break; + for (int y = 0; y < r;) + { + for (int rows = 0; rows < interleavedRows; rows += 2) + { + if (y >= r) + break; if ((c % xStride) == 0) { for (int x = 0; x < c / xStride; x++) { memcpy(pDst + x * xStride * 2, // NOLINT @@ -712,11 +706,14 @@ void interleaveMatrix(Dtype* mem_dst, const Dtype *mem, y += 2; } - for (int rows = 0; rows < nonInterleavedRows; rows++) { - if (y >= r) break; + for (int rows = 0; rows < nonInterleavedRows; rows++) + { + if (y >= r) + break; const int stride = rowAlignment; int remaining = c; - for (int x = 0; x < c; x += stride) { + for (int x = 0; x < c; x += stride) + { if (remaining >= stride) { memcpy(pDst + x * 2, pSrc + x, stride * sizeof(Dtype)); // NOLINT remaining -=stride; @@ -852,34 +849,6 @@ bool OCL4DNNConvSpatial::createBasicKernel(int32_t blockWidth, return false; } -template<> -void OCL4DNNConvSpatial::CreateSubBuffer(const UMat& buffer, UMat& sub_buffer, - int32_t offset, int32_t size, bool write_only) -{ - cl_mem sub_mem; - cl_buffer_region region; - cl_int err; - size_t element_size = (use_half_) ? sizeof(short) : sizeof(float); - - region.origin = offset * element_size + buffer.offset; - region.size = size * element_size; - sub_mem = clCreateSubBuffer((cl_mem)buffer.handle(ACCESS_READ), - write_only ? CL_MEM_WRITE_ONLY : CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err); - if (err) - { - std::cout << "Failed to create sub buffer." << std::endl; - return; - } - - int step = element_size, rows = size, cols = 1; - ocl::convertFromBuffer(sub_mem, step, rows, cols, - (use_half_) ? CV_16SC1 : CV_32FC1, sub_buffer); - - //decrease ocl mem refcount - clReleaseMemObject(sub_mem); -} - template<> bool OCL4DNNConvSpatial::convolve(const UMat &bottom, UMat &top, const UMat &weight, const UMat &bias, @@ -938,7 +907,7 @@ bool OCL4DNNConvSpatial::convolve(const UMat &bottom, UMat &top, kernel.set(argIdx++, (uint16_t)output_h_); if (!kernel.run_(3, config->global_work_size, config->local_work_size, false)) { - std::cout << "IDLF kernel run failed." << std::endl; + CV_LOG_ERROR(NULL, "DNN/OpenCL: IDLF kernel run failed"); return false; } } @@ -1012,7 +981,7 @@ bool OCL4DNNConvSpatial::convolve(const UMat &bottom, UMat &top, if (!kernel.run_(3, global_size, config->local_work_size, false)) { - std::cout << "GEMM like kernel run failed." << std::endl; + CV_LOG_ERROR(NULL, "DNN/OpenCL: GEMM like kernel run failed"); return false; } } @@ -1115,14 +1084,9 @@ float OCL4DNNConvSpatial::timedConvolve(const UMat &bottom, UMat &top, { queue = cv::ocl::Queue::getDefault(); } - catch (const cv::Exception&) + catch (const std::exception& e) { - static int warn_ = 0; - if (!warn_) - { - std::cout << "OpenCV(ocl4dnn): Can't get OpenCL default queue for auto-tuning." << std::endl; - warn_ = true; - } + CV_LOG_ONCE_ERROR(NULL, "OpenCV(ocl4dnn): Can't get OpenCL default queue for auto-tuning: " << e.what()); return 1e6; } @@ -1326,9 +1290,9 @@ ocl::Program OCL4DNNConvSpatial::compileKernel() phash.insert(std::pair(kernel_name_, program)); if (!program.ptr()) { - std::cout << "Failed to compile kernel: " << kernel_name_ - << ", buildflags: " << options - << ", errmsg: " << errmsg << std::endl; + CV_LOG_WARNING(NULL, "DNN/OpenCL: Failed to compile kernel: " << kernel_name_ + << ", buildflags: '" << options << "', errmsg: '" << errmsg << "'" + ); } return program; } @@ -1754,7 +1718,8 @@ void OCL4DNNConvSpatial::setupConvolution(const UMat &bottom, fastestTime = kernelQueue[x]->executionTime; } } - if (fastestKernel < 0) break; + if (fastestKernel < 0) + break; // Test fastest kernel bool verified = verifyResult(bottom, top, weight, bias, numImages, kernelQueue[fastestKernel], verifyTop); if (verified == true) { @@ -1913,17 +1878,18 @@ bool OCL4DNNConvSpatial::setupKernelByConfig(int x, int y, int z, int typ { if (z == 1) z = 16; - CHECK_EQ(z == 16 || z == 8, true) << "invalid SIMD size" << std::endl; + CV_Check(z, z == 16 || z == 8, "DNN/OpenCL: IDLF - invalid SIMD size"); } kernelQueue.clear(); createConvolutionKernel(type, x, y, z); - if (kernelQueue.size() != 1) { - std::cerr << "Failed setup kernel by config:" + if (kernelQueue.size() != 1) + { + CV_LOG_ERROR(NULL, "DNN/OpenCL: Failed setup kernel by config: " << " x = " << x << " y = " << y << " z = " << z << " type = " << type - << std::endl; + ); return false; } bestKernelConfig = kernelQueue[0]; @@ -1955,13 +1921,9 @@ bool OCL4DNNConvSpatial::loadTunedConfig() { if (cache_path_.empty()) { - static int warn_ = 0; - if (!warn_) - { - std::cout << "OpenCV(ocl4dnn): consider to specify kernel configuration cache directory " << std::endl - << " via OPENCV_OCL4DNN_CONFIG_PATH parameter." << std::endl; - warn_ = true; - } + CV_LOG_ONCE_WARNING(NULL, "OpenCV(ocl4dnn): consider to specify kernel configuration cache directory " + "through OPENCV_OCL4DNN_CONFIG_PATH parameter." + ); } return false; } diff --git a/modules/dnn/src/opencl/conv_layer_spatial.cl b/modules/dnn/src/opencl/conv_layer_spatial.cl index eb5d354020..c9ddacfb8e 100644 --- a/modules/dnn/src/opencl/conv_layer_spatial.cl +++ b/modules/dnn/src/opencl/conv_layer_spatial.cl @@ -161,23 +161,15 @@ __kernel void ConvolveBasic( const int out_idx = get_global_id(0); // 1D task layout: [output_width * output_height * OUTPUT_Z] const int plane_size = output_width * output_height; const int out_plane_idx = out_idx % plane_size; - const int outputZ = out_idx / plane_size; + const int outputZ = out_idx / plane_size; // kernelNum const int outputY = out_plane_idx / output_width; const int outputX = out_plane_idx % output_width; - const int kernelNum = outputZ * ZPAR; - if (kernelNum < OUTPUT_Z) + if (outputZ < OUTPUT_Z) { - Dtype sum[ZPAR]; - for (int kern = 0; kern < ZPAR; kern++) - { - sum[kern] = 0.0f; - } + Dtype sum = 0.0f; const int org_y = outputY * STRIDE_Y - pad_h; const int org_x = outputX * STRIDE_X - pad_w; - const int currentKernelOffset = kernel_offset + kernelNum*KERNEL_HEIGHT*KERNEL_WIDTH*CHANNELS; -#if APPLY_BIAS - const int biasIndex = bias_offset + kernelNum; -#endif + const int currentKernelOffset = kernel_offset + outputZ*KERNEL_HEIGHT*KERNEL_WIDTH*CHANNELS; const int local_image_offset = org_y * input_width + org_x; const int imageSize = input_width * input_height; __global Dtype* image_dataPtr = (image_data + (image_offset + local_image_offset)); @@ -186,17 +178,13 @@ __kernel void ConvolveBasic( { for (int y = 0; y < KERNEL_HEIGHT; y++) { + int y_ = org_y + y * DILATION_Y; for (int x = 0; x < KERNEL_WIDTH; x++) { - int y_ = org_y + y * DILATION_Y; int x_ = org_x + x * DILATION_X; - if (!(y_ >= 0 && y_ < input_height && x_ >= 0 && x_ < input_width)) - { - continue; - } - for (int kern = 0; kern < ZPAR; kern++) + if (y_ >= 0 && y_ < input_height && x_ >= 0 && x_ < input_width) { - sum[kern] += image_dataPtr[x * DILATION_X] * kernel_dataPtr[kern*KERNEL_HEIGHT*KERNEL_WIDTH*CHANNELS + x]; + sum = mad(image_dataPtr[x * DILATION_X], kernel_dataPtr[x], sum); } } image_dataPtr += input_width * DILATION_Y; @@ -205,18 +193,13 @@ __kernel void ConvolveBasic( image_dataPtr += imageSize - input_width*KERNEL_HEIGHT*DILATION_Y; } - for (int kern = 0; kern < ZPAR; kern++) - { - if (kernelNum + kern < OUTPUT_Z) - { - int offset = convolved_image_offset + (kernelNum+kern)*output_height*output_width + outputY*output_width + outputX; + int offset = convolved_image_offset + out_idx; #if APPLY_BIAS - ACTIVATION_FUNCTION(convolved_image, offset, sum[kern] + bias[biasIndex + kern], biasIndex + kern); + int biasIndex = bias_offset + outputZ; + ACTIVATION_FUNCTION(convolved_image, offset, sum + bias[biasIndex], biasIndex); #else - ACTIVATION_FUNCTION(convolved_image, offset, sum[kern], kernelNum + kern); + ACTIVATION_FUNCTION(convolved_image, offset, sum, outputZ); #endif - } - } } }