ocl: avoid rescheduling of async kernels

pull/18362/head
Alexander Alekhin 4 years ago
parent 3e3787ecb6
commit 4fa82809df
  1. 37
      modules/core/src/ocl.cpp
  2. 13
      modules/dnn/src/ocl4dnn/src/math_functions.cpp

@ -2755,7 +2755,7 @@ KernelArg KernelArg::Constant(const Mat& m)
struct Kernel::Impl
{
Impl(const char* kname, const Program& prog) :
refcount(1), handle(NULL), isInProgress(false), nu(0)
refcount(1), handle(NULL), isInProgress(false), isAsyncRun(false), nu(0)
{
cl_program ph = (cl_program)prog.ptr();
cl_int retval = 0;
@ -2832,6 +2832,7 @@ struct Kernel::Impl
enum { MAX_ARRS = 16 };
UMatData* u[MAX_ARRS];
bool isInProgress;
bool isAsyncRun; // true if kernel was scheduled in async mode
int nu;
std::list<Image2D> images;
bool haveTempDstUMats;
@ -3111,13 +3112,45 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
}
/**
 * Returns the cached value of the boolean configuration parameter
 * OPENCV_OPENCL_RAISE_ERROR_REUSE_ASYNC_KERNEL (default: false).
 *
 * Callers use the result to decide whether an invalid kernel re-launch
 * should trigger an assertion in addition to being logged.
 *
 * @return true if the configuration parameter is enabled, false otherwise.
 */
static bool isRaiseErrorOnReuseAsyncKernel()
{
    // A function-local static initializer runs exactly once on the first call.
    // Since C++11 that initialization is also guarded against concurrent first
    // calls, which a hand-rolled pair of "initialized"/"value" flags is not.
    static const bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR_REUSE_ASYNC_KERNEL", false);
    return value;
}
bool Kernel::Impl::run(int dims, size_t globalsize[], size_t localsize[],
bool sync, int64* timeNS, const Queue& q)
{
CV_INSTRUMENT_REGION_OPENCL_RUN(name.c_str());
if (!handle || isInProgress)
if (!handle)
{
CV_LOG_ERROR(NULL, "OpenCL kernel has zero handle: " << name);
return false;
}
if (isAsyncRun)
{
CV_LOG_ERROR(NULL, "OpenCL kernel can't be reused in async mode: " << name);
if (isRaiseErrorOnReuseAsyncKernel())
CV_Assert(0);
return false; // OpenCV 5.0: raise error
}
isAsyncRun = !sync;
if (isInProgress)
{
CV_LOG_ERROR(NULL, "Previous OpenCL kernel launch is not finished: " << name);
if (isRaiseErrorOnReuseAsyncKernel())
CV_Assert(0);
return false; // OpenCV 5.0: raise error
}
cl_command_queue qq = getQueue(q);
if (haveTempDstUMats)

@ -46,6 +46,8 @@
#include <vector>
#include "opencl_kernels_dnn.hpp"
#include "opencv2/core/utils/logger.hpp"
namespace cv { namespace dnn { namespace ocl4dnn {
enum gemm_data_type_t
@ -238,10 +240,6 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
kernel_name += "_float";
}
ocl::Kernel oclk_gemm_float(kernel_name.c_str(), ocl::dnn::gemm_image_oclsrc, opts);
if (oclk_gemm_float.empty())
return false;
while (C_start_y < M)
{
blockC_width = std::min(static_cast<int>(N) - C_start_x, blocksize);
@ -348,6 +346,10 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
}
local[1] = 1;
ocl::Kernel oclk_gemm_float(kernel_name.c_str(), ocl::dnn::gemm_image_oclsrc, opts);
if (oclk_gemm_float.empty())
return false;
cl_uint arg_idx = 0;
if (is_image_a)
oclk_gemm_float.set(arg_idx++, ocl::KernelArg::PtrReadOnly(A));
@ -378,7 +380,10 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
oclk_gemm_float.set(arg_idx++, isFirstColBlock);
if (!oclk_gemm_float.run(2, global, local, false))
{
CV_LOG_WARNING(NULL, "OpenCL kernel enqueue failed: " << kernel_name);
return false;
}
if (TransA == CblasNoTrans)
A_start_x += blockA_width;

Loading…
Cancel
Save