cleanup ocl4dnn spatial convolution kernels

remove unused macros and the half-precision definition macros;
also remove the unused ocl::Queue handles

Signed-off-by: Li Peng <peng.li@intel.com>
Branch: pull/10143/head
Author: Wu, Zhiwen (committed by Li Peng)
Parent: 55260a8d3c
Commit: 04edc8fe3a
6 changed files:

1. modules/dnn/src/ocl4dnn/src/math_functions.cpp (8 deletions)
2. modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp (2 deletions)
3. modules/dnn/src/ocl4dnn/src/ocl4dnn_lrn.cpp (1 deletion)
4. modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp (1 deletion)
5. modules/dnn/src/ocl4dnn/src/ocl4dnn_softmax.cpp (1 deletion)
6. modules/dnn/src/opencl/conv_layer_spatial.cl (8 deletions)
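
Context for the deletions below: in OpenCV's ocl module, an ocl::Kernel is compiled against the default context, and ocl::Kernel::run() enqueues on the default queue whenever no explicit queue argument is passed. None of the deleted ocl::Context/ocl::Queue locals were ever forwarded to a call, so they were dead code. A minimal sketch of the pattern the surviving code relies on (the kernel name and helper here are illustrative, not the actual ocl4dnn sources):

    #include <opencv2/core/ocl.hpp>

    // Sketch only: shows that no explicit Context/Queue handle is needed
    // to compile and launch a kernel through OpenCV's ocl wrappers.
    bool launchSketch(const cv::ocl::ProgramSource& src, size_t count)
    {
        // Compiled against ocl::Context::getDefault() internally.
        cv::ocl::Kernel k("axpy_demo", src);  // "axpy_demo" is hypothetical
        if (k.empty())
            return false;
        // (real code would bind arguments via k.args(...) first)
        size_t globalSize[] = { count };
        // With no Queue argument, run() uses ocl::Queue::getDefault().
        return k.run(1, globalSize, NULL, false);
    }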

modules/dnn/src/ocl4dnn/src/math_functions.cpp

@@ -65,8 +65,6 @@ ocl::Image2D ocl4dnnGEMMCopyBufferToImage(UMat buffer, int offset,
                                           int padded_width, int height,
                                           int width, int ld)
 {
-    ocl::Context ctx = ocl::Context::getDefault();
-    ocl::Queue queue = ocl::Queue::getDefault();
     ocl::Image2D image;
     if (!is_matrix_a && transpose)
@@ -192,9 +190,6 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
     // just padding one line is enough as the sub group block read
     // will clamp to edge according to the spec.
-    ocl::Context ctx = ocl::Context::getDefault();
-    ocl::Queue queue = ocl::Queue::getDefault();
     ocl::Image2D ImA;
     ocl::Image2D ImB;
@@ -446,7 +441,6 @@ bool ocl4dnnGEMV<float>(const CBLAS_TRANSPOSE TransA,
                        const int32_t offx, const float beta, UMat y,
                        const int32_t offy)
 {
-    ocl::Queue queue = ocl::Queue::getDefault();
     bool ret = false;
     if (TransA == CblasNoTrans)
@@ -507,8 +501,6 @@ bool ocl4dnnAXPY(const int32_t N, const Dtype alpha,
                 const UMat X, const int32_t offX, UMat Y,
                 const int32_t offY)
 {
-    ocl::Context ctx = ocl::Context::getDefault();
     ocl::Kernel oclk_axpy(CL_KERNEL_SELECT("axpy"), cv::ocl::dnn::math_oclsrc);
     if (oclk_axpy.empty())
         return false;

modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp

@@ -184,8 +184,6 @@ void OCL4DNNConvSpatial<Dtype>::collectCommonInformation()
     addDef("as_Dtype2", "as_float2");
     addDef("as_Dtype4", "as_float4");
     addDef("as_Dtype8", "as_float8");
-    addDef("Dtype_ID", (int)CV_32F);
-    addDef("Dtype_SIZE", (int)sizeof(Dtype));
 }

 typedef enum {
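
The two deleted addDef() calls were the only producers of the Dtype_ID and Dtype_SIZE preprocessor symbols; their sole consumer, the #if Dtype_SIZE == 4 guard in conv_layer_spatial.cl, is removed further down, so both defines can go. For readers unfamiliar with the pattern, addDef() accumulates -D options for the OpenCL program build; a rough stand-in sketch (hypothetical helper, not the real ocl4dnn member):

    #include <sstream>

    // Hypothetical equivalent of OCL4DNNConvSpatial::addDef(name, value):
    // each call appends one -D<name>=<value> OpenCL build option.
    static void addDefSketch(std::ostringstream& opts, const char* name, int value)
    {
        opts << " -D" << name << "=" << value;
    }

    // Before this commit, addDefSketch(opts, "Dtype_SIZE", (int)sizeof(float))
    // would have contributed "-DDtype_SIZE=4" to the kernel build options.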

modules/dnn/src/ocl4dnn/src/ocl4dnn_lrn.cpp

@@ -92,7 +92,6 @@ bool OCL4DNNLRN<Dtype>::Forward(const UMat& bottom, UMat& top)
 template<typename Dtype>
 bool OCL4DNNLRN<Dtype>::crossChannelForward(const UMat& bottom, UMat& top)
 {
-    ocl::Queue queue = ocl::Queue::getDefault();
     CHECK_EQ(phase_test_, true) << "Only support forward inference.";
     cl_uint argIdx = 0;

modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp

@@ -97,7 +97,6 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
                                 UMat& top_mask)
 {
     bool ret = true;
-    ocl::Queue queue = ocl::Queue::getDefault();
     size_t global[] = { 128 * 128 };
     size_t local[] = { 128 };
     cl_uint argIdx = 0;

modules/dnn/src/ocl4dnn/src/ocl4dnn_softmax.cpp

@@ -83,7 +83,6 @@ template<typename Dtype>
 bool OCL4DNNSoftmax<Dtype>::Forward(const UMat& bottom, UMat& top)
 {
     bool ret = false;
-    ocl::Queue queue = ocl::Queue::getDefault();
     bool intel_subgroup = ocl::Device::getDefault().intelSubgroupsSupport();
     if (intel_subgroup && inner_num_ < 128)
     {

modules/dnn/src/opencl/conv_layer_spatial.cl

@@ -82,7 +82,6 @@
 #define LOOP(N, VAR, STMT) CAT(LOOP, N)((VAR), (STMT))
 #if defined(convolve_simd) || defined(Conv_Interleaved)
-#if Dtype_SIZE == 4
 #define INT_TYPE uint
 #define INT_TYPE2 uint2
 #define INT_TYPE4 uint4
@@ -91,9 +90,6 @@
 #define SUB_GROUP_BLOCK_READ4 intel_sub_group_block_read4
 #define SUB_GROUP_BLOCK_READ8 intel_sub_group_block_read8
 #define SUB_GROUP_BLOCK_READ intel_sub_group_block_read
-#else
-#error "Unsupported type"
-#endif
 #endif
 #ifdef KERNEL_BASIC
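
The guard could be dropped because the host side now only ever compiles these kernels for float (Dtype_SIZE == 4), so the uint-based block-read mapping is unconditionally correct. For reference, a hedged OpenCL C illustration of how such a macro is consumed (the demo kernel and identifiers are made up; requires the cl_intel_subgroups extension):

    // Illustration only: intel_sub_group_block_read4() returns a uint4,
    // which the kernels bit-reinterpret to float4 via as_float4().
    #pragma OPENCL EXTENSION cl_intel_subgroups : enable
    #define SUB_GROUP_BLOCK_READ4 intel_sub_group_block_read4
    __kernel void block_read_demo(const __global uint* src, __global float4* dst)
    {
        uint4 raw = SUB_GROUP_BLOCK_READ4(src);  // cooperative subgroup read
        dst[get_global_id(0)] = as_float4(raw);  // no value conversion
    }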
@@ -176,11 +172,7 @@ __kernel void ConvolveBasic(
 #elif defined KERNEL_IDLF
-#if TYPE == TYPE_HALF
-#define VLOAD4(_v, _p) do { (_v).s0 = *(_p); (_v).s1 = *(_p + 1); (_v).s2 = *(_p + 2); (_v).s3 = *(_p + 3); } while(0)
-#else
 #define VLOAD4(_v, _p) do { _v = vload4(0, _p); } while(0)
-#endif
 // Each work-item computes a OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT region of one output map.
 // Each work-group (which will be mapped to 1 SIMD16/SIMD8 EU thread) will compute 16/8 different feature maps, but each feature map is for the same region of the input image.
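
With the TYPE_HALF branch gone, VLOAD4 is always a thin wrapper over the built-in vload4(); a minimal usage sketch (the demo kernel is illustrative only):

    // Illustration only: VLOAD4 now unconditionally expands to one vload4().
    #define VLOAD4(_v, _p) do { _v = vload4(0, _p); } while(0)
    __kernel void vload4_demo(const __global float* weights, __global float4* out)
    {
        float4 w;
        VLOAD4(w, weights + 4 * get_global_id(0));  // w = vload4(0, ...)
        out[get_global_id(0)] = w;
    }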
