cleanup ocl4dnn spatial convolution kernels

remove unused macros and the half-precision (FP16) type-definition macros;
also remove unused ocl::Queue declarations

Signed-off-by: Li Peng <peng.li@intel.com>
pull/10143/head
Authored by Wu, Zhiwen (7 years ago); committed by Li Peng
parent 55260a8d3c
commit 04edc8fe3a
  1. 8
      modules/dnn/src/ocl4dnn/src/math_functions.cpp
  2. 2
      modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
  3. 1
      modules/dnn/src/ocl4dnn/src/ocl4dnn_lrn.cpp
  4. 1
      modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp
  5. 1
      modules/dnn/src/ocl4dnn/src/ocl4dnn_softmax.cpp
  6. 8
      modules/dnn/src/opencl/conv_layer_spatial.cl

@@ -65,8 +65,6 @@ ocl::Image2D ocl4dnnGEMMCopyBufferToImage(UMat buffer, int offset,
int padded_width, int height,
int width, int ld)
{
ocl::Context ctx = ocl::Context::getDefault();
ocl::Queue queue = ocl::Queue::getDefault();
ocl::Image2D image;
if (!is_matrix_a && transpose)
@@ -192,9 +190,6 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
// just padding one line is enough as the sub group block read
// will clamp to edge according to the spec.
ocl::Context ctx = ocl::Context::getDefault();
ocl::Queue queue = ocl::Queue::getDefault();
ocl::Image2D ImA;
ocl::Image2D ImB;
@@ -446,7 +441,6 @@ bool ocl4dnnGEMV<float>(const CBLAS_TRANSPOSE TransA,
const int32_t offx, const float beta, UMat y,
const int32_t offy)
{
ocl::Queue queue = ocl::Queue::getDefault();
bool ret = false;
if (TransA == CblasNoTrans)
@@ -507,8 +501,6 @@ bool ocl4dnnAXPY(const int32_t N, const Dtype alpha,
const UMat X, const int32_t offX, UMat Y,
const int32_t offY)
{
ocl::Context ctx = ocl::Context::getDefault();
ocl::Kernel oclk_axpy(CL_KERNEL_SELECT("axpy"), cv::ocl::dnn::math_oclsrc);
if (oclk_axpy.empty())
return false;

@@ -184,8 +184,6 @@ void OCL4DNNConvSpatial<Dtype>::collectCommonInformation()
addDef("as_Dtype2", "as_float2");
addDef("as_Dtype4", "as_float4");
addDef("as_Dtype8", "as_float8");
addDef("Dtype_ID", (int)CV_32F);
addDef("Dtype_SIZE", (int)sizeof(Dtype));
}
typedef enum {

@@ -92,7 +92,6 @@ bool OCL4DNNLRN<Dtype>::Forward(const UMat& bottom, UMat& top)
template<typename Dtype>
bool OCL4DNNLRN<Dtype>::crossChannelForward(const UMat& bottom, UMat& top)
{
ocl::Queue queue = ocl::Queue::getDefault();
CHECK_EQ(phase_test_, true) << "Only support forward inference.";
cl_uint argIdx = 0;

@@ -97,7 +97,6 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
UMat& top_mask)
{
bool ret = true;
ocl::Queue queue = ocl::Queue::getDefault();
size_t global[] = { 128 * 128 };
size_t local[] = { 128 };
cl_uint argIdx = 0;

@@ -83,7 +83,6 @@ template<typename Dtype>
bool OCL4DNNSoftmax<Dtype>::Forward(const UMat& bottom, UMat& top)
{
bool ret = false;
ocl::Queue queue = ocl::Queue::getDefault();
bool intel_subgroup = ocl::Device::getDefault().intelSubgroupsSupport();
if (intel_subgroup && inner_num_ < 128)
{

@@ -82,7 +82,6 @@
#define LOOP(N, VAR, STMT) CAT(LOOP, N)((VAR), (STMT))
#if defined(convolve_simd) || defined(Conv_Interleaved)
#if Dtype_SIZE == 4
#define INT_TYPE uint
#define INT_TYPE2 uint2
#define INT_TYPE4 uint4
@@ -91,9 +90,6 @@
#define SUB_GROUP_BLOCK_READ4 intel_sub_group_block_read4
#define SUB_GROUP_BLOCK_READ8 intel_sub_group_block_read8
#define SUB_GROUP_BLOCK_READ intel_sub_group_block_read
#else
#error "Unsupported type"
#endif
#endif
#ifdef KERNEL_BASIC
@@ -176,11 +172,7 @@ __kernel void ConvolveBasic(
#elif defined KERNEL_IDLF
#if TYPE == TYPE_HALF
#define VLOAD4(_v, _p) do { (_v).s0 = *(_p); (_v).s1 = *(_p + 1); (_v).s2 = *(_p + 2); (_v).s3 = *(_p + 3); } while(0)
#else
#define VLOAD4(_v, _p) do { _v = vload4(0, _p); } while(0)
#endif
// Each work-item computes a OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT region of one output map.
// Each work-group (which will be mapped to 1 SIMD16/SIMD8 EU thread) will compute 16/8 different feature maps, but each feature map is for the same region of the imput image.

Loading…
Cancel
Save