ReLU6 layer OCL support

Include the ReLU6 OCL forward kernel and convolution layer-fusion support.

Signed-off-by: Li Peng <peng.li@intel.com>
pull/10891/head
Li Peng 7 years ago
parent 0e4eed0ba1
commit 2863f950d6
Changed files:
  1. modules/dnn/include/opencv2/dnn/all_layers.hpp (2 changes)
  2. modules/dnn/src/dnn.cpp (1 change)
  3. modules/dnn/src/layers/convolution_layer.cpp (14 changes)
  4. modules/dnn/src/layers/elementwise_layers.cpp (29 changes)
  5. modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp (6 changes)
  6. modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp (22 changes)
  7. modules/dnn/src/opencl/activations.cl (11 changes)
  8. modules/dnn/src/opencl/conv_layer_spatial.cl (21 changes)

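For reference, ReLU6 clamps every element to the [min_value, max_value] range, which defaults to [0, 6] (see ReLU6Layer::create further down). A plain C++ sketch of the math that both the new element-wise kernel and the fused convolution path compute:

#include <algorithm>

// Reference semantics of ReLU6: clamp each value to [minValue, maxValue].
// The OpenCL kernel added in this commit computes exactly this per element.
static inline float relu6_ref(float x, float minValue = 0.f, float maxValue = 6.f)
{
    return std::min(std::max(x, minValue), maxValue);
}
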
modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -406,6 +406,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     class CV_EXPORTS ReLU6Layer : public ActivationLayer
     {
     public:
+        float minValue, maxValue;
+
         static Ptr<ReLU6Layer> create(const LayerParams &params);
     };

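The two added members expose the clamp bounds on the layer object so the convolution fusion code further down can read them without re-parsing LayerParams. A small usage sketch, assuming only the public cv::dnn API:

#include <opencv2/dnn.hpp>

// Create a ReLU6 layer and read the bounds back through the new public fields.
static void relu6FieldsExample()
{
    cv::dnn::LayerParams lp;
    lp.type = "ReLU6";
    lp.set("min_value", 0.f);
    lp.set("max_value", 6.f);

    cv::Ptr<cv::dnn::ReLU6Layer> layer = cv::dnn::ReLU6Layer::create(lp);
    // After this patch, layer->minValue == 0.f and layer->maxValue == 6.f.
}
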
modules/dnn/src/dnn.cpp
@@ -1439,6 +1439,7 @@ struct Net::Impl
                          nextData &&
                          ((nextData->type == "ReLU") ||
                           (nextData->type == "ChannelsPReLU") ||
+                          (nextData->type == "ReLU6") ||
                           (nextData->type == "TanH") ||
                           (nextData->type == "Power"))) )
                     {

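The single-line dnn.cpp change whitelists "ReLU6" among the activation types that Net::Impl may fuse into the preceding convolution on the OpenCL path. Written out as a standalone helper (hypothetical, for illustration only), the updated check is equivalent to:

#include <string>

// Hypothetical helper, not part of the patch: mirrors the fusable-activation
// whitelist in Net::Impl after this commit.
static bool isFusableActivationType(const std::string& type)
{
    return type == "ReLU" || type == "ChannelsPReLU" || type == "ReLU6" ||
           type == "TanH" || type == "Power";
}
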
modules/dnn/src/layers/convolution_layer.cpp
@@ -860,6 +860,15 @@ public:
                 activType = OCL4DNN_CONV_FUSED_ACTIV_RELU;
             }
+            Ptr<ReLU6Layer> activ_relu6 = activ.dynamicCast<ReLU6Layer>();
+            if( !activ_relu6.empty() )
+            {
+                reluslope.resize(2);
+                reluslope[0] = activ_relu6->minValue;
+                reluslope[1] = activ_relu6->maxValue;
+                activType = OCL4DNN_CONV_FUSED_ACTIV_RELU6;
+            }
+
             Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
             if( !activ_chprelu.empty() )
             {

@@ -906,12 +915,17 @@ public:
             {
                 convolutionOp->setActivTanh(true);
             }
+            else if ( activType == OCL4DNN_CONV_FUSED_ACTIV_RELU6)
+            {
+                convolutionOp->setActivReLU6(true, reluslope[0], reluslope[1]);
+            }
             else
             {
                 convolutionOp->setActivReLU(false, 0);
                 convolutionOp->setActivPReLU(false, reluslope);
                 convolutionOp->setActivPower(false, 1.f);
                 convolutionOp->setActivTanh(false);
+                convolutionOp->setActivReLU6(false, 0, 0);
             }
             newActiv = false;
         }

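Design note: rather than introducing a new member, the patch reuses the existing reluslope vector as a generic parameter carrier; for ReLU6 it is resized to exactly two entries, {minValue, maxValue}, which are then forwarded to the new setActivReLU6() call. A minimal sketch of that packing, with a hypothetical helper name:

#include <vector>

// Hypothetical helper, not part of the patch: shows how the ReLU6 bounds are
// packed into the two-element reluslope vector used by the fusion setup above.
static std::vector<float> packReLU6Bounds(float minValue, float maxValue)
{
    std::vector<float> reluslope(2);
    reluslope[0] = minValue;  // later passed as setActivReLU6(true, reluslope[0], reluslope[1])
    reluslope[1] = maxValue;
    return reluslope;
}
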
modules/dnn/src/layers/elementwise_layers.cpp
@@ -381,8 +381,30 @@ struct ReLU6Functor
 #ifdef HAVE_OPENCL
     bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
     {
-        // TODO: implement OCL version
-        return false;
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+
+        inps.getUMatVector(inputs);
+        outs.getUMatVector(outputs);
+        String buildopt = oclGetTMacro(inputs[0]);
+
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            UMat& src = inputs[i];
+            UMat& dst = outputs[i];
+
+            ocl::Kernel kernel("ReLU6Forward", ocl::dnn::activations_oclsrc, buildopt);
+            kernel.set(0, (int)src.total());
+            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
+            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
+            kernel.set(3, (float)minValue);
+            kernel.set(4, (float)maxValue);
+
+            size_t gSize = src.total();
+            CV_Assert(kernel.run(1, &gSize, NULL, false));
+        }
+
+        return true;
     }
 #endif

@@ -867,6 +889,9 @@ Ptr<ReLU6Layer> ReLU6Layer::create(const LayerParams& params)
     float maxValue = params.get<float>("max_value", 6.0f);
     Ptr<ReLU6Layer> l(new ElementWiseLayer<ReLU6Functor>(ReLU6Functor(minValue, maxValue)));
     l->setParamsFrom(params);
+    l->minValue = minValue;
+    l->maxValue = maxValue;

     return l;
 }

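With applyOCL() implemented, a standalone ReLU6 layer can now run on the OpenCL target. A minimal end-to-end sketch using the public dnn API (assumes an OpenCL-capable device; otherwise the layer falls back to the CPU path):

#include <opencv2/dnn.hpp>
using namespace cv;
using namespace cv::dnn;

int main()
{
    LayerParams lp;
    lp.set("min_value", 0.f);
    lp.set("max_value", 6.f);

    Net net;
    net.addLayerToPrev("relu6", "ReLU6", lp);   // single-layer net, for illustration

    Mat input({1, 2, 4, 5}, CV_32F);
    randu(input, -10.0f, 10.0f);

    net.setPreferableTarget(DNN_TARGET_OPENCL); // exercises the new ReLU6Forward kernel
    net.setInput(input);
    Mat out = net.forward();                    // every output value lies in [0, 6]
    return 0;
}
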
modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
@@ -78,7 +78,8 @@ typedef enum {
     OCL4DNN_CONV_FUSED_ACTIV_RELU = 1,
     OCL4DNN_CONV_FUSED_ACTIV_PRELU = 2,
     OCL4DNN_CONV_FUSED_ACTIV_POWER = 3,
-    OCL4DNN_CONV_FUSED_ACTIV_TANH = 4
+    OCL4DNN_CONV_FUSED_ACTIV_TANH = 4,
+    OCL4DNN_CONV_FUSED_ACTIV_RELU6 = 5
 } ocl4dnnFusedActiv_t;

 template<typename Dtype>

@@ -96,6 +97,7 @@ class OCL4DNNConvSpatial
     void setActivPReLU(bool fuse_activ, std::vector<float> &slope);
     void setActivPower(bool fuse_activ, float power);
     void setActivTanh(bool fuse_activ);
+    void setActivReLU6(bool fuse_activ, float min, float max);
     void setBias(bool bias_term);
 private:

@@ -319,6 +321,8 @@ class OCL4DNNConvSpatial
     cv::ocl::ProgramSource src_;
     int32_t prev_kernel_type_;
     float negative_slope_;
+    float min_value_;
+    float max_value_;
     UMat negative_slope_umat_;
     ocl4dnnFusedActiv_t fused_activ_;
     float power_;

modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
@@ -82,6 +82,8 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
     fused_eltwise_ = false;
     power_ = 1.f;
     negative_slope_ = 0;
+    min_value_ = 0;
+    max_value_ = 0;
     prev_kernel_type_ = -1;
     tuned_ = false;

@@ -162,6 +164,9 @@ void OCL4DNNConvSpatial<Dtype>::setFusionDefine(ocl4dnnFusedActiv_t fused_activ,
     case OCL4DNN_CONV_FUSED_ACTIV_TANH:
         addDef("FUSED_CONV_TANH", 1);
         break;
+    case OCL4DNN_CONV_FUSED_ACTIV_RELU6:
+        addDef("FUSED_CONV_RELU6", 1);
+        break;
     default:
         ;
     }

@@ -184,6 +189,10 @@ void OCL4DNNConvSpatial<Dtype>::setFusionArg(ocl4dnnFusedActiv_t fused_activ, bo
     case OCL4DNN_CONV_FUSED_ACTIV_POWER:
         kernel.set(argIdx++, (float)power_);
         break;
+    case OCL4DNN_CONV_FUSED_ACTIV_RELU6:
+        kernel.set(argIdx++, (float)min_value_);
+        kernel.set(argIdx++, (float)max_value_);
+        break;
     default:
         ;
     }

@@ -393,6 +402,19 @@ void OCL4DNNConvSpatial<Dtype>::setActivReLU(bool fuse_activ, float slope)
         fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
 }

+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::setActivReLU6(bool fuse_activ, float min, float max)
+{
+    if ( fuse_activ )
+    {
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_RELU6;
+        min_value_ = min;
+        max_value_ = max;
+    }
+    else
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+}
+
 template<typename Dtype>
 void OCL4DNNConvSpatial<Dtype>::setActivPReLU(bool fuse_activ, std::vector<float> &slope)
 {

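These two switch additions form one contract with the kernel-side FUSED_ARG definition in conv_layer_spatial.cl (last hunk): the build-time define selects "Dtype min_value, Dtype max_value," as the extra kernel parameters, so setFusionArg() must push min_value_ before max_value_. A hypothetical sketch of that ordering, for illustration only:

#include <vector>

// Hypothetical illustration, not part of the patch: a fused kernel compiled
// with FUSED_CONV_RELU6 consumes its extra arguments in the order
// (min_value, max_value), matching the kernel.set() calls above.
static std::vector<float> fusedReLU6KernelArgs(float min_value, float max_value)
{
    return { min_value, max_value };
}
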
modules/dnn/src/opencl/activations.cl
@@ -54,6 +54,17 @@ __kernel void ReLUForward(const int count, __global const T* in, __global T* out
 #endif
 }

+__kernel void ReLU6Forward(const int count, __global const T* in, __global T* out,
+                           const T minValue, const T maxValue)
+{
+    int index = get_global_id(0);
+    if(index < count)
+    {
+        T x = in[index];
+        out[index] = clamp(x, minValue, maxValue);
+    }
+}
+
 __kernel void PReLUForward(const int count, const int channels, const int plane_size,
                            __global const T* in, __global T* out, __global const T* slope_data)
 {

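The kernel body is a straight element-wise clamp: for every index i < count, out[i] = clamp(in[i], minValue, maxValue). A host-side checker one might use to validate an output blob against that definition (sketch; assumes both blobs are continuous CV_32F data already downloaded into cv::Mat):

#include <algorithm>
#include <cmath>
#include <opencv2/core.hpp>

// Sketch of a host-side check mirroring the ReLU6Forward kernel above.
static bool checkReLU6(const cv::Mat& input, const cv::Mat& output,
                       float minValue = 0.f, float maxValue = 6.f)
{
    CV_Assert(input.isContinuous() && output.isContinuous());
    CV_Assert(input.total() == output.total());
    CV_Assert(input.type() == CV_32F && output.type() == CV_32F);

    const float* in = input.ptr<float>();
    const float* out = output.ptr<float>();
    for (size_t i = 0; i < input.total(); i++)
    {
        float expected = std::min(std::max(in[i], minValue), maxValue);
        if (std::abs(out[i] - expected) > 1e-6f)
            return false;
    }
    return true;
}
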
modules/dnn/src/opencl/conv_layer_spatial.cl
@@ -48,19 +48,22 @@
 #if defined(FUSED_CONV_RELU)
 #define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope)))
-#define NEGATIVE_SLOPE_ARG Dtype negative_slope,
+#define FUSED_ARG Dtype negative_slope,
 #elif defined(FUSED_CONV_PRELU)
 #define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope[c])))
-#define NEGATIVE_SLOPE_ARG __global const Dtype *negative_slope,
+#define FUSED_ARG __global const Dtype *negative_slope,
 #elif defined(FUSED_CONV_POWER)
 #define ACTIVATION_RELU_FUNCTION(x, c) pow(x, power)
-#define NEGATIVE_SLOPE_ARG Dtype power,
+#define FUSED_ARG Dtype power,
 #elif defined(FUSED_CONV_TANH)
 #define ACTIVATION_RELU_FUNCTION(x, c) tanh(x)
-#define NEGATIVE_SLOPE_ARG
+#define FUSED_ARG
+#elif defined(FUSED_CONV_RELU6)
+#define ACTIVATION_RELU_FUNCTION(x, c) (clamp((Dtype)(x), min_value, max_value))
+#define FUSED_ARG Dtype min_value, Dtype max_value,
 #else
 #define ACTIVATION_RELU_FUNCTION(x, c) (x)
-#define NEGATIVE_SLOPE_ARG
+#define FUSED_ARG
 #endif

 #ifdef FUSED_CONV_ELTWISE

@@ -108,7 +111,7 @@
 __kernel void ConvolveBasic(
     ELTWISE_DATA_ARG
-    NEGATIVE_SLOPE_ARG
+    FUSED_ARG
     __global Dtype* image_data,
     int image_offset,
     __global Dtype* kernel_data,

@@ -197,7 +200,7 @@ __attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
 __kernel void
 convolve_simd(
     ELTWISE_DATA_ARG
-    NEGATIVE_SLOPE_ARG
+    FUSED_ARG
     __global Dtype* inputs_base,
     filter_qualifier Dtype* weights_base,
     BIAS_KERNEL_ARG

@@ -417,7 +420,7 @@ typedef struct float0 { float s0; } float0; //never used but makes compiler happy
 #define GEMM_LIKE_KERNEL_ARGS \
     ELTWISE_DATA_ARG \
-    NEGATIVE_SLOPE_ARG \
+    FUSED_ARG \
     const __global Dtype *src0, \
     const __global Dtype *src1, \
     BIAS_KERNEL_ARG \

@@ -1731,7 +1734,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
 __kernel void DWCONV(
     ELTWISE_DATA_ARG
-    NEGATIVE_SLOPE_ARG
+    FUSED_ARG
     __global Dtype* image_data,
     __global Dtype* kernel_data,
     BIAS_KERNEL_ARG
