ReLU6 layer OCL support

Include the ReLU6 OCL kernel and convolution layer fusion support

Signed-off-by: Li Peng <peng.li@intel.com>
Branch: pull/10891/head
Author: Li Peng, 7 years ago
Parent: 0e4eed0ba1
Commit: 2863f950d6
  1. modules/dnn/include/opencv2/dnn/all_layers.hpp (2 changed lines)
  2. modules/dnn/src/dnn.cpp (1 changed line)
  3. modules/dnn/src/layers/convolution_layer.cpp (14 changed lines)
  4. modules/dnn/src/layers/elementwise_layers.cpp (29 changed lines)
  5. modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp (6 changed lines)
  6. modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp (22 changed lines)
  7. modules/dnn/src/opencl/activations.cl (11 changed lines)
  8. modules/dnn/src/opencl/conv_layer_spatial.cl (21 changed lines)

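For reference, the operation added by this patch clamps every element to [min_value, max_value], defaulting to [0, 6]. A minimal CPU-side sketch of the same computation (plain C++, illustrative only, not part of the patch):

    #include <algorithm>
    #include <vector>

    // Illustrative reference for what ReLU6Forward and the fused clamp compute;
    // minValue/maxValue default to 0 and 6 in the dnn layer (see ReLU6Layer::create below).
    static void relu6_reference(const std::vector<float>& in, std::vector<float>& out,
                                float minValue = 0.f, float maxValue = 6.f)
    {
        out.resize(in.size());
        for (size_t i = 0; i < in.size(); ++i)
            out[i] = std::min(std::max(in[i], minValue), maxValue);
    }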
@@ -406,6 +406,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
class CV_EXPORTS ReLU6Layer : public ActivationLayer
{
public:
float minValue, maxValue;
static Ptr<ReLU6Layer> create(const LayerParams &params);
};

@@ -1439,6 +1439,7 @@ struct Net::Impl
nextData &&
((nextData->type == "ReLU") ||
(nextData->type == "ChannelsPReLU") ||
(nextData->type == "ReLU6") ||
(nextData->type == "TanH") ||
(nextData->type == "Power"))) )
{

@@ -860,6 +860,15 @@ public:
activType = OCL4DNN_CONV_FUSED_ACTIV_RELU;
}
Ptr<ReLU6Layer> activ_relu6 = activ.dynamicCast<ReLU6Layer>();
if( !activ_relu6.empty() )
{
reluslope.resize(2);
reluslope[0] = activ_relu6->minValue;
reluslope[1] = activ_relu6->maxValue;
activType = OCL4DNN_CONV_FUSED_ACTIV_RELU6;
}
Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
if( !activ_chprelu.empty() )
{
@@ -906,12 +915,17 @@ public:
{
convolutionOp->setActivTanh(true);
}
else if ( activType == OCL4DNN_CONV_FUSED_ACTIV_RELU6)
{
convolutionOp->setActivReLU6(true, reluslope[0], reluslope[1]);
}
else
{
convolutionOp->setActivReLU(false, 0);
convolutionOp->setActivPReLU(false, reluslope);
convolutionOp->setActivPower(false, 1.f);
convolutionOp->setActivTanh(false);
convolutionOp->setActivReLU6(false, 0, 0);
}
newActiv = false;
}
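These two hunks wire ReLU6 into the convolution fusion path: the layer's minValue/maxValue are stashed in reluslope[0]/reluslope[1] and forwarded to the ocl4dnn convolution via setActivReLU6(). A hedged end-to-end sketch of a graph that would exercise this path; the layer names, blob shapes, and input values below are illustrative assumptions, and the fusion only takes effect when an OpenCL device is actually available:

    #include <opencv2/core.hpp>
    #include <opencv2/dnn.hpp>

    int main()
    {
        using namespace cv;
        using namespace cv::dnn;

        // Illustrative only: a 1x1 Convolution followed by ReLU6, run with the
        // OpenCL target so the ocl4dnn convolution can fuse the clamp.
        Net net;

        LayerParams conv;
        conv.set("num_output", 1);
        conv.set("kernel_size", 1);
        conv.set("bias_term", false);
        int wsz[] = {1, 1, 1, 1};                           // OIHW weight shape (assumed)
        conv.blobs.push_back(Mat(4, wsz, CV_32F, Scalar(1)));

        LayerParams relu6;
        relu6.set("min_value", 0.0f);
        relu6.set("max_value", 6.0f);

        net.addLayerToPrev("conv1", "Convolution", conv);
        net.addLayerToPrev("relu6_1", "ReLU6", relu6);
        net.setPreferableTarget(DNN_TARGET_OPENCL);         // falls back to CPU if OpenCL is unavailable

        int isz[] = {1, 1, 4, 4};
        Mat input(4, isz, CV_32F);
        randu(input, Scalar::all(-10), Scalar::all(10));
        net.setInput(input);
        Mat out = net.forward();                            // outputs end up clamped to [0, 6]
        return 0;
    }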

@@ -381,8 +381,30 @@ struct ReLU6Functor
#ifdef HAVE_OPENCL
bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
// TODO: implement OCL version
return false;
std::vector<UMat> inputs;
std::vector<UMat> outputs;
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
String buildopt = oclGetTMacro(inputs[0]);
for (size_t i = 0; i < inputs.size(); i++)
{
UMat& src = inputs[i];
UMat& dst = outputs[i];
ocl::Kernel kernel("ReLU6Forward", ocl::dnn::activations_oclsrc, buildopt);
kernel.set(0, (int)src.total());
kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
kernel.set(3, (float)minValue);
kernel.set(4, (float)maxValue);
size_t gSize = src.total();
CV_Assert(kernel.run(1, &gSize, NULL, false));
}
return true;
}
#endif
@@ -867,6 +889,9 @@ Ptr<ReLU6Layer> ReLU6Layer::create(const LayerParams& params)
float maxValue = params.get<float>("max_value", 6.0f);
Ptr<ReLU6Layer> l(new ElementWiseLayer<ReLU6Functor>(ReLU6Functor(minValue, maxValue)));
l->setParamsFrom(params);
l->minValue = minValue;
l->maxValue = maxValue;
return l;
}
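With create() now exporting the clamp bounds through the new public minValue/maxValue fields, the convolution fusion code above can read them directly. A small hedged usage sketch of the factory (the parameter names match the implementation above; everything else is illustrative):

    #include <opencv2/dnn.hpp>

    int main()
    {
        using namespace cv;
        using namespace cv::dnn;

        // Sketch: build a standalone ReLU6 layer; "min_value"/"max_value" are the
        // keys read by ReLU6Layer::create(), defaulting to 0 and 6.
        LayerParams lp;
        lp.set("min_value", 0.0f);
        lp.set("max_value", 6.0f);
        Ptr<ReLU6Layer> relu6 = ReLU6Layer::create(lp);

        // The new public fields mirror the params, so fusion code can query them.
        CV_Assert(relu6->minValue == 0.0f && relu6->maxValue == 6.0f);
        return 0;
    }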

@@ -78,7 +78,8 @@ typedef enum {
OCL4DNN_CONV_FUSED_ACTIV_RELU = 1,
OCL4DNN_CONV_FUSED_ACTIV_PRELU = 2,
OCL4DNN_CONV_FUSED_ACTIV_POWER = 3,
OCL4DNN_CONV_FUSED_ACTIV_TANH = 4
OCL4DNN_CONV_FUSED_ACTIV_TANH = 4,
OCL4DNN_CONV_FUSED_ACTIV_RELU6 = 5
} ocl4dnnFusedActiv_t;
template<typename Dtype>
@@ -96,6 +97,7 @@ class OCL4DNNConvSpatial
void setActivPReLU(bool fuse_activ, std::vector<float> &slope);
void setActivPower(bool fuse_activ, float power);
void setActivTanh(bool fuse_activ);
void setActivReLU6(bool fuse_activ, float min, float max);
void setBias(bool bias_term);
private:
@@ -319,6 +321,8 @@ class OCL4DNNConvSpatial
cv::ocl::ProgramSource src_;
int32_t prev_kernel_type_;
float negative_slope_;
float min_value_;
float max_value_;
UMat negative_slope_umat_;
ocl4dnnFusedActiv_t fused_activ_;
float power_;

@@ -82,6 +82,8 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
fused_eltwise_ = false;
power_ = 1.f;
negative_slope_ = 0;
min_value_ = 0;
max_value_ = 0;
prev_kernel_type_ = -1;
tuned_ = false;
@@ -162,6 +164,9 @@ void OCL4DNNConvSpatial<Dtype>::setFusionDefine(ocl4dnnFusedActiv_t fused_activ,
case OCL4DNN_CONV_FUSED_ACTIV_TANH:
addDef("FUSED_CONV_TANH", 1);
break;
case OCL4DNN_CONV_FUSED_ACTIV_RELU6:
addDef("FUSED_CONV_RELU6", 1);
break;
default:
;
}
@@ -184,6 +189,10 @@ void OCL4DNNConvSpatial<Dtype>::setFusionArg(ocl4dnnFusedActiv_t fused_activ, bo
case OCL4DNN_CONV_FUSED_ACTIV_POWER:
kernel.set(argIdx++, (float)power_);
break;
case OCL4DNN_CONV_FUSED_ACTIV_RELU6:
kernel.set(argIdx++, (float)min_value_);
kernel.set(argIdx++, (float)max_value_);
break;
default:
;
}
@@ -393,6 +402,19 @@ void OCL4DNNConvSpatial<Dtype>::setActivReLU(bool fuse_activ, float slope)
fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
}
template<typename Dtype>
void OCL4DNNConvSpatial<Dtype>::setActivReLU6(bool fuse_activ, float min, float max)
{
if ( fuse_activ )
{
fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_RELU6;
min_value_ = min;
max_value_ = max;
}
else
fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
}
template<typename Dtype>
void OCL4DNNConvSpatial<Dtype>::setActivPReLU(bool fuse_activ, std::vector<float> &slope)
{

@@ -54,6 +54,17 @@ __kernel void ReLUForward(const int count, __global const T* in, __global T* out
#endif
}
__kernel void ReLU6Forward(const int count, __global const T* in, __global T* out,
const T minValue, const T maxValue)
{
int index = get_global_id(0);
if(index < count)
{
T x = in[index];
out[index] = clamp(x, minValue, maxValue);
}
}
__kernel void PReLUForward(const int count, const int channels, const int plane_size,
__global const T* in, __global T* out, __global const T* slope_data)
{

@@ -48,19 +48,22 @@
#if defined(FUSED_CONV_RELU)
#define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope)))
#define NEGATIVE_SLOPE_ARG Dtype negative_slope,
#define FUSED_ARG Dtype negative_slope,
#elif defined(FUSED_CONV_PRELU)
#define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope[c])))
#define NEGATIVE_SLOPE_ARG __global const Dtype *negative_slope,
#define FUSED_ARG __global const Dtype *negative_slope,
#elif defined(FUSED_CONV_POWER)
#define ACTIVATION_RELU_FUNCTION(x, c) pow(x, power)
#define NEGATIVE_SLOPE_ARG Dtype power,
#define FUSED_ARG Dtype power,
#elif defined(FUSED_CONV_TANH)
#define ACTIVATION_RELU_FUNCTION(x, c) tanh(x)
#define NEGATIVE_SLOPE_ARG
#define FUSED_ARG
#elif defined(FUSED_CONV_RELU6)
#define ACTIVATION_RELU_FUNCTION(x, c) (clamp((Dtype)(x), min_value, max_value))
#define FUSED_ARG Dtype min_value, Dtype max_value,
#else
#define ACTIVATION_RELU_FUNCTION(x, c) (x)
#define NEGATIVE_SLOPE_ARG
#define FUSED_ARG
#endif
#ifdef FUSED_CONV_ELTWISE
@@ -108,7 +111,7 @@
__kernel void ConvolveBasic(
ELTWISE_DATA_ARG
NEGATIVE_SLOPE_ARG
FUSED_ARG
__global Dtype* image_data,
int image_offset,
__global Dtype* kernel_data,
@@ -197,7 +200,7 @@ __attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
__kernel void
convolve_simd(
ELTWISE_DATA_ARG
NEGATIVE_SLOPE_ARG
FUSED_ARG
__global Dtype* inputs_base,
filter_qualifier Dtype* weights_base,
BIAS_KERNEL_ARG
@@ -417,7 +420,7 @@ typedef struct float0 { float s0; } float0; //never used but makes compiler happy
#define GEMM_LIKE_KERNEL_ARGS \
ELTWISE_DATA_ARG \
NEGATIVE_SLOPE_ARG \
FUSED_ARG \
const __global Dtype *src0, \
const __global Dtype *src1, \
BIAS_KERNEL_ARG \
@@ -1731,7 +1734,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
__kernel void DWCONV(
ELTWISE_DATA_ARG
NEGATIVE_SLOPE_ARG
FUSED_ARG
__global Dtype* image_data,
__global Dtype* kernel_data,
BIAS_KERNEL_ARG

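Across these kernels the former NEGATIVE_SLOPE_ARG slot is generalized to FUSED_ARG so that the RELU6 case can inject two scalars (min_value, max_value) instead of a single slope. A hedged host-language paraphrase of how the macros combine when the kernels are built with FUSED_CONV_RELU6 defined (the real kernels use OpenCL's clamp(); std::min/std::max stand in for it here, and the function below is a demo, not patch code):

    #include <algorithm>

    typedef float Dtype;

    // Paraphrase of the kernel-side macros under -D FUSED_CONV_RELU6:
    #define FUSED_ARG Dtype min_value, Dtype max_value,
    #define ACTIVATION_RELU_FUNCTION(x, c) (std::min(std::max((Dtype)(x), min_value), max_value))

    // Stand-in for a fused store: FUSED_ARG expands into the parameter list and
    // every store through ACTIVATION_RELU_FUNCTION becomes a clamp to [min_value, max_value].
    void fused_store_demo(FUSED_ARG const Dtype* sum, Dtype* out, int n)
    {
        for (int i = 0; i < n; ++i)
            out[i] = ACTIVATION_RELU_FUNCTION(sum[i], 0);
    }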