From e15928b49e62e306450eb14f495aea0f7118a608 Mon Sep 17 00:00:00 2001
From: Li Peng
Date: Tue, 23 Jan 2018 20:44:28 +0800
Subject: [PATCH 1/2] convolution and tanh layer fusion

Signed-off-by: Li Peng
---
 modules/dnn/src/dnn.cpp                          |  9 +++++----
 modules/dnn/src/layers/convolution_layer.cpp     | 10 ++++++++++
 modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp      |  4 +++-
 .../dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp | 14 ++++++++++++++
 modules/dnn/src/opencl/conv_layer_spatial.cl     |  3 +++
 5 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index e1e2e40bca..84dc8af1e6 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -1239,13 +1239,14 @@ struct Net::Impl
                 }
             }

-            // For now, OpenCL target only support fusion with activation of ReLU/ChannelsPReLU/Power
+            // For now, OpenCL target supports fusion with activation of ReLU/ChannelsPReLU/Power/TanH
             if ( preferableTarget != DNN_TARGET_OPENCL ||
                 (preferableTarget == DNN_TARGET_OPENCL &&
                  nextData &&
-                 (!nextData->type.compare("ReLU") ||
-                  !nextData->type.compare("ChannelsPReLU") ||
-                  !nextData->type.compare("Power"))) )
+                 ((nextData->type == "ReLU") ||
+                  (nextData->type == "ChannelsPReLU") ||
+                  (nextData->type == "TanH") ||
+                  (nextData->type == "Power"))) )
             {
                 Ptr<ActivationLayer> nextActivLayer;

diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index e2ae78cf83..4635c8f259 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -246,6 +246,11 @@ public:
                 power = activ_power->power;
                 activType = OCL4DNN_CONV_FUSED_ACTIV_POWER;
             }
+            Ptr<TanHLayer> activ_tanh = activ.dynamicCast<TanHLayer>();
+            if (!activ_tanh.empty())
+            {
+                activType = OCL4DNN_CONV_FUSED_ACTIV_TANH;
+            }
         }
 #endif
         return !activ.empty();
@@ -877,11 +882,16 @@ public:
             {
                 convolutionOp->setActivPower(true, power);
             }
+            else if ( activType == OCL4DNN_CONV_FUSED_ACTIV_TANH)
+            {
+                convolutionOp->setActivTanh(true);
+            }
             else
             {
                 convolutionOp->setActivReLU(false, 0);
                 convolutionOp->setActivPReLU(false, reluslope);
                 convolutionOp->setActivPower(false, 1.f);
+                convolutionOp->setActivTanh(false);
             }
             newActiv = false;
         }

diff --git a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
index 59eed46e33..b536ce40e6 100644
--- a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
+++ b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
@@ -77,7 +77,8 @@ typedef enum {
     OCL4DNN_CONV_FUSED_ACTIV_NONE = 0,
     OCL4DNN_CONV_FUSED_ACTIV_RELU = 1,
     OCL4DNN_CONV_FUSED_ACTIV_PRELU = 2,
-    OCL4DNN_CONV_FUSED_ACTIV_POWER = 3
+    OCL4DNN_CONV_FUSED_ACTIV_POWER = 3,
+    OCL4DNN_CONV_FUSED_ACTIV_TANH = 4
 } ocl4dnnFusedActiv_t;

 template<typename Dtype>
@@ -94,6 +95,7 @@ class OCL4DNNConvSpatial
     void setActivReLU(bool fuse_activ, float slope);
     void setActivPReLU(bool fuse_activ, std::vector<float> &slope);
     void setActivPower(bool fuse_activ, float power);
+    void setActivTanh(bool fuse_activ);
     void setBias(bool bias_term);

 private:

diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
index 1a05056956..16bea92ca3 100644
--- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
+++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
@@ -159,6 +159,9 @@ void OCL4DNNConvSpatial<Dtype>::setFusionDefine(ocl4dnnFusedActiv_t fused_activ,
         case OCL4DNN_CONV_FUSED_ACTIV_POWER:
             addDef("FUSED_CONV_POWER", 1);
             break;
+        case OCL4DNN_CONV_FUSED_ACTIV_TANH:
+            addDef("FUSED_CONV_TANH", 1);
+            break;
         default:
             ;
     }
@@ -415,6 +418,17 @@ void OCL4DNNConvSpatial<Dtype>::setActivPower(bool fuse_activ, float power)
         fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
 }

+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::setActivTanh(bool fuse_activ)
+{
+    if ( fuse_activ )
+    {
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_TANH;
+    }
+    else
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+}
+
 template<typename Dtype>
 bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
                                         const UMat& bottom2,

diff --git a/modules/dnn/src/opencl/conv_layer_spatial.cl b/modules/dnn/src/opencl/conv_layer_spatial.cl
index 7d66ed1814..3369c6c971 100644
--- a/modules/dnn/src/opencl/conv_layer_spatial.cl
+++ b/modules/dnn/src/opencl/conv_layer_spatial.cl
@@ -55,6 +55,9 @@
 #elif defined(FUSED_CONV_POWER)
 #define ACTIVATION_RELU_FUNCTION(x, c) pow(x, power)
 #define NEGATIVE_SLOPE_ARG Dtype power,
+#elif defined(FUSED_CONV_TANH)
+#define ACTIVATION_RELU_FUNCTION(x, c) tanh(x)
+#define NEGATIVE_SLOPE_ARG
 #else
 #define ACTIVATION_RELU_FUNCTION(x, c) (x)
 #define NEGATIVE_SLOPE_ARG

From 249308393566884ca6c22bbba9b7590c5fceb993 Mon Sep 17 00:00:00 2001
From: Li Peng
Date: Tue, 23 Jan 2018 23:52:41 +0800
Subject: [PATCH 2/2] mvn, batch_norm and relu layer fusion

Signed-off-by: Li Peng
---
 modules/dnn/src/dnn.cpp                     |  3 +-
 modules/dnn/src/layers/batch_norm_layer.cpp |  9 ++--
 modules/dnn/src/layers/mvn_layer.cpp        | 58 +++++++++++++++++----
 modules/dnn/src/opencl/mvn.cl               | 18 +++++++
 4 files changed, 75 insertions(+), 13 deletions(-)

diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index 84dc8af1e6..26ff469e18 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -1190,7 +1190,8 @@ struct Net::Impl

         // TODO: OpenCL target support more fusion styles.
         if ( preferableTarget == DNN_TARGET_OPENCL &&
-             (!cv::ocl::useOpenCL() || ld.layerInstance->type.compare("Convolution")) )
+             (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
+             ld.layerInstance->type != "MVN")) )
             continue;

         Ptr<Layer>& currLayer = ld.layerInstance;

diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp
index eca30f4570..8acf8b2477 100644
--- a/modules/dnn/src/layers/batch_norm_layer.cpp
+++ b/modules/dnn/src/layers/batch_norm_layer.cpp
@@ -81,9 +81,6 @@ public:
             dstWeightsData[i] = w;
             dstBiasData[i] = (hasBias ? biasData[i] : 0.0f) - w * meanData[i] * varMeanScale;
         }
-
-        umat_weight = weights_.getUMat(ACCESS_READ);
-        umat_bias = bias_.getUMat(ACCESS_READ);
     }

     void getScaleShift(Mat& scale, Mat& shift) const
@@ -119,6 +116,12 @@ public:
         CV_Assert(blobs.size() >= 2);
         CV_Assert(inputs.size() == 1);

+        if (umat_weight.empty())
+        {
+            umat_weight = weights_.getUMat(ACCESS_READ);
+            umat_bias = bias_.getUMat(ACCESS_READ);
+        }
+
         UMat &inpBlob = inputs[0];
         CV_Assert(inpBlob.dims == 2 || inpBlob.dims == 4);
         int groups = inpBlob.size[0];

diff --git a/modules/dnn/src/layers/mvn_layer.cpp b/modules/dnn/src/layers/mvn_layer.cpp
index d5daa768b7..1d5e12b2b6 100644
--- a/modules/dnn/src/layers/mvn_layer.cpp
+++ b/modules/dnn/src/layers/mvn_layer.cpp
@@ -60,6 +60,36 @@ public:
         normVariance = params.get<bool>("normalize_variance", true);
         acrossChannels = params.get<bool>("across_channels", false);
         eps = params.get<double>("eps", 1e-9);
+        fuse_batch_norm = false;
+        fuse_relu = false;
+        relu_slope = 0.f;
+    }
+
+    Ptr<BatchNormLayer> bnorm;
+    Mat scale, shift;
+    UMat bnorm_weight, bnorm_bias;
+    bool fuse_batch_norm;
+
+    bool setBatchNorm(const Ptr<BatchNormLayer>& layer )
+    {
+        bnorm = layer;
+        fuse_batch_norm = !bnorm.empty() && (preferableTarget == DNN_TARGET_OPENCL);
+        return fuse_batch_norm;
+    }
+
+    Ptr<ReLULayer> activ_relu;
+    float relu_slope;
+    bool fuse_relu;
+    bool setActivation(const Ptr<ActivationLayer>& layer)
+    {
+        if (!layer.empty() && preferableTarget == DNN_TARGET_OPENCL)
+        {
+            activ_relu = layer.dynamicCast<ReLULayer>();
+            if( !activ_relu.empty() )
+                relu_slope = activ_relu->negativeSlope;
+        }
+        fuse_relu = !activ_relu.empty();
+        return fuse_relu;
     }

 #ifdef HAVE_OPENCL
@@ -71,19 +101,24 @@ public:
         inputs_.getUMatVector(inputs);
         outputs_.getUMatVector(outputs);

+        if( fuse_batch_norm && scale.empty())
+        {
+            bnorm->getScaleShift(scale, shift);
+            bnorm_weight = scale.getUMat(ACCESS_READ);
+            bnorm_bias = shift.getUMat(ACCESS_READ);
+        }
+
         for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
         {
-            UMat &inpBlob = inputs[inpIdx];
-            UMat &outBlob = outputs[inpIdx];
+            UMat &inpMat = inputs[inpIdx];
+            UMat &outMat = outputs[inpIdx];

             int splitDim = (acrossChannels) ? 1 : 2;
             int i, newRows = 1;
             for( i = 0; i < splitDim; i++ )
-                newRows *= inpBlob.size[i];
+                newRows *= inpMat.size[i];

-            MatShape s = shape(newRows, inpBlob.total() / newRows);
-            UMat& inpMat = inpBlob;
-            UMat& outMat = outBlob;
+            MatShape s = shape(newRows, inpMat.total() / newRows);
             UMat oneMat = UMat::ones(s[1], 1, CV_32F);
             UMat meanMat = UMat(s[0], 1, CV_32F);
             UMat devMat = UMat(s[0], 1, CV_32F);
@@ -121,8 +156,9 @@ public:
             }

             String kname = format("mvn%d", number);
-            if (normVariance)
-                buildopt += "-DNORM_VARIANCE";
+            buildopt += format("%s %s %s ", (normVariance) ? "-DNORM_VARIANCE" : "",
+                               (fuse_batch_norm) ? "-DFUSE_BATCH_NORM" : "",
+                               (fuse_relu) ? "-DFUSE_RELU" : "");
             ocl::Kernel kernel1(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt);
             if (kernel1.empty())
                 return false;
@@ -132,7 +168,11 @@ public:
             kernel1.set(3, (float)eps);
             kernel1.set(4, ocl::KernelArg::PtrReadOnly(meanMat));
             kernel1.set(5, ocl::KernelArg::PtrReadOnly(devMat));
-            kernel1.set(6, ocl::KernelArg::PtrWriteOnly(outMat));
+            kernel1.set(6, ocl::KernelArg::PtrReadOnly(bnorm_weight));
+            kernel1.set(7, ocl::KernelArg::PtrReadOnly(bnorm_bias));
+            kernel1.set(8, (int)inpMat.size[1]);
+            kernel1.set(9, (float)relu_slope);
+            kernel1.set(10, ocl::KernelArg::PtrWriteOnly(outMat));
             ret = kernel1.run(2, global, NULL, false);
             if (!ret)
                 return false;

diff --git a/modules/dnn/src/opencl/mvn.cl b/modules/dnn/src/opencl/mvn.cl
index c1bf1f0c8c..cc059eeb1a 100644
--- a/modules/dnn/src/opencl/mvn.cl
+++ b/modules/dnn/src/opencl/mvn.cl
@@ -89,6 +89,10 @@ __kernel void MVN(__global const Dtype* src,
                   const Dtype eps,
                   __global const Dtype* mean,
                   __global const Dtype* dev,
+                  __global const Dtype* bnorm_weight,
+                  __global const Dtype* bnorm_bias,
+                  const int channels,
+                  const float relu_slope,
                   __global Dtype* dst)
 {
     int x = get_global_id(0);
@@ -106,7 +110,21 @@ __kernel void MVN(__global const Dtype* src,
 #else
     alpha = 1;
 #endif
+
+    Dtype w = 1.f, b = 0.f;
+#ifdef FUSE_BATCH_NORM
+    w = bnorm_weight[x % channels];
+    b = bnorm_bias[x % channels];
+#endif
+
     vec_type src_vec = load(src, index) - (vec_type)mean_val;
     vec_type dst_vec = src_vec * alpha;
+    dst_vec = dst_vec * w + (vec_type)b;
+
+#ifdef FUSE_RELU
+    vec_type new_val = dst_vec * relu_slope;
+    dst_vec = select(new_val, dst_vec, dst_vec > (vec_type)0.f);
+#endif
+
     store(dst_vec, dst, index);
 }