batch_norm layer ocl update

use a batch_norm ocl kernel to do the work Signed-off-by: Li Peng <peng.li@intel.com>
7 years ago · 4189214d04
parent be1207e5e4
commit 4189214d04
2 changed files with 99 additions and 41 deletions
--- a/modules/dnn/src/layers/batch_norm_layer.cpp
+++ b/modules/dnn/src/layers/batch_norm_layer.cpp
@ -12,6 +12,7 @@ Implementation of Batch Normalization layer.
 #include "../precomp.hpp"
 #include "op_halide.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
 #include "opencl_kernels_dnn.hpp"
 namespace cv
 {
@ -22,7 +23,7 @@ class BatchNormLayerImpl : public BatchNormLayer
 {
 public:
    Mat weights_, bias_;
-    Mat weightMat, biasMat;
+    UMat umat_weight, umat_bias;
    BatchNormLayerImpl(const LayerParams& params)
    {
@ -80,6 +81,9 @@ public:
            dstWeightsData[i] = w;
            dstBiasData[i] = (hasBias ? biasData[i] : 0.0f) - w * meanData[i] * varMeanScale;
        }
        umat_weight = weights_.getUMat(ACCESS_READ);
        umat_bias = bias_.getUMat(ACCESS_READ);
    }
    void getScaleShift(Mat& scale, Mat& shift) const
@ -97,25 +101,6 @@ public:
        return true;
    }
    void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
    {
        if (inputs[0]->dims == 4)
        {
            int groups = inputs[0]->size[0];
            int channels = inputs[0]->size[1];
            int rows = inputs[0]->size[2];
            int cols = inputs[0]->size[3];
            MatShape s = shape(groups * channels, rows * cols);
            weightMat = Mat(s[0], s[1], CV_32FC1);
            biasMat = Mat(s[0], s[1], CV_32FC1);
            for (int n = 0; n < s[0]; n++)
            {
                weightMat.row(n).setTo(weights_.at<float>(n % channels));
                biasMat.row(n).setTo(bias_.at<float>(n % channels));
            }
        }
    }
    virtual bool supportBackend(int backendId)
    {
        return backendId == DNN_BACKEND_DEFAULT ||
@ -155,8 +140,23 @@ public:
                MatShape s = shape(groups * channels, rows * cols);
                UMat src = inputs[ii].reshape(1, s.size(), &s[0]);
                UMat dst = outputs[ii].reshape(1, s.size(), &s[0]);
-                multiply(src, weightMat, dst);
+                int number = (s[1] % 8 == 0) ? 8 : ((s[1] % 4 == 0) ? 4 : 1);
-                add(dst, biasMat, dst);
+                String buildopt = format("-DNUM=%d ", number);
                String kname = format("batch_norm%d", number);
                ocl::Kernel kernel(kname.c_str(), ocl::dnn::batchnorm_oclsrc, buildopt);
                if (kernel.empty())
                    return false;
                size_t global[] = { (size_t)s[0], (size_t)(s[1] / number) };
                kernel.set(0, ocl::KernelArg::PtrReadOnly(src));
                kernel.set(1, (int)s[0]);
                kernel.set(2, (int)s[1]);
                kernel.set(3, (int)channels);
                kernel.set(4, ocl::KernelArg::PtrReadOnly(umat_weight));
                kernel.set(5, ocl::KernelArg::PtrReadOnly(umat_bias));
                kernel.set(6, ocl::KernelArg::PtrWriteOnly(dst));
                bool ret = kernel.run(2, global, NULL, false);
                if (!ret)
                    return false;
            }
        }
        return true;
--- a/modules/dnn/src/opencl/batchnorm.cl
+++ b/modules/dnn/src/opencl/batchnorm.cl
@ -1,26 +1,84 @@
 /*M///////////////////////////////////////////////////////////////////////////////////////
 //
 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
 //
 //  By downloading, copying, installing or using the software you agree to this license.
 //  If you do not agree to this license, do not download, install,
 //  copy or use the software.
 //
 //
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //
 // Copyright (C) 2017, Intel Corporation, all rights reserved.
 // Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
 //   * Redistribution's of source code must retain the above copyright notice,
 //     this list of conditions and the following disclaimer.
 //
 //   * Redistribution's in binary form must reproduce the above copyright notice,
 //     this list of conditions and the following disclaimer in the documentation
 //     and/or other materials provided with the distribution.
 //
 //   * The name of the copyright holders may not be used to endorse or promote products
 //     derived from this software without specific prior written permission.
 //
 // This software is provided by the copyright holders and contributors "as is" and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
 // In no event shall the Intel Corporation or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
 // (including, but not limited to, procurement of substitute goods or services;
 // loss of use, data, or profits; or business interruption) however caused
 // and on any theory of liability, whether in contract, strict liability,
 // or tort (including negligence or otherwise) arising in any way out of
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-__kernel void batchnorm(__global const T *src, int src_offset,
+#define Dtype float
-                        __global const float *meanMat,
+#define Dtype4 float4
-                        float varMeanScale,
+#define Dtype8 float8
-                        __global const float *invStdMat,
+
-                        __global const float *weight,
+#if NUM == 8
-                        __global const float *bias,
+    #define load(src, index) vload8(0, src + index)
-                        int hasWeight, int hasBias,
+    #define store(vec, dst, index) vstore8(vec, 0, dst + index)
-                        int width, int height, int channel,
+    #define vec_type Dtype8
-                        __global T *dst, int dst_offset)
+    #define BATCH_NORM batch_norm8
 #elif NUM == 4
    #define load(src, index) vload4(0, src + index)
    #define store(vec, dst, index) vstore4(vec, 0, dst + index)
    #define vec_type Dtype4
    #define BATCH_NORM batch_norm4
 #elif NUM == 1
    #define load(src, index) src[index]
    #define store(vec, dst, index) dst[index] = vec
    #define vec_type Dtype
    #define BATCH_NORM batch_norm1
 #endif
 __kernel void BATCH_NORM(__global const Dtype* src,
                         const int rows,
                         const int cols,
                         const int channels,
                         __global const Dtype* weight,
                         __global const Dtype* bias,
                         __global Dtype* dst)
 {
    int x = get_global_id(0);
-    int y = get_global_id(1);
+    int y = get_global_id(1) * NUM;
-    int c = get_global_id(2);
+    int index = x * cols + y;
-    if (x >= width || y >= height || c >= channel)
+    if (x >= rows || y >= cols)
        return;
-    float mean = meanMat[c] * varMeanScale;
+    Dtype w = weight[x % channels];
-    float invstd = invStdMat[c];
+    Dtype b = bias[x % channels];
-    float w = hasWeight ? weight[c] : 1;
+    vec_type src_vec = load(src, index);
-    float b = hasBias ? bias[c] : 0;
+    vec_type dst_vec = src_vec * w + (vec_type)b;
-    int index = y * width + x + c * width * height;
+    store(dst_vec, dst, index);
    T val = (src[index + src_offset] - mean) * w * invstd + b;
    dst[index + dst_offset] = val;
 }