diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp
index 5284e4d4a7..eca30f4570 100644
--- a/modules/dnn/src/layers/batch_norm_layer.cpp
+++ b/modules/dnn/src/layers/batch_norm_layer.cpp
@@ -12,6 +12,7 @@ Implementation of Batch Normalization layer.
 #include "../precomp.hpp"
 #include "op_halide.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
+#include "opencl_kernels_dnn.hpp"
 
 namespace cv
 {
@@ -22,7 +23,7 @@ class BatchNormLayerImpl : public BatchNormLayer
 {
 public:
     Mat weights_, bias_;
-    Mat weightMat, biasMat;
+    UMat umat_weight, umat_bias;
 
     BatchNormLayerImpl(const LayerParams& params)
     {
@@ -80,6 +81,9 @@ public:
             dstWeightsData[i] = w;
             dstBiasData[i] = (hasBias ? biasData[i] : 0.0f) - w * meanData[i] * varMeanScale;
         }
+
+        umat_weight = weights_.getUMat(ACCESS_READ);
+        umat_bias = bias_.getUMat(ACCESS_READ);
     }
 
     void getScaleShift(Mat& scale, Mat& shift) const
@@ -97,25 +101,6 @@ public:
         return true;
     }
 
-    void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
-    {
-        if (inputs[0]->dims == 4)
-        {
-            int groups = inputs[0]->size[0];
-            int channels = inputs[0]->size[1];
-            int rows = inputs[0]->size[2];
-            int cols = inputs[0]->size[3];
-            MatShape s = shape(groups * channels, rows * cols);
-            weightMat = Mat(s[0], s[1], CV_32FC1);
-            biasMat = Mat(s[0], s[1], CV_32FC1);
-            for (int n = 0; n < s[0]; n++)
-            {
-                weightMat.row(n).setTo(weights_.at<float>(n % channels));
-                biasMat.row(n).setTo(bias_.at<float>(n % channels));
-            }
-        }
-    }
-
     virtual bool supportBackend(int backendId)
     {
         return backendId == DNN_BACKEND_DEFAULT ||
@@ -155,8 +140,23 @@ public:
                 MatShape s = shape(groups * channels, rows * cols);
                 UMat src = inputs[ii].reshape(1, s.size(), &s[0]);
                 UMat dst = outputs[ii].reshape(1, s.size(), &s[0]);
-                multiply(src, weightMat, dst);
-                add(dst, biasMat, dst);
+                int number = (s[1] % 8 == 0) ? 8 : ((s[1] % 4 == 0) ? 4 : 1);
+                String buildopt = format("-DNUM=%d ", number);
+                String kname = format("batch_norm%d", number);
+                ocl::Kernel kernel(kname.c_str(), ocl::dnn::batchnorm_oclsrc, buildopt);
+                if (kernel.empty())
+                    return false;
+                size_t global[] = { (size_t)s[0], (size_t)(s[1] / number) };
+                kernel.set(0, ocl::KernelArg::PtrReadOnly(src));
+                kernel.set(1, (int)s[0]);
+                kernel.set(2, (int)s[1]);
+                kernel.set(3, (int)channels);
+                kernel.set(4, ocl::KernelArg::PtrReadOnly(umat_weight));
+                kernel.set(5, ocl::KernelArg::PtrReadOnly(umat_bias));
+                kernel.set(6, ocl::KernelArg::PtrWriteOnly(dst));
+                bool ret = kernel.run(2, global, NULL, false);
+                if (!ret)
+                    return false;
             }
         }
         return true;
diff --git a/modules/dnn/src/opencl/batchnorm.cl b/modules/dnn/src/opencl/batchnorm.cl
index 3f9401c52e..e0072c9fea 100644
--- a/modules/dnn/src/opencl/batchnorm.cl
+++ b/modules/dnn/src/opencl/batchnorm.cl
@@ -1,26 +1,84 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
 
-__kernel void batchnorm(__global const T *src, int src_offset,
-                        __global const float *meanMat,
-                        float varMeanScale,
-                        __global const float *invStdMat,
-                        __global const float *weight,
-                        __global const float *bias,
-                        int hasWeight, int hasBias,
-                        int width, int height, int channel,
-                        __global T *dst, int dst_offset)
+#define Dtype float
+#define Dtype4 float4
+#define Dtype8 float8
+
+#if NUM == 8
+    #define load(src, index) vload8(0, src + index)
+    #define store(vec, dst, index) vstore8(vec, 0, dst + index)
+    #define vec_type Dtype8
+    #define BATCH_NORM batch_norm8
+#elif NUM == 4
+    #define load(src, index) vload4(0, src + index)
+    #define store(vec, dst, index) vstore4(vec, 0, dst + index)
+    #define vec_type Dtype4
+    #define BATCH_NORM batch_norm4
+#elif NUM == 1
+    #define load(src, index) src[index]
+    #define store(vec, dst, index) dst[index] = vec
+    #define vec_type Dtype
+    #define BATCH_NORM batch_norm1
+#endif
+
+__kernel void BATCH_NORM(__global const Dtype* src,
+                         const int rows,
+                         const int cols,
+                         const int channels,
+                         __global const Dtype* weight,
+                         __global const Dtype* bias,
+                         __global Dtype* dst)
 {
     int x = get_global_id(0);
-    int y = get_global_id(1);
-    int c = get_global_id(2);
+    int y = get_global_id(1) * NUM;
+    int index = x * cols + y;
 
-    if (x >= width || y >= height || c >= channel)
+    if (x >= rows || y >= cols)
         return;
 
-    float mean = meanMat[c] * varMeanScale;
-    float invstd = invStdMat[c];
-    float w = hasWeight ? weight[c] : 1;
-    float b = hasBias ? bias[c] : 0;
-    int index = y * width + x + c * width * height;
-    T val = (src[index + src_offset] - mean) * w * invstd + b;
-    dst[index + dst_offset] = val;
+    Dtype w = weight[x % channels];
+    Dtype b = bias[x % channels];
+    vec_type src_vec = load(src, index);
+    vec_type dst_vec = src_vec * w + (vec_type)b;
+    store(dst_vec, dst, index);
 }
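
For reference, below is a minimal CPU sketch of what the new kernels compute, assuming the fused per-channel weights_/bias_ produced in the layer constructor (mean and variance already folded in). The helper name batchNormReference and its standalone form are illustrative only and not part of the patch; it mirrors the NUM = 8/4/1 vector-width dispatch in forward_ocl and the indexing after the blob is reshaped to (groups * channels) x (rows * cols).

#include <vector>
#include <cstddef>

// CPU reference for the batch_norm OpenCL kernels (illustrative helper, not in the patch).
static void batchNormReference(const std::vector<float>& src,
                               const std::vector<float>& weight,  // fused scale, one value per channel
                               const std::vector<float>& bias,    // fused shift, one value per channel
                               std::vector<float>& dst,
                               int rows,                          // groups * channels
                               int cols,                          // spatial size (height * width)
                               int channels)
{
    // Same dispatch rule as the host code: widest vector width that divides the row length.
    int num = (cols % 8 == 0) ? 8 : ((cols % 4 == 0) ? 4 : 1);

    dst.resize(src.size());
    for (int x = 0; x < rows; ++x)                 // global id 0: one reshaped row
    {
        float w = weight[x % channels];            // row index modulo channels recovers the channel
        float b = bias[x % channels];
        for (int y = 0; y < cols; y += num)        // global id 1: each work-item covers `num` elements
        {
            for (int k = 0; k < num; ++k)
            {
                size_t index = (size_t)x * cols + y + k;
                dst[index] = src[index] * w + b;   // mean/variance already folded into w and b
            }
        }
    }
}

Requiring cols to be divisible by the chosen width (falling back to the scalar batch_norm1 variant otherwise) keeps the kernels free of a remainder loop, at the cost of losing vectorization for spatial sizes not divisible by 4 or 8.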