diff --git a/modules/dnn/src/layers/mvn_layer.cpp b/modules/dnn/src/layers/mvn_layer.cpp index e7d2ff524e..46ffcc5238 100644 --- a/modules/dnn/src/layers/mvn_layer.cpp +++ b/modules/dnn/src/layers/mvn_layer.cpp @@ -43,6 +43,8 @@ #include "../precomp.hpp" #include "layers_common.hpp" #include +#include "math_functions.hpp" +#include "opencl_kernels_dnn.hpp" namespace cv { @@ -60,11 +62,93 @@ public: eps = params.get("eps", 1e-9); } +#ifdef HAVE_OPENCL + bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_) + { + std::vector inputs; + std::vector outputs; + + inputs_.getUMatVector(inputs); + outputs_.getUMatVector(outputs); + + for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++) + { + UMat &inpBlob = inputs[inpIdx]; + UMat &outBlob = outputs[inpIdx]; + + int splitDim = (acrossChannels) ? 1 : 2; + int i, newRows = 1; + for( i = 0; i < splitDim; i++ ) + newRows *= inpBlob.size[i]; + + MatShape s = shape(newRows, inpBlob.total() / newRows); + UMat& inpMat = inpBlob; + UMat& outMat = outBlob; + UMat oneMat = UMat::ones(s[1], 1, CV_32F); + UMat meanMat = UMat(s[0], 1, CV_32F); + UMat devMat = UMat(s[0], 1, CV_32F); + UMat tmpMat = UMat(s[0], s[1], CV_32F); + float alpha = 1.0f / s[1]; + + bool ret = ocl4dnn::ocl4dnnGEMV(ocl4dnn::CblasNoTrans, s[0], s[1], alpha, + inpMat, 0, oneMat, 0, 0.0f, meanMat, 0); + if (!ret) + return false; + + int number = (s[1] % 8 == 0) ? 8 : ((s[1] % 4 == 0) ? 4 : 1); + String buildopt = format("-DNUM=%d ", number); + String kname = format("calc_mean%d", number); + ocl::Kernel kernel(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt); + if (kernel.empty()) + return false; + size_t global[] = { (size_t)s[0], (size_t)(s[1] / number) }; + kernel.set(0, ocl::KernelArg::PtrReadOnly(inpMat)); + kernel.set(1, (int)s[0]); + kernel.set(2, (int)s[1]); + kernel.set(3, ocl::KernelArg::PtrReadOnly(meanMat)); + kernel.set(4, ocl::KernelArg::PtrWriteOnly(tmpMat)); + ret = kernel.run(2, global, NULL, false); + if (!ret) + return false; + + if (normVariance) + { + ret = ocl4dnn::ocl4dnnGEMV(ocl4dnn::CblasNoTrans, s[0], s[1], alpha, + tmpMat, 0, oneMat, 0, 0.0f, devMat, 0); + if (!ret) + return false; + } + + kname = format("mvn%d", number); + if (normVariance) + buildopt += "-DNORM_VARIANCE"; + ocl::Kernel kernel1(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt); + if (kernel1.empty()) + return false; + kernel1.set(0, ocl::KernelArg::PtrReadOnly(inpMat)); + kernel1.set(1, (int)s[0]); + kernel1.set(2, (int)s[1]); + kernel1.set(3, (float)eps); + kernel1.set(4, ocl::KernelArg::PtrReadOnly(meanMat)); + kernel1.set(5, ocl::KernelArg::PtrReadOnly(devMat)); + kernel1.set(6, ocl::KernelArg::PtrWriteOnly(outMat)); + ret = kernel1.run(2, global, NULL, false); + if (!ret) + return false; + } + return true; + } +#endif + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && + OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + forward_ocl(inputs_arr, outputs_arr, internals_arr)) + Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); } diff --git a/modules/dnn/src/opencl/mvn.cl b/modules/dnn/src/opencl/mvn.cl new file mode 100644 index 0000000000..c87667d38a --- /dev/null +++ b/modules/dnn/src/opencl/mvn.cl @@ -0,0 +1,112 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2017, Intel Corporation, all rights reserved. +// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#define Dtype float +#define Dtype4 float4 +#define Dtype8 float8 + +#if NUM == 8 + #define load(src, index) vload8(0, src + index) + #define store(vec, dst, index) vstore8(vec, 0, dst + index) + #define vec_type Dtype8 + #define CALC_MEAN calc_mean8 + #define MVN mvn8 +#elif NUM == 4 + #define load(src, index) vload4(0, src + index) + #define store(vec, dst, index) vstore4(vec, 0, dst + index) + #define vec_type Dtype4 + #define CALC_MEAN calc_mean4 + #define MVN mvn4 +#elif NUM == 1 + #define load(src, index) src[index] + #define store(vec, dst, index) dst[index] = vec + #define vec_type Dtype + #define CALC_MEAN calc_mean1 + #define MVN mvn1 +#endif + +__kernel void CALC_MEAN(__global const Dtype* src, + const int rows, + const int cols, + __global Dtype* mean, + __global Dtype* dst) +{ + int x = get_global_id(0); + int y = get_global_id(1) * NUM; + int index = x * cols + y; + + if (x >= rows || y >= cols) + return; + + Dtype mean_val = mean[x]; + vec_type src_vec = load(src, index); + vec_type dst_vec = pow(src_vec - (vec_type)mean_val, 2); + store(dst_vec, dst, index); +} + +__kernel void MVN(__global const Dtype* src, + const int rows, + const int cols, + const Dtype eps, + __global const Dtype* mean, + __global const Dtype* dev, + __global Dtype* dst) +{ + int x = get_global_id(0); + int y = get_global_id(1) * NUM; + int index = x * cols + y; + + if (x >= rows || y >= cols) + return; + + Dtype mean_val = mean[x]; + Dtype dev_val = sqrt(dev[x]); + Dtype alpha; +#ifdef NORM_VARIANCE + alpha = 1 / (eps + dev_val); +#else + alpha = 1; +#endif + vec_type src_vec = load(src, index) - (vec_type)mean_val; + vec_type dst_vec = src_vec * alpha; + store(dst_vec, dst, index); +} diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 94c8774e80..d88f01d380 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -202,6 +202,11 @@ TEST(Layer_Test_MVN, Accuracy) testLayerUsingCaffeModels("layer_mvn"); } +OCL_TEST(Layer_Test_MVN, Accuracy) +{ + testLayerUsingCaffeModels("layer_mvn", DNN_TARGET_OPENCL); +} + void testReshape(const MatShape& inputShape, const MatShape& targetShape, int axis = 0, int num_axes = -1, MatShape mask = MatShape())