From bf9e9b81ac2abbdd1a25ab5a589ae727a169c38d Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko
Date: Wed, 27 Jul 2016 19:22:16 +0300
Subject: [PATCH] Add OpenCL implementation and public interface for Softmax layer

---
 .../dnn/include/opencv2/dnn/all_layers.hpp |   7 +
 modules/dnn/src/init.cpp                   |   2 +-
 modules/dnn/src/layers/pooling_layer.cpp   |   2 +-
 modules/dnn/src/layers/softmax_layer.cpp   | 218 ++++++++++++------
 modules/dnn/src/layers/softmax_layer.hpp   |  32 ++-
 modules/dnn/src/opencl/softmax.cl          |  75 ++++++
 modules/dnn/test/test_layers.cpp           |   2 +-
 7 files changed, 259 insertions(+), 79 deletions(-)
 create mode 100644 modules/dnn/src/opencl/softmax.cl

diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index 91273fad8..1e3d1fc7b 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -260,6 +260,13 @@ namespace dnn
         static Ptr<PoolingLayer> create(int type = MAX, Size kernel = Size(2, 2), Size stride = Size(1, 1), Size pad = Size(0, 0));
     };
 
+    class CV_EXPORTS_W SoftmaxLayer : public Layer
+    {
+    public:
+
+        static Ptr<SoftmaxLayer> create(int axis = 1);
+    };
+
 //! @}
 //! @}
 
diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp
index 58e6e0d3f..eac3eb8a5 100644
--- a/modules/dnn/src/init.cpp
+++ b/modules/dnn/src/init.cpp
@@ -77,7 +77,7 @@ void initModule()
         return;
 
     REG_RUNTIME_LAYER_CLASS(Slice, SliceLayer)
-    REG_RUNTIME_LAYER_CLASS(Softmax, SoftMaxLayer)
+    REG_STATIC_LAYER_FUNC(Softmax, createSoftmaxLayerFromCaffe)
     REG_RUNTIME_LAYER_CLASS(Split, SplitLayer)
     REG_RUNTIME_LAYER_CLASS(Reshape, ReshapeLayer)
     REG_STATIC_LAYER_FUNC(Flatten, createFlattenLayer)
diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp
index 1906cd58b..cc32aaf40 100644
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@@ -269,7 +269,7 @@ Ptr<PoolingLayer> PoolingLayer::create(int type, Size kernel, Size stride, Size pad)
 Ptr<Layer> createPoolingLayerFromCaffe(LayerParams &params)
 {
     int type;
-    Size kernel, pad, stride;
+    Size kernel, stride, pad;
 
     if (params.has("pool"))
     {
diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp
index e90a590ec..4fbedaf63 100644
--- a/modules/dnn/src/layers/softmax_layer.cpp
+++ b/modules/dnn/src/layers/softmax_layer.cpp
@@ -42,6 +42,8 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "softmax_layer.hpp"
+#include <opencv2/core/ocl.hpp>
+#include "modules/dnn/opencl_kernels_dnn.hpp"
 #include <algorithm>
 #include <stdlib.h>
 using std::max;
@@ -50,95 +52,179 @@ namespace cv
 {
 namespace dnn
 {
-    //TODO: set default axis number to 1, and add custom shape length in FullyConnected
-    SoftMaxLayer::SoftMaxLayer(LayerParams &params) : Layer(params)
+
+SoftMaxLayerImpl::SoftMaxLayerImpl(int axis)
+{
+    axisRaw = axis;
+}
+
+void SoftMaxLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+{
+    CV_Assert(inputs.size() == 1);
+    axis = inputs[0]->canonicalAxis(axisRaw);
+
+    useOpenCL = ocl::useOpenCL();
+
+    BlobShape shape = inputs[0]->shape();
+    outerSize = shape.total(0, axis);
+    channels = shape[axis];
+    innerSize = shape.total(axis + 1);
+
+    int allocFlag = (useOpenCL) ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
+    shape[axis] = 1;
+    buf.create(shape, inputs[0]->type(), allocFlag);
+
+    outputs.resize(1);
+    outputs[0].create(inputs[0]->shape(), inputs[0]->type(), allocFlag);
+}
+
+void SoftMaxLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+{
+    Blob &src = *inputs[0];
+    Blob &dst = outputs[0];
+
+    if (!useOpenCL)
+        forward_cpu(src, dst);
+    else
     {
-        //hotfix!!!
-        axis_ = params.get("axis", 1);
+        CV_Assert(forward_ocl(src, dst));
     }
+}
 
-    void SoftMaxLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-    {
-        CV_Assert(inputs.size() == 1);
-        axis = inputs[0]->canonicalAxis(axis_);
+#ifdef HAVE_OPENCL
+bool SoftMaxLayerImpl::forward_ocl(Blob &src, Blob &dst)
+{
+    const UMat &srcMat = src.umatRefConst();
+    UMat &dstMat = dst.umatRef();
+    srcMat.copyTo(dstMat);
+    UMat &bufMat = buf.umatRef();
+    CV_Assert(dstMat.offset == 0);
 
-        BlobShape shape = inputs[0]->shape();
-        outputs.resize(1);
-        outputs[0].create(shape);
+    String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
+    ocl::Kernel kmax, ksub, ksum, kdiv;
 
-        shape[axis] = 1;
-        maxAggregator.create(shape);
-    }
+    if (!kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts))
+        return false;
 
-    void SoftMaxLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-    {
-        Blob &src = *inputs[0];
-        Blob &dst = outputs[0];
+    if (!ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts))
+        return false;
 
-        float *srcPtr = src.ptrf();
-        float *dstPtr = dst.ptrf();
-        float *bufPtr = maxAggregator.ptrf();
+    if (!ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts))
+        return false;
 
-        size_t outerSize = src.total(0, axis);
-        size_t channels = src.size(axis);
-        size_t innerSize = src.total(axis + 1);
+    if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts))
+        return false;
 
-        size_t outerStep = src.total(axis);
-        size_t cnStep = src.total(axis + 1);
+    size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
+    size_t bufSize = buf.total();
+    size_t totalSize = src.total();
 
-        //compute max along axis
-        for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
-        {
-            size_t srcOffset = outerDim * outerStep;
-            size_t bufOffset = outerDim * cnStep;
+    kmax.args((int)outerSize, (int)channels, (int)innerSize,
+              ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
+    if (!kmax.run(1, &bufSize, &wgSize, true))
+        return false;
+
+    ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
+              ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
+    if (!ksub.run(1, &totalSize, &wgSize, true))
+        return false;
+
+    cv::exp(dstMat, dstMat);
 
-            memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize * sizeof(float));
+    ksum.args((int)outerSize, (int)channels, (int)innerSize,
+              ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
+    if (!ksum.run(1, &bufSize, &wgSize, true))
+        return false;
 
-            for (size_t cnDim = 1; cnDim < channels; cnDim++)
-            {
-                for (size_t i = 0; i < innerSize; i++)
-                    bufPtr[bufOffset + i] = std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]);
-            }
+    kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
+              ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
+    if (!kdiv.run(1, &totalSize, &wgSize, true))
+        return false;
+
+    return true;
+}
+#else
+bool SoftMaxLayerImpl::forward_ocl(Blob&, Blob&)
+{
+    return false;
+}
+#endif
+
+void SoftMaxLayerImpl::forward_cpu(Blob &src, Blob &dst)
+{
+    CV_Assert(src.type() == CV_32F);
+
+    float *srcPtr = src.ptrf();
+    float *dstPtr = dst.ptrf();
+    float *bufPtr = buf.ptrf();
+
+    size_t outerStep = src.total(axis);
+    size_t cnStep = src.total(axis + 1);
+
+    //compute max along axis
+    for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
+    {
+        size_t srcOffset = outerDim * outerStep;
+        size_t bufOffset = outerDim * cnStep;
+
+        memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize * sizeof(float));
+
+        for (size_t cnDim = 1; cnDim < channels; cnDim++)
+        {
+            for (size_t i = 0; i < innerSize; i++)
+                bufPtr[bufOffset + i] = std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]);
         }
+    }
+
+    //subtract max
+    for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
+    {
+        size_t srcOffset = outerDim * outerStep;
+        size_t bufOffset = outerDim * cnStep;
-        //subtract max
-        for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
+        for (size_t cnDim = 0; cnDim < channels; cnDim++)
         {
-            size_t srcOffset = outerDim * outerStep;
-            size_t bufOffset = outerDim * cnStep;
-
-            for (size_t cnDim = 0; cnDim < channels; cnDim++)
-            {
-                for (size_t i = 0; i < innerSize; i++)
-                    dstPtr[srcOffset + cnDim * cnStep + i] = srcPtr[srcOffset + cnDim * cnStep + i] - bufPtr[bufOffset + i];
-            }
+            for (size_t i = 0; i < innerSize; i++)
+                dstPtr[srcOffset + cnDim * cnStep + i] = srcPtr[srcOffset + cnDim * cnStep + i] - bufPtr[bufOffset + i];
         }
+    }
 
-        cv::exp(dst.matRef(), dst.matRef());
+    cv::exp(dst.matRef(), dst.matRef());
 
-        for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
+    for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
+    {
+        size_t srcOffset = outerDim * outerStep;
+        size_t bufOffset = outerDim * cnStep;
+
+        //sum exp along axis
+        for (size_t i = 0; i < innerSize; i++)
+            bufPtr[bufOffset + i] = 0.f;
+
+        for (size_t cnDim = 0; cnDim < channels; cnDim++)
         {
-            size_t srcOffset = outerDim * outerStep;
-            size_t bufOffset = outerDim * cnStep;
+            for (size_t i = 0; i < innerSize; i++)
+                bufPtr[bufOffset + i] += dstPtr[srcOffset + cnDim * cnStep + i];
+        }
 
-            //sum exp along axis
+        //divide by computed sum
+        for (size_t cnDim = 0; cnDim < channels; cnDim++)
+        {
             for (size_t i = 0; i < innerSize; i++)
-                bufPtr[bufOffset + i] = 0.f;
-
-            for (size_t cnDim = 0; cnDim < channels; cnDim++)
-            {
-                for (size_t i = 0; i < innerSize; i++)
-                    bufPtr[bufOffset + i] += dstPtr[srcOffset + cnDim * cnStep + i];
-            }
-
-            //divide by computed sum
-            for (size_t cnDim = 0; cnDim < channels; cnDim++)
-            {
-                for (size_t i = 0; i < innerSize; i++)
-                    dstPtr[srcOffset + cnDim * cnStep + i] /= bufPtr[bufOffset + i];
-            }
+                dstPtr[srcOffset + cnDim * cnStep + i] /= bufPtr[bufOffset + i];
         }
     }
+}
+
+Ptr<SoftmaxLayer> SoftmaxLayer::create(int axis)
+{
+    return Ptr<SoftmaxLayer>(new SoftMaxLayerImpl(axis));
+}
+
+Ptr<Layer> createSoftmaxLayerFromCaffe(LayerParams &params)
+{
+    int axis = params.get("axis", 1);
+    return Ptr<Layer>(SoftmaxLayer::create(axis));
+}
 }
 }
diff --git a/modules/dnn/src/layers/softmax_layer.hpp b/modules/dnn/src/layers/softmax_layer.hpp
index 5b55794ae..5803ee8eb 100644
--- a/modules/dnn/src/layers/softmax_layer.hpp
+++ b/modules/dnn/src/layers/softmax_layer.hpp
@@ -42,21 +42,33 @@
 #ifndef __OPENCV_DNN_LAYERS_SOFTMAX_LAYER_HPP__
 #define __OPENCV_DNN_LAYERS_SOFTMAX_LAYER_HPP__
 #include "../precomp.hpp"
+#include <opencv2/dnn/all_layers.hpp>
 
 namespace cv
 {
 namespace dnn
 {
-    class SoftMaxLayer : public Layer
-    {
-        int axis_, axis;
-        Blob maxAggregator;
-
-    public:
-        SoftMaxLayer(LayerParams &params);
-        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-    };
+
+class SoftMaxLayerImpl : public SoftmaxLayer
+{
+    int axis, axisRaw;
+    Blob buf;
+    bool useOpenCL;
+    size_t outerSize, channels, innerSize;
+
+    bool forward_ocl(Blob &src, Blob &dst);
+    void forward_cpu(Blob &src, Blob &dst);
+
+public:
+
+    SoftMaxLayerImpl(int axis = 1);
+    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
+    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
+};
+
+Ptr<Layer> createSoftmaxLayerFromCaffe(LayerParams &params);
+
 }
 }
 #endif
diff --git a/modules/dnn/src/opencl/softmax.cl b/modules/dnn/src/opencl/softmax.cl
new file mode 100644
index 000000000..e9fcadce3
--- /dev/null
+++ b/modules/dnn/src/opencl/softmax.cl
@@ -0,0 +1,75 @@
+/*************************************************************************************
+ * Copyright (c) 2015, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation and/or
+ *    other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+ * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************************/ + +__kernel void kernel_channel_max(const int num, const int channels, + const int spatial_dim, __global const T* data, __global T* out) { + int index = get_global_id(0); + if(index < num * spatial_dim) { + int n = index / spatial_dim; + int s = index % spatial_dim; + T maxval = -FLT_MAX; + for (int c = 0; c < channels; ++c) { + maxval = max(data[(n * channels + c) * spatial_dim + s], maxval); + } + out[index] = maxval; + } +} + +__kernel void kernel_channel_subtract(const int count, + const int num, const int channels, + const int spatial_dim, __global const T* channel_max, __global T* data) { + int index = get_global_id(0); + if(index < count) { + int n = index / channels / spatial_dim; + int s = index % spatial_dim; + data[index] -= channel_max[n * spatial_dim + s]; + } +} + +__kernel void kernel_channel_sum(const int num, const int channels, + const int spatial_dim, __global const T* data, __global T* channel_sum) { + int index = get_global_id(0); + if(index < num * spatial_dim) { + int n = index / spatial_dim; + int s = index % spatial_dim; + T sum = 0; + for (int c = 0; c < channels; ++c) { + sum += data[(n * channels + c) * spatial_dim + s]; + } + channel_sum[index] = sum; + } +} + +__kernel void kernel_channel_div(const int count, + const int num, const int channels, + const int spatial_dim, __global const T* channel_sum, __global T* data) { + int index = get_global_id(0); + if(index < count) { + int n = index / channels / spatial_dim; + int s = index % spatial_dim; + data[index] /= channel_sum[n * spatial_dim + s]; + } +} \ No newline at end of file diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 9958076e7..94d8945cf 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -87,7 +87,7 @@ void testLayerUsingCaffeModels(String basename, bool useCaffeModel = false, bool TEST(Layer_Test_Softmax, Accuracy) { - testLayerUsingCaffeModels("layer_softmax"); + OCL_OFF(testLayerUsingCaffeModels("layer_softmax")); } OCL_TEST(Layer_Test_Softmax, Accuracy) {