Replace Darknet's Reorg with a Permute layer

pull/12403/head
Dmitry Kurtaev 6 years ago
parent 38f8fc6c82
commit 09fa758725
  1. 36
      modules/dnn/src/layers/permute_layer.cpp
  2. 117
      modules/dnn/src/layers/reorg_layer.cpp
  3. 29
      modules/dnn/src/layers/shuffle_channel_layer.cpp
  4. 70
      modules/dnn/src/opencl/reorg.cl
  5. 12
      modules/dnn/test/test_layers.cpp

@ -57,23 +57,6 @@ namespace dnn
class PermuteLayerImpl CV_FINAL : public PermuteLayer class PermuteLayerImpl CV_FINAL : public PermuteLayer
{ {
public: public:
// Validates one axis index from the "order" parameter: it must be a valid
// 4D axis (0..3) and must not duplicate an axis already stored in _order.
// Raises Error::StsBadArg on violation.
void checkCurrentOrder(int currentOrder)
{
if(currentOrder < 0 || currentOrder > 3)
{
CV_Error(
Error::StsBadArg,
// Trailing space is required: adjacent string literals are concatenated,
// and without it the message read "parametermust be in [0...3]".
"Orders of dimensions in Permute layer parameter "
"must be in [0...3] interval");
}
// Reject duplicated axes: a permutation must reference each axis once.
if(std::find(_order.begin(), _order.end(), currentOrder) != _order.end())
{
CV_Error(Error::StsBadArg,
"Permute layer parameter contains duplicated orders.");
}
}
void checkNeedForPermutation() void checkNeedForPermutation()
{ {
_needsPermute = false; _needsPermute = false;
@ -96,19 +79,22 @@ public:
} }
DictValue paramOrder = params.get("order"); DictValue paramOrder = params.get("order");
if(paramOrder.size() > 4)
{
CV_Error(
Error::StsBadArg,
"Too many (> 4) orders of dimensions in Permute layer");
}
_numAxes = paramOrder.size(); _numAxes = paramOrder.size();
for (size_t i = 0; i < _numAxes; i++) for (size_t i = 0; i < _numAxes; i++)
{ {
int currentOrder = paramOrder.get<int>(i); int currentOrder = paramOrder.get<int>(i);
checkCurrentOrder(currentOrder); if (currentOrder < 0 || currentOrder > _numAxes)
{
CV_Error(Error::StsBadArg,
format("Orders of dimensions in Permute layer parameter"
"must be in [0...%d]", _numAxes - 1));
}
if (std::find(_order.begin(), _order.end(), currentOrder) != _order.end())
{
CV_Error(Error::StsBadArg,
"Permute layer parameter contains duplicated orders.");
}
_order.push_back(currentOrder); _order.push_back(currentOrder);
} }

@ -85,6 +85,54 @@ public:
return false; return false;
} }
// Prepares the internal Permute layer that implements Darknet's Reorg:
// the input is viewed with (channels*height) split by reorgStride^2, and
// the resulting axes are reordered. For a single-sample blob a 4D view is
// enough; with batching a 5D view keeps the batch axis in front.
virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
{
    std::vector<Mat> inputs, outputs;
    inputs_arr.getMatVector(inputs);
    outputs_arr.getMatVector(outputs);

    Mat inp = inputs[0], out = outputs[0];
    // (channels * height) / stride^2 — first axis of the reshaped view.
    const int splitChannels = inp.size[1] * inp.size[2] / (reorgStride * reorgStride);
    const bool singleSample = (inp.size[0] == 1);

    std::vector<int> axes;
    if (singleSample)
    {
        // 4D view: [(C*H)/r^2, r, W, r], permuted as {1, 3, 0, 2}.
        const int order4[] = {1, 3, 0, 2};
        axes.assign(order4, order4 + 4);
        permuteInpShape.clear();
        permuteInpShape.push_back(splitChannels);
        permuteInpShape.push_back(reorgStride);
        permuteInpShape.push_back(inp.size[3]); // width
        permuteInpShape.push_back(reorgStride);
    }
    else
    {
        // 5D view: [N, (C*H)/r^2, r, W, r], permuted as {0, 2, 4, 1, 3}.
        const int order5[] = {0, 2, 4, 1, 3};
        axes.assign(order5, order5 + 5);
        permuteInpShape.clear();
        permuteInpShape.push_back(inp.size[0]); // batch
        permuteInpShape.push_back(splitChannels);
        permuteInpShape.push_back(reorgStride);
        permuteInpShape.push_back(inp.size[3]); // width
        permuteInpShape.push_back(reorgStride);
    }

    const int numAxes = (int)axes.size();
    LayerParams permParams;
    permParams.set("order", DictValue::arrayInt(&axes[0], numAxes));

    // Output shape of the permutation is the input view with axes reordered.
    permuteOutShape.resize(numAxes);
    for (int i = 0; i < numAxes; ++i)
        permuteOutShape[i] = permuteInpShape[axes[i]];

    permute = PermuteLayer::create(permParams);
    std::vector<Mat> permuteInputs(1, inp.reshape(1, permuteInpShape));
    std::vector<Mat> permuteOutputs(1, out.reshape(1, permuteOutShape));
    permute->finalize(permuteInputs, permuteOutputs);
}
virtual bool supportBackend(int backendId) CV_OVERRIDE virtual bool supportBackend(int backendId) CV_OVERRIDE
{ {
return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE; return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE;
@ -96,39 +144,13 @@ public:
std::vector<UMat> inputs; std::vector<UMat> inputs;
std::vector<UMat> outputs; std::vector<UMat> outputs;
bool use_half = (inps.depth() == CV_16S);
inps.getUMatVector(inputs); inps.getUMatVector(inputs);
outs.getUMatVector(outputs); outs.getUMatVector(outputs);
String buildopt= format("-DDtype=%s ", use_half ? "half" : "float");
for (size_t i = 0; i < inputs.size(); i++)
{
ocl::Kernel kernel("reorg", ocl::dnn::reorg_oclsrc, buildopt);
if (kernel.empty())
return false;
UMat& srcBlob = inputs[i];
UMat& dstBlob = outputs[0];
int batch_size = srcBlob.size[0];
int channels = srcBlob.size[1];
int height = srcBlob.size[2];
int width = srcBlob.size[3];
size_t nthreads = batch_size * channels * height * width;
kernel.set(0, (int)nthreads);
kernel.set(1, ocl::KernelArg::PtrReadOnly(srcBlob));
kernel.set(2, (int)channels);
kernel.set(3, (int)height);
kernel.set(4, (int)width);
kernel.set(5, (int)reorgStride);
kernel.set(6, ocl::KernelArg::PtrWriteOnly(dstBlob));
if (!kernel.run(1, &nthreads, NULL, false))
return false;
}
inputs[0] = inputs[0].reshape(1, permuteInpShape.size(), &permuteInpShape[0]);
outputs[0] = outputs[0].reshape(1, permuteOutShape.size(), &permuteOutShape[0]);
permute->preferableTarget = preferableTarget;
permute->forward(inputs, outputs, internals);
return true; return true;
} }
#endif #endif
@ -152,34 +174,9 @@ public:
inputs_arr.getMatVector(inputs); inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs); outputs_arr.getMatVector(outputs);
for (size_t i = 0; i < inputs.size(); i++) inputs[0] = inputs[0].reshape(1, permuteInpShape);
{ outputs[0] = outputs[0].reshape(1, permuteOutShape);
Mat srcBlob = inputs[i]; permute->forward(inputs, outputs, internals_arr);
MatShape inputShape = shape(srcBlob), outShape = shape(outputs[i]);
float *dstData = outputs[0].ptr<float>();
const float *srcData = srcBlob.ptr<float>();
int channels = inputShape[1], height = inputShape[2], width = inputShape[3];
int sample_size = channels*height*width;
int batch_size = inputShape[0];
int out_c = channels / (reorgStride*reorgStride);
for (int b = 0; b < batch_size; ++b) {
for (int k = 0; k < channels; ++k) {
for (int j = 0; j < height; ++j) {
for (int i = 0; i < width; ++i) {
int out_index = i + width*(j + height*k);
int c2 = k % out_c;
int offset = k / out_c;
int w2 = i*reorgStride + offset % reorgStride;
int h2 = j*reorgStride + offset / reorgStride;
int in_index = w2 + width*reorgStride*(h2 + height*reorgStride*c2);
dstData[b*sample_size + out_index] = srcData[b*sample_size + in_index];
}
}
}
}
}
} }
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
@ -208,6 +205,10 @@ public:
} }
return flops; return flops;
} }
private:
Ptr<PermuteLayer> permute;
std::vector<int> permuteInpShape, permuteOutShape;
}; };
Ptr<ReorgLayer> ReorgLayer::create(const LayerParams& params) Ptr<ReorgLayer> ReorgLayer::create(const LayerParams& params)

@ -62,11 +62,40 @@ public:
} }
} }
#ifdef HAVE_OPENCL
// OpenCL forward path: delegates the channel shuffle to the precomputed
// Permute layer over reshaped views of the blobs.
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
    std::vector<UMat> inputs, outputs;
    inps.getUMatVector(inputs);
    outs.getUMatVector(outputs);

    // Same underlying buffer: the layer effectively runs in-place, nothing to do.
    if (inputs[0].u == outputs[0].u)
        return true;

    // No permute layer configured — presumably the trivial grouping case;
    // the operation degenerates to a plain copy.
    if (permute.empty())
    {
        inputs[0].copyTo(outputs[0]);
        return true;
    }

    inputs[0] = inputs[0].reshape(1, permuteInpShape.size(), &permuteInpShape[0]);
    outputs[0] = outputs[0].reshape(1, permuteOutShape.size(), &permuteOutShape[0]);
    permute->preferableTarget = preferableTarget;
    permute->forward(inputs, outputs, internals);
    return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str()); CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S) if (inputs_arr.depth() == CV_16S)
{ {
forward_fallback(inputs_arr, outputs_arr, internals_arr); forward_fallback(inputs_arr, outputs_arr, internals_arr);

@ -1,70 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if defined(cl_khr_fp16)
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#endif
// Darknet "reorg" rearrangement of an NCHW blob (gather formulation:
// each output element fetches its source element).
// Parameters (per the host-side caller in reorg_layer.cpp, which passes
// srcBlob.size[1..3]):
//   count       - total element count N*C*H*W
//   src, dst    - input/output blobs of identical total size
//   channels, height, width - input blob dimensions per sample
//   reorgStride - spatial<->channel stride r
// Work-items cover [0, count) with a global-size stride, so any global
// size is valid.
__kernel void reorg(const int count,
__global const Dtype* src,
const int channels,
const int height,
const int width,
const int reorgStride,
__global Dtype* dst)
{
for (int index = get_global_id(0); index < count; index += get_global_size(0))
{
// Decompose the flat output index into (batch b, channel k, row j, col i).
int sample_size = channels*height*width;
int b = index/sample_size;
int new_index = index%sample_size;
int k = new_index / (height * width);
int j = (new_index - (k * height * width)) / width;
int i = new_index % width;
// Split the channel index: c2 is the channel in the r^2-reduced view,
// offset selects which of the r*r sub-positions k corresponds to.
int out_c = channels / (reorgStride*reorgStride);
int c2 = k % out_c;
int offset = k / out_c;
// Map to the stretched spatial coordinates (width*r by height*r grid)
// of the source layout and gather.
int w2 = i*reorgStride + offset % reorgStride;
int h2 = j*reorgStride + offset / reorgStride;
int in_index = w2 + width*reorgStride*(h2 + height*reorgStride*c2);
dst[index] = src[b*sample_size + in_index];
}
}

@ -1288,13 +1288,15 @@ TEST(Layer_Test_PoolingIndices, Accuracy)
normAssert(indices, outputs[1].reshape(1, 5)); normAssert(indices, outputs[1].reshape(1, 5));
} }
typedef testing::TestWithParam<tuple<Vec4i, int> > Layer_Test_ShuffleChannel; typedef testing::TestWithParam<tuple<Vec4i, int, tuple<Backend, Target> > > Layer_Test_ShuffleChannel;
TEST_P(Layer_Test_ShuffleChannel, Accuracy) TEST_P(Layer_Test_ShuffleChannel, Accuracy)
{ {
Vec4i inpShapeVec = get<0>(GetParam()); Vec4i inpShapeVec = get<0>(GetParam());
int group = get<1>(GetParam()); int group = get<1>(GetParam());
ASSERT_EQ(inpShapeVec[1] % group, 0); ASSERT_EQ(inpShapeVec[1] % group, 0);
const int groupSize = inpShapeVec[1] / group; const int groupSize = inpShapeVec[1] / group;
int backendId = get<0>(get<2>(GetParam()));
int targetId = get<1>(get<2>(GetParam()));
Net net; Net net;
LayerParams lp; LayerParams lp;
@ -1308,21 +1310,25 @@ TEST_P(Layer_Test_ShuffleChannel, Accuracy)
randu(inp, 0, 255); randu(inp, 0, 255);
net.setInput(inp); net.setInput(inp);
net.setPreferableBackend(backendId);
net.setPreferableTarget(targetId);
Mat out = net.forward(); Mat out = net.forward();
double l1 = (targetId == DNN_TARGET_OPENCL_FP16) ? 5e-2 : 1e-5;
double lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 7e-2 : 1e-4;
for (int n = 0; n < inpShapeVec[0]; ++n) for (int n = 0; n < inpShapeVec[0]; ++n)
{ {
for (int c = 0; c < inpShapeVec[1]; ++c) for (int c = 0; c < inpShapeVec[1]; ++c)
{ {
Mat outChannel = getPlane(out, n, c); Mat outChannel = getPlane(out, n, c);
Mat inpChannel = getPlane(inp, n, groupSize * (c % group) + c / group); Mat inpChannel = getPlane(inp, n, groupSize * (c % group) + c / group);
normAssert(outChannel, inpChannel); normAssert(outChannel, inpChannel, "", l1, lInf);
} }
} }
} }
INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_ShuffleChannel, Combine( INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_ShuffleChannel, Combine(
/*input shape*/ Values(Vec4i(1, 6, 5, 7), Vec4i(3, 12, 1, 4)), /*input shape*/ Values(Vec4i(1, 6, 5, 7), Vec4i(3, 12, 1, 4)),
/*group*/ Values(1, 2, 3, 6) /*group*/ Values(1, 2, 3, 6), dnnBackendsAndTargets(/*with IE*/ false)
)); ));
// Check if relu is not fused to convolution if we requested it's output // Check if relu is not fused to convolution if we requested it's output

Loading…
Cancel
Save