Replace Darknet's Reorg with a Permute layer

pull/12403/head
Dmitry Kurtaev 6 years ago
parent 38f8fc6c82
commit 09fa758725
  1. 36
      modules/dnn/src/layers/permute_layer.cpp
  2. 117
      modules/dnn/src/layers/reorg_layer.cpp
  3. 29
      modules/dnn/src/layers/shuffle_channel_layer.cpp
  4. 70
      modules/dnn/src/opencl/reorg.cl
  5. 12
      modules/dnn/test/test_layers.cpp

@ -57,23 +57,6 @@ namespace dnn
class PermuteLayerImpl CV_FINAL : public PermuteLayer class PermuteLayerImpl CV_FINAL : public PermuteLayer
{ {
public: public:
// Validates one axis index from the "order" parameter: it must be a valid
// 4D axis (0..3) and must not duplicate an axis already stored in _order.
// Raises Error::StsBadArg on violation.
void checkCurrentOrder(int currentOrder)
{
if(currentOrder < 0 || currentOrder > 3)
{
CV_Error(
Error::StsBadArg,
// Trailing space is required: adjacent string literals are concatenated,
// and without it the message read "parametermust be in [0...3]".
"Orders of dimensions in Permute layer parameter "
"must be in [0...3] interval");
}
// Reject duplicated axes: a permutation must reference each axis once.
if(std::find(_order.begin(), _order.end(), currentOrder) != _order.end())
{
CV_Error(Error::StsBadArg,
"Permute layer parameter contains duplicated orders.");
}
}
void checkNeedForPermutation() void checkNeedForPermutation()
{ {
_needsPermute = false; _needsPermute = false;
@ -96,19 +79,22 @@ public:
} }
DictValue paramOrder = params.get("order"); DictValue paramOrder = params.get("order");
if(paramOrder.size() > 4)
{
CV_Error(
Error::StsBadArg,
"Too many (> 4) orders of dimensions in Permute layer");
}
_numAxes = paramOrder.size(); _numAxes = paramOrder.size();
for (size_t i = 0; i < _numAxes; i++) for (size_t i = 0; i < _numAxes; i++)
{ {
int currentOrder = paramOrder.get<int>(i); int currentOrder = paramOrder.get<int>(i);
checkCurrentOrder(currentOrder); if (currentOrder < 0 || currentOrder > _numAxes)
{
CV_Error(Error::StsBadArg,
format("Orders of dimensions in Permute layer parameter"
"must be in [0...%d]", _numAxes - 1));
}
if (std::find(_order.begin(), _order.end(), currentOrder) != _order.end())
{
CV_Error(Error::StsBadArg,
"Permute layer parameter contains duplicated orders.");
}
_order.push_back(currentOrder); _order.push_back(currentOrder);
} }

@ -85,6 +85,54 @@ public:
return false; return false;
} }
// Prepares the internal Permute layer that implements Darknet's Reorg:
// the input is viewed with (channels*height) split by reorgStride^2, and
// the resulting axes are reordered. For a single-sample blob a 4D view is
// enough; with batching a 5D view keeps the batch axis in front.
virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
{
    std::vector<Mat> inputs, outputs;
    inputs_arr.getMatVector(inputs);
    outputs_arr.getMatVector(outputs);

    Mat inp = inputs[0], out = outputs[0];
    // (channels * height) / stride^2 — first axis of the reshaped view.
    const int splitChannels = inp.size[1] * inp.size[2] / (reorgStride * reorgStride);
    const bool singleSample = (inp.size[0] == 1);

    std::vector<int> axes;
    if (singleSample)
    {
        // 4D view: [(C*H)/r^2, r, W, r], permuted as {1, 3, 0, 2}.
        const int order4[] = {1, 3, 0, 2};
        axes.assign(order4, order4 + 4);
        permuteInpShape.clear();
        permuteInpShape.push_back(splitChannels);
        permuteInpShape.push_back(reorgStride);
        permuteInpShape.push_back(inp.size[3]); // width
        permuteInpShape.push_back(reorgStride);
    }
    else
    {
        // 5D view: [N, (C*H)/r^2, r, W, r], permuted as {0, 2, 4, 1, 3}.
        const int order5[] = {0, 2, 4, 1, 3};
        axes.assign(order5, order5 + 5);
        permuteInpShape.clear();
        permuteInpShape.push_back(inp.size[0]); // batch
        permuteInpShape.push_back(splitChannels);
        permuteInpShape.push_back(reorgStride);
        permuteInpShape.push_back(inp.size[3]); // width
        permuteInpShape.push_back(reorgStride);
    }

    const int numAxes = (int)axes.size();
    LayerParams permParams;
    permParams.set("order", DictValue::arrayInt(&axes[0], numAxes));

    // Output shape of the permutation is the input view with axes reordered.
    permuteOutShape.resize(numAxes);
    for (int i = 0; i < numAxes; ++i)
        permuteOutShape[i] = permuteInpShape[axes[i]];

    permute = PermuteLayer::create(permParams);
    std::vector<Mat> permuteInputs(1, inp.reshape(1, permuteInpShape));
    std::vector<Mat> permuteOutputs(1, out.reshape(1, permuteOutShape));
    permute->finalize(permuteInputs, permuteOutputs);
}
virtual bool supportBackend(int backendId) CV_OVERRIDE virtual bool supportBackend(int backendId) CV_OVERRIDE
{ {
return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE; return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE;
@ -96,39 +144,13 @@ public:
std::vector<UMat> inputs; std::vector<UMat> inputs;
std::vector<UMat> outputs; std::vector<UMat> outputs;
bool use_half = (inps.depth() == CV_16S);
inps.getUMatVector(inputs); inps.getUMatVector(inputs);
outs.getUMatVector(outputs); outs.getUMatVector(outputs);
String buildopt= format("-DDtype=%s ", use_half ? "half" : "float");
for (size_t i = 0; i < inputs.size(); i++)
{
ocl::Kernel kernel("reorg", ocl::dnn::reorg_oclsrc, buildopt);
if (kernel.empty())
return false;
UMat& srcBlob = inputs[i];
UMat& dstBlob = outputs[0];
int batch_size = srcBlob.size[0];
int channels = srcBlob.size[1];
int height = srcBlob.size[2];
int width = srcBlob.size[3];
size_t nthreads = batch_size * channels * height * width;
kernel.set(0, (int)nthreads);
kernel.set(1, ocl::KernelArg::PtrReadOnly(srcBlob));
kernel.set(2, (int)channels);
kernel.set(3, (int)height);
kernel.set(4, (int)width);
kernel.set(5, (int)reorgStride);
kernel.set(6, ocl::KernelArg::PtrWriteOnly(dstBlob));
if (!kernel.run(1, &nthreads, NULL, false))
return false;
}
inputs[0] = inputs[0].reshape(1, permuteInpShape.size(), &permuteInpShape[0]);
outputs[0] = outputs[0].reshape(1, permuteOutShape.size(), &permuteOutShape[0]);
permute->preferableTarget = preferableTarget;
permute->forward(inputs, outputs, internals);
return true; return true;
} }
#endif #endif
@ -152,34 +174,9 @@ public:
inputs_arr.getMatVector(inputs); inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs); outputs_arr.getMatVector(outputs);
for (size_t i = 0; i < inputs.size(); i++) inputs[0] = inputs[0].reshape(1, permuteInpShape);
{ outputs[0] = outputs[0].reshape(1, permuteOutShape);
Mat srcBlob = inputs[i]; permute->forward(inputs, outputs, internals_arr);
MatShape inputShape = shape(srcBlob), outShape = shape(outputs[i]);
float *dstData = outputs[0].ptr<float>();
const float *srcData = srcBlob.ptr<float>();
int channels = inputShape[1], height = inputShape[2], width = inputShape[3];
int sample_size = channels*height*width;
int batch_size = inputShape[0];
int out_c = channels / (reorgStride*reorgStride);
for (int b = 0; b < batch_size; ++b) {
for (int k = 0; k < channels; ++k) {
for (int j = 0; j < height; ++j) {
for (int i = 0; i < width; ++i) {
int out_index = i + width*(j + height*k);
int c2 = k % out_c;
int offset = k / out_c;
int w2 = i*reorgStride + offset % reorgStride;
int h2 = j*reorgStride + offset / reorgStride;
int in_index = w2 + width*reorgStride*(h2 + height*reorgStride*c2);
dstData[b*sample_size + out_index] = srcData[b*sample_size + in_index];
}
}
}
}
}
} }
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
@ -208,6 +205,10 @@ public:
} }
return flops; return flops;
} }
private:
Ptr<PermuteLayer> permute;
std::vector<int> permuteInpShape, permuteOutShape;
}; };
Ptr<ReorgLayer> ReorgLayer::create(const LayerParams& params) Ptr<ReorgLayer> ReorgLayer::create(const LayerParams& params)

@ -62,11 +62,40 @@ public:
} }
} }
#ifdef HAVE_OPENCL
// OpenCL forward path: delegates the channel shuffle to the precomputed
// Permute layer over reshaped views of the blobs.
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
    std::vector<UMat> inputs, outputs;
    inps.getUMatVector(inputs);
    outs.getUMatVector(outputs);

    // Same underlying buffer: the layer effectively runs in-place, nothing to do.
    if (inputs[0].u == outputs[0].u)
        return true;

    // No permute layer configured — presumably the trivial grouping case;
    // the operation degenerates to a plain copy.
    if (permute.empty())
    {
        inputs[0].copyTo(outputs[0]);
        return true;
    }

    inputs[0] = inputs[0].reshape(1, permuteInpShape.size(), &permuteInpShape[0]);
    outputs[0] = outputs[0].reshape(1, permuteOutShape.size(), &permuteOutShape[0]);
    permute->preferableTarget = preferableTarget;
    permute->forward(inputs, outputs, internals);
    return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str()); CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S) if (inputs_arr.depth() == CV_16S)
{ {
forward_fallback(inputs_arr, outputs_arr, internals_arr); forward_fallback(inputs_arr, outputs_arr, internals_arr);

@ -1,70 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if defined(cl_khr_fp16)
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#endif
// Darknet "reorg" rearrangement of an NCHW blob (gather formulation:
// each output element fetches its source element).
// Parameters (per the host-side caller in reorg_layer.cpp, which passes
// srcBlob.size[1..3]):
//   count       - total element count N*C*H*W
//   src, dst    - input/output blobs of identical total size
//   channels, height, width - input blob dimensions per sample
//   reorgStride - spatial<->channel stride r
// Work-items cover [0, count) with a global-size stride, so any global
// size is valid.
__kernel void reorg(const int count,
__global const Dtype* src,
const int channels,
const int height,
const int width,
const int reorgStride,
__global Dtype* dst)
{
for (int index = get_global_id(0); index < count; index += get_global_size(0))
{
// Decompose the flat output index into (batch b, channel k, row j, col i).
int sample_size = channels*height*width;
int b = index/sample_size;
int new_index = index%sample_size;
int k = new_index / (height * width);
int j = (new_index - (k * height * width)) / width;
int i = new_index % width;
// Split the channel index: c2 is the channel in the r^2-reduced view,
// offset selects which of the r*r sub-positions k corresponds to.
int out_c = channels / (reorgStride*reorgStride);
int c2 = k % out_c;
int offset = k / out_c;
// Map to the stretched spatial coordinates (width*r by height*r grid)
// of the source layout and gather.
int w2 = i*reorgStride + offset % reorgStride;
int h2 = j*reorgStride + offset / reorgStride;
int in_index = w2 + width*reorgStride*(h2 + height*reorgStride*c2);
dst[index] = src[b*sample_size + in_index];
}
}

@ -1288,13 +1288,15 @@ TEST(Layer_Test_PoolingIndices, Accuracy)
normAssert(indices, outputs[1].reshape(1, 5)); normAssert(indices, outputs[1].reshape(1, 5));
} }
typedef testing::TestWithParam<tuple<Vec4i, int> > Layer_Test_ShuffleChannel; typedef testing::TestWithParam<tuple<Vec4i, int, tuple<Backend, Target> > > Layer_Test_ShuffleChannel;
TEST_P(Layer_Test_ShuffleChannel, Accuracy) TEST_P(Layer_Test_ShuffleChannel, Accuracy)
{ {
Vec4i inpShapeVec = get<0>(GetParam()); Vec4i inpShapeVec = get<0>(GetParam());
int group = get<1>(GetParam()); int group = get<1>(GetParam());
ASSERT_EQ(inpShapeVec[1] % group, 0); ASSERT_EQ(inpShapeVec[1] % group, 0);
const int groupSize = inpShapeVec[1] / group; const int groupSize = inpShapeVec[1] / group;
int backendId = get<0>(get<2>(GetParam()));
int targetId = get<1>(get<2>(GetParam()));
Net net; Net net;
LayerParams lp; LayerParams lp;
@ -1308,21 +1310,25 @@ TEST_P(Layer_Test_ShuffleChannel, Accuracy)
randu(inp, 0, 255); randu(inp, 0, 255);
net.setInput(inp); net.setInput(inp);
net.setPreferableBackend(backendId);
net.setPreferableTarget(targetId);
Mat out = net.forward(); Mat out = net.forward();
double l1 = (targetId == DNN_TARGET_OPENCL_FP16) ? 5e-2 : 1e-5;
double lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 7e-2 : 1e-4;
for (int n = 0; n < inpShapeVec[0]; ++n) for (int n = 0; n < inpShapeVec[0]; ++n)
{ {
for (int c = 0; c < inpShapeVec[1]; ++c) for (int c = 0; c < inpShapeVec[1]; ++c)
{ {
Mat outChannel = getPlane(out, n, c); Mat outChannel = getPlane(out, n, c);
Mat inpChannel = getPlane(inp, n, groupSize * (c % group) + c / group); Mat inpChannel = getPlane(inp, n, groupSize * (c % group) + c / group);
normAssert(outChannel, inpChannel); normAssert(outChannel, inpChannel, "", l1, lInf);
} }
} }
} }
INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_ShuffleChannel, Combine( INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_ShuffleChannel, Combine(
/*input shape*/ Values(Vec4i(1, 6, 5, 7), Vec4i(3, 12, 1, 4)), /*input shape*/ Values(Vec4i(1, 6, 5, 7), Vec4i(3, 12, 1, 4)),
/*group*/ Values(1, 2, 3, 6) /*group*/ Values(1, 2, 3, 6), dnnBackendsAndTargets(/*with IE*/ false)
)); ));
// Check if relu is not fused to convolution if we requested it's output // Check if relu is not fused to convolution if we requested it's output

Loading…
Cancel
Save