diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index cb1ab51877..5eac3e90f3 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -737,8 +737,9 @@ public: if( relu ) { - r0 = relu[i]; - r1 = relu[i+1]; + r0 = relu[i]; r1 = relu[i+1]; + if( i+1 >= outCn ) + r1 = r0; } int j = 0; diff --git a/modules/dnn/src/layers/layers_common.simd.hpp b/modules/dnn/src/layers/layers_common.simd.hpp index bee3e912e1..b2c0aa6a29 100644 --- a/modules/dnn/src/layers/layers_common.simd.hpp +++ b/modules/dnn/src/layers/layers_common.simd.hpp @@ -101,9 +101,13 @@ void fastConv( const float* weights, size_t wstep, const float* bias, if( relu ) { - r0 = relu[i]; - r1 = relu[i+1]; - r2 = relu[i+2]; + r0 = relu[i]; r1 = relu[i+1]; r2 = relu[i+2]; + if( i+2 >= outCn ) + { + r2 = r1; + if( i+1 >= outCn ) + r2 = r1 = r0; + } vr0 = _mm_set1_ps(r0); vr1 = _mm_set1_ps(r1); vr2 = _mm_set1_ps(r2); diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 8a17d7a026..da7dd775a4 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -786,6 +786,125 @@ TEST(Layer_PriorBox, squares) normAssert(out.reshape(1, 4), target); } +typedef TestWithParam > Layer_Test_DWconv_Prelu; +TEST_P(Layer_Test_DWconv_Prelu, Accuracy) +{ + // Test case + // input img size 3x16x16 value all 1 + // | + // v + // dw_conv weight[0]=-1 weight[1]=-2 weight[2]=-3 bias={1,2,3} + // | + // v + // prelu weight={1,2,3} + // | + // v + // output out size 3x14x14 if right: out[0]=-8 out[0]=-32 out[0]=-72 + // but current opencv output: out[0]=-24 out[0]=-48 out[0]=-72 + + const int num_input = get<0>(GetParam()); //inpChannels + const int group = 3; //outChannels=group when group>1 + const int num_output = get<1>(GetParam()); + const int kernel_depth = num_input/group; + CV_Assert(num_output >= group, num_output % group == 0, num_input % group == 0); + + Net net; + //layer 1: dwconv + LayerParams lp; + lp.name = "dwconv"; + lp.type = "Convolution"; + lp.set("kernel_size", 3); + lp.set("num_output", num_output); + lp.set("pad", 0); + lp.set("group", group); + lp.set("stride", 1); + lp.set("engine", "CAFFE"); + lp.set("bias_term", "true"); + + std::vector weightsShape(4); + weightsShape[0] = num_output; // #outChannels + weightsShape[1] = kernel_depth; // #inpChannels / group + weightsShape[2] = 3; // height + weightsShape[3] = 3; // width + Mat weights(weightsShape, CV_32F, Scalar(1)); + + //assign weights + for (int i = 0; i < weightsShape[0]; ++i) + { + for (int j = 0; j < weightsShape[1]; ++j) + { + for (int k = 0; k < weightsShape[2]; ++k) + { + for (int l = 0; l < weightsShape[3]; ++l) + { + weights.ptr(i, j, k)[l]=-1*(i+1); + } + } + } + } + lp.blobs.push_back(weights); + + //assign bias + Mat bias(1, num_output, CV_32F, Scalar(1)); + for (int i = 0; i < 1; ++i) + { + for (int j = 0; j < num_output; ++j) + { + bias.ptr(i)[j]=j+1; + } + } + lp.blobs.push_back(bias); + net.addLayerToPrev(lp.name, lp.type, lp); + + //layer 2: prelu + LayerParams lpr; + lpr.name = "dw_relu"; + lpr.type = "PReLU"; + Mat weightsp(1, num_output, CV_32F, Scalar(1)); + + //assign weights + for (int i = 0; i < 1; ++i) + { + for (int j = 0; j < num_output; ++j) + { + weightsp.ptr(i)[j]=j+1; + } + } + + lpr.blobs.push_back(weightsp); + net.addLayerToPrev(lpr.name, lpr.type, lpr); + + int shape[] = {1, num_input, 16, 16}; + Mat in_blob(4, &shape[0], CV_32FC1, Scalar(1)); + + net.setInput(in_blob); + Mat out = net.forward(); + + //assign target + std::vector outShape(4); + outShape[0] = 1; + outShape[1] = num_output; // outChannels + outShape[2] = 14; // height + outShape[3] = 14; // width + Mat target(outShape, CV_32F, Scalar(1)); + for (int i = 0; i < outShape[0]; ++i) + { + for (int j = 0; j < outShape[1]; ++j) + { + for (int k = 0; k < outShape[2]; ++k) + { + for (int l = 0; l < outShape[3]; ++l) + { + target.ptr(i, j, k)[l]=(-9*kernel_depth*(j+1)+j+1)*(j+1); + } + } + } + } + + normAssert(out, target); +} +INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_DWconv_Prelu, Combine(Values(3, 6), Values(3, 6))); + #ifdef HAVE_INF_ENGINE // Using Intel's Model Optimizer generate .xml and .bin files: // ./ModelOptimizer -w /path/to/caffemodel -d /path/to/prototxt \