Merge pull request #23952 from zihaomu:fix_depth_conv_5x5

DNN: optimize the speed of general Depth-wise #23952

Try to solve the issue: https://github.com/opencv/opencv/issues/23941

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
pull/24003/head
Zihao Mu 2 years ago committed by GitHub
parent 1f7025f028
commit 1920993525
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 3
      modules/dnn/perf/perf_convolution.cpp
  2. 2
      modules/dnn/src/layers/cpu_kernels/convolution.cpp

@ -744,7 +744,8 @@ static const ConvParam_t testConvolutionConfigs[] = {
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 4, 1, 1}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 864.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 96, 1, 1}}, 4, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 772.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 8, 1, 1}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 544.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 32, 1, 1}}, 8, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 520.}
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 32, 1, 1}}, 8, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 520.},
/* GFLOPS 0.472 x 1 = 0.472 */ {{5, 5}, {{1, 32, 96, 96}}, 32, 32, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 472154112.}
};
struct ConvParamID
{

@ -1290,7 +1290,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr<FastConv>& co
else
Kg_nblocks = 1;
bool separateIm2col = fast_1x1 || stripes_per_plane == 1;
bool separateIm2col = (fast_1x1 || stripes_per_plane == 1) && conv->conv_type != CONV_TYPE_DEPTHWISE_REMAIN;
int Kstripes = Kg_nblocks * stripes_per_plane;
int nsubtasks = N * ngroups * Kstripes;

Loading…
Cancel
Save