From 0488d9bdb24b3b9f75003971e6b91eed2bcb474e Mon Sep 17 00:00:00 2001
From: Vadim Pisarevsky
Date: Tue, 4 Jul 2017 17:23:47 +0300
Subject: [PATCH] optimize out scaleLayer & concatLayer whenever possible

fixed problem in concat layer by disabling memory re-use in layers with
multiple inputs

trying to fix the tests when Halide is used to run deep nets

another attempt to fix Halide tests

see if the Halide tests will pass with concat layer fusion turned off

trying to fix failures in halide tests; another try

one more experiment to make halide_concat & halide_enet tests pass

continue attempts to fix halide tests

moving on

uncomment parallel concat layer

seemingly fixed failures in Halide tests and re-enabled concat layer
fusion; thanks to dkurt for the patch
---
 modules/dnn/include/opencv2/dnn/dnn.hpp      |  32 ++-
 modules/dnn/src/dnn.cpp                      | 207 +++++++++++++++----
 modules/dnn/src/layers/concat_layer.cpp      |  93 ++++++++-
 modules/dnn/src/layers/convolution_layer.cpp |  59 +++++-
 modules/dnn/test/test_halide_layers.cpp     |   8 +-
 5 files changed, 337 insertions(+), 62 deletions(-)

diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index f4369eef4e..8324fe9d05 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -152,6 +152,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
 
     class CV_EXPORTS ActivationLayer;
     class CV_EXPORTS BatchNormLayer;
+    class CV_EXPORTS ScaleLayer;
 
     /** @brief This interface class allows to build new Layers - are building blocks of networks.
      *
@@ -269,6 +270,19 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
          */
         virtual bool setBatchNorm(const Ptr<BatchNormLayer>& layer);
 
+        /**
+         * @brief Tries to attach to the layer the subsequent scaling layer, i.e. performs a partial case of the layer fusion.
+         * @param[in] layer The subsequent scaling layer.
+         *
+         * Returns true if the scaling layer has been attached successfully.
+         */
+        virtual bool setScale(const Ptr<ScaleLayer>& layer);
+
+        /**
+         * @brief Detaches all the layers attached to the particular layer.
+         */
+        virtual void unsetAttached();
+
         virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
                                      const int requiredOutputs,
                                      std::vector<MatShape> &outputs,
@@ -495,9 +509,10 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
 
         /** @overload */
         CV_WRAP void getLayerShapes(const std::vector<MatShape>& netInputShapes,
-                                            const int layerId,
-                                            std::vector<MatShape>* inLayerShapes,
-                                            std::vector<MatShape>* outLayerShapes) const;
+                                    const int layerId,
+                                    std::vector<MatShape>* inLayerShapes,
+                                    std::vector<MatShape>* outLayerShapes) const;
+
         /** @brief Computes FLOP for whole loaded model with specified input shapes.
          * @param netInputShapes vector of shapes for all net inputs.
         * @returns computed FLOP.
         */
@@ -507,10 +522,10 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         CV_WRAP int64 getFLOPS(const MatShape& netInputShape) const;
         /** @overload */
         CV_WRAP int64 getFLOPS(const int layerId,
-                                      const std::vector<MatShape>& netInputShapes) const;
+                               const std::vector<MatShape>& netInputShapes) const;
         /** @overload */
         CV_WRAP int64 getFLOPS(const int layerId,
-                                      const MatShape& netInputShape) const;
+                               const MatShape& netInputShape) const;
 
         /** @brief Returns list of types for layer used in model.
          * @param layersTypes output parameter for returning types.
          */
@@ -557,8 +572,13 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         CV_WRAP void getMemoryConsumption(const MatShape& netInputShape,
                                           CV_OUT std::vector<int>& layerIds,
                                           CV_OUT std::vector<size_t>& weights,
                                           CV_OUT std::vector<size_t>& blobs) const;
-    private:
 
+        /** @brief Enables or disables layer fusion in the network.
+         * @param fusion true to enable the fusion, false to disable. The fusion is enabled by default.
+         */
+        CV_WRAP void enableFusion(bool fusion);
+
+    private:
         struct Impl;
         Ptr<Impl> impl;
     };
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index a371b18540..27433282db 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -464,29 +464,34 @@ public:
         }
     }
 
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool force)
     {
-        std::map<LayerPin, Mat>::iterator hostIt;
-        std::map<LayerPin, int>::iterator refIt;
-
-        const int targetTotal = total(shape);
         Mat bestBlob;
-        int bestBlobTotal = INT_MAX;
         LayerPin bestBlobPin;
-        for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
+
+        if( !force )
         {
-            refIt = refCounter.find(hostIt->first);
-            // Use only blobs that had references before because if not,
-            // it might be used as output.
-            if (refIt != refCounter.end() && refIt->second == 0)
+            std::map<LayerPin, Mat>::iterator hostIt;
+            std::map<LayerPin, int>::iterator refIt;
+
+            const int targetTotal = total(shape);
+            int bestBlobTotal = INT_MAX;
+
+            for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
             {
-                Mat& unusedBlob = hostIt->second;
-                if (unusedBlob.total() >= targetTotal &&
-                    unusedBlob.total() < bestBlobTotal)
+                refIt = refCounter.find(hostIt->first);
+                // Use only blobs that had references before because if not,
+                // it might be used as output.
+                if (refIt != refCounter.end() && refIt->second == 0)
                 {
-                    bestBlobPin = hostIt->first;
-                    bestBlob = unusedBlob;
-                    bestBlobTotal = unusedBlob.total();
+                    Mat& unusedBlob = hostIt->second;
+                    if (unusedBlob.total() >= targetTotal &&
+                        unusedBlob.total() < bestBlobTotal)
+                    {
+                        bestBlobPin = hostIt->first;
+                        bestBlob = unusedBlob;
+                        bestBlobTotal = unusedBlob.total();
+                    }
                 }
             }
         }
@@ -505,7 +510,8 @@ public:
     }
 
     void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
-                               std::vector<LayerPin>& pinsForInternalBlobs)
+                               std::vector<LayerPin>& pinsForInternalBlobs,
+                               bool maximizeReuse)
     {
         CV_TRACE_FUNCTION();
 
@@ -561,6 +567,7 @@ public:
         }
 
         std::map<int, std::vector<int> >::reverse_iterator it;
+        bool force = !maximizeReuse && ld.inputBlobsId.size() > 1;
         for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
         {
             for(int j = 0; j < it->second.size(); j++)
@@ -569,7 +576,7 @@ public:
                 if (total(shapes[index]))
                 {
                     LayerPin blobPin(ld.id, index);
-                    if (index < outShapes.size() && inPlace)
+                    if (index < outShapes.size() && inPlace && !force)
                     {
                         CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
                         ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
@@ -577,7 +584,7 @@ public:
                     }
                     else
                     {
-                        reuseOrCreate(shapes[index], blobPin, *blobs[index]);
+                        reuseOrCreate(shapes[index], blobPin, *blobs[index], force);
                     }
                 }
             }
@@ -628,6 +635,7 @@ struct Net::Impl
         lastLayerId = 1;
         netWasAllocated = false;
+        fusion = true;
         preferableBackend = DNN_BACKEND_DEFAULT;
         preferableTarget = DNN_TARGET_CPU;
     }
@@ -647,6 +655,7 @@ struct Net::Impl
     int lastLayerId;
 
     bool netWasAllocated;
+    bool fusion;
 
     void compileHalide()
     {
@@ -695,8 +704,7 @@ struct Net::Impl
             if( currLayer.empty() )
                 continue;
 
-            currLayer->setActivation(Ptr<ActivationLayer>());
-            currLayer->setBatchNorm(Ptr<BatchNormLayer>());
+            currLayer->unsetAttached();
 
             Ptr<PoolingLayer> poolingLayer = currLayer.dynamicCast<PoolingLayer>();
             if( !poolingLayer.empty() )
             {
                 poolingLayer->computeMaxIdx = true;
             }
         }
+        it = layers.find(0);
+        CV_Assert(it != layers.end());
+        it->second.skipFlags[DNN_BACKEND_DEFAULT] = true;
     }
-
     void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>())
     {
         CV_TRACE_FUNCTION();
@@ -783,13 +793,11 @@ struct Net::Impl
 
     LayerData& getLayerData(const DictValue &layerDesc)
     {
+        CV_Assert(layerDesc.isInt() || layerDesc.isString());
         if (layerDesc.isInt())
             return getLayerData(layerDesc.get<int>());
-        else if (layerDesc.isString())
+        else /*if (layerDesc.isString())*/
             return getLayerData(layerDesc.get<String>());
-
-        CV_Assert(layerDesc.isInt() || layerDesc.isString());
-        return *((LayerData*)NULL);
     }
 
     static void addLayerInput(LayerData &ld, int inNum, LayerPin from)
@@ -1021,7 +1029,8 @@ struct Net::Impl
         CV_Assert(layerShapesIt != layersShapes.end());
 
         std::vector<LayerPin> pinsForInternalBlobs;
-        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs);
+        bool maximizeReuse = preferableBackend == DNN_BACKEND_HALIDE;
+        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, maximizeReuse);
 
         Ptr<Layer> layerPtr = ld.getLayerInstance();
         {
@@ -1044,8 +1053,17 @@ struct Net::Impl
         ld.flag = 1;
     }
 
+#if 0
+#define printf_(args) printf args
+#else
+#define printf_(args)
+#endif
+
     void fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
     {
+        if( !fusion || preferableBackend == DNN_BACKEND_HALIDE )
+            return;
+
         CV_TRACE_FUNCTION();
 
         // scan through all the layers. If there is convolution layer followed by the activation layer,
@@ -1060,11 +1078,17 @@ struct Net::Impl
             LayerData& ld = layers[lid];
             if( ld.skipFlags[DNN_BACKEND_DEFAULT] )
             {
+                printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
                 continue;
             }
+            printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
             if( ld.consumers.size() == 0 )
                 outnames.push_back(ld.layerInstance->name);
 
+            // the optimization #1. try to fuse batch norm, scaling and/or activation layers
+            // with the current layer if they follow it. Normally, they are fused with the convolution layer,
+            // but some of them (like activation) may be fused with fully-connected, elemwise (+) and
+            // some other layers.
             Ptr<Layer>& currLayer = ld.layerInstance;
             if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
             {
@@ -1078,10 +1102,29 @@ struct Net::Impl
                     nextData = 0;
                     if( currLayer->setBatchNorm(nextBNormLayer) )
                     {
+                        printf_(("\tfused with %s\n", nextBNormLayer->name.c_str()));
                         bnormData->skipFlags[DNN_BACKEND_DEFAULT] = true;
                         ld.outputBlobs = layers[lpNext.lid].outputBlobs;
                         if( bnormData->consumers.size() == 1 )
                             nextData = &layers[bnormData->consumers[0].lid];
+                        lpNext = LayerPin(bnormData->consumers[0].lid, 0);
+                    }
+                }
+
+                Ptr<ScaleLayer> nextScaleLayer;
+                if( nextData )
+                    nextScaleLayer = nextData->layerInstance.dynamicCast<ScaleLayer>();
+                if( !nextScaleLayer.empty() && pinsToKeep.count(lpNext) == 0 )
+                {
+                    LayerData* scaleData = nextData;
+                    nextData = 0;
+                    if( currLayer->setScale(nextScaleLayer) )
+                    {
+                        printf_(("\tfused with %s\n", nextScaleLayer->name.c_str()));
+                        scaleData->skipFlags[DNN_BACKEND_DEFAULT] = true;
+                        ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                        if( scaleData->consumers.size() == 1 )
+                            nextData = &layers[scaleData->consumers[0].lid];
                     }
                 }
@@ -1091,11 +1134,16 @@ struct Net::Impl
                 if( !nextActivLayer.empty() && currLayer->setActivation(nextActivLayer) )
                 {
-                    //printf("successfully merged %s and %s\n", currLayer->name.c_str(), nextActivLayer->name.c_str());
+                    printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
                     nextData->skipFlags[DNN_BACKEND_DEFAULT] = true;
                     ld.outputBlobs = layers[lpNext.lid].outputBlobs;
                 }
             }
+
+            // the optimization #2. if there is no layer that takes the max pooling layer's computed
+            // max indices (and only some semantic segmentation networks might need this;
+            // many others only take the maximum values), then we switch the max pooling
+            // layer to the faster operating mode.
             Ptr<PoolingLayer> poolingLayer = ld.layerInstance.dynamicCast<PoolingLayer>();
             if( !poolingLayer.empty() && !ld.consumers.empty() )
             {
@@ -1108,7 +1156,71 @@ struct Net::Impl
                 if( i >= nconsumers )
                 {
                     poolingLayer->computeMaxIdx = false;
-                    //printf("simplified pooling layer %s\n", poolingLayer->name.c_str());
+                    printf_(("\tsimplified pooling layer %s\n", poolingLayer->name.c_str()));
+                }
+            }
+
+            // the optimization #3. if there is a concat layer that concatenates channels
+            // from the inputs together (i.e. axis == 1) then we make the inputs of
+            // the concat layer write to the concatenation output buffer directly
+            // (and so we eliminate the concatenation layer, because the channels
+            // are concatenated implicitly).
+            Ptr<ConcatLayer> concatLayer = ld.layerInstance.dynamicCast<ConcatLayer>();
+            if( !concatLayer.empty() && concatLayer->axis == 1 &&
+                ld.outputBlobs.size() == 1 )
+            {
+                Mat& output = ld.outputBlobs[0];
+
+                // TODO: in general, this optimization can always be done, but
+                // many layers currently check that the input/output blobs are
+                // continuous arrays. Unfortunately, this is not true when
+                // the concatenation optimization is applied with batch_size > 1.
+                // so, for now, we only apply this optimization in the most popular
+                // case batch_size == 1.
+                if( output.dims == 4 && output.size[0] == 1 )
+                {
+                    size_t i, ninputs = ld.inputBlobsId.size();
+                    std::vector<LayerPin> realinputs(ninputs);
+                    for( i = 0; i < ninputs; i++ )
+                    {
+                        LayerPin pin = ld.inputBlobsId[i];
+                        LayerData* inp_i_data = &layers[pin.lid];
+                        while(inp_i_data->skipFlags[DNN_BACKEND_DEFAULT] &&
+                              inp_i_data->inputBlobsId.size() == 1)
+                        {
+                            pin = inp_i_data->inputBlobsId[0];
+                            inp_i_data = &layers[pin.lid];
+                        }
+                        printf_(("\treal input for %s is %s\n",
+                                 layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(),
+                                 inp_i_data->getLayerInstance()->name.c_str()));
+
+                        if(inp_i_data->skipFlags[DNN_BACKEND_DEFAULT])
+                            break;
+                        realinputs[i] = pin;
+                    }
+
+                    if( i >= ninputs )
+                    {
+                        Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
+                        int ofs = 0;
+                        for( i = 0; i < ninputs; i++ )
+                        {
+                            LayerPin pin = realinputs[i];
+                            LayerData* inp_i_data = &layers[pin.lid];
+                            int channels_i = ld.inputBlobs[i]->size[1];
+                            chrange[1] = Range(ofs, ofs + channels_i);
+                            printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(),
+                                     pin.oid, ofs, ofs + channels_i));
+                            ofs += channels_i;
+                            Mat output_slice = output(chrange);
+                            Mat& curr_output = inp_i_data->outputBlobs[pin.oid];
+                            CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size);
+                            curr_output = output_slice;
+                        }
+                        ld.skipFlags[DNN_BACKEND_DEFAULT] = true;
+                        printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str()));
+                    }
                 }
             }
         }
@@ -1458,9 +1570,12 @@ void Net::setPreferableBackend(int backendId)
     CV_TRACE_FUNCTION();
     CV_TRACE_ARG(backendId);
 
-    impl->netWasAllocated = impl->netWasAllocated &&
-                            impl->preferableBackend == backendId;
-    impl->preferableBackend = backendId;
+    if( impl->preferableBackend != backendId )
+    {
+        impl->preferableBackend = backendId;
+        impl->netWasAllocated = false;
+        impl->clear();
+    }
 }
 
 void Net::setPreferableTarget(int targetId)
 {
     CV_TRACE_FUNCTION();
     CV_TRACE_ARG(targetId);
 
-    impl->netWasAllocated = impl->netWasAllocated &&
-                            impl->preferableTarget == targetId;
-    impl->preferableTarget = targetId;
+    if( impl->preferableTarget != targetId )
+    {
+        impl->preferableTarget = targetId;
+        impl->netWasAllocated = false;
+        impl->clear();
+    }
 }
 
 void Net::setInputsNames(const std::vector<String> &inputBlobNames)
@@ -1825,6 +1943,16 @@ void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector
                          weights, blobs);
 }
 
+void Net::enableFusion(bool fusion)
+{
+    if( impl->fusion != fusion )
+    {
+        impl->fusion = fusion;
+        impl->netWasAllocated = false;
+        impl->clear();
+    }
+}
+
 void Net::setHalideScheduler(const String& scheduler)
 {
     CV_TRACE_FUNCTION();
@@ -1950,6 +2078,13 @@ Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
 
 bool Layer::setActivation(const Ptr<ActivationLayer>&) { return false; }
 bool Layer::setBatchNorm(const Ptr<BatchNormLayer>&) { return false; }
+bool Layer::setScale(const Ptr<ScaleLayer>&) { return false; }
+void Layer::unsetAttached()
+{
+    setActivation(Ptr<ActivationLayer>());
+    setBatchNorm(Ptr<BatchNormLayer>());
+    setScale(Ptr<ScaleLayer>());
+}
 
 template <typename T>
 static void vecToPVec(const std::vector<Ptr<T> > &v, std::vector<T*> &pv)
diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp
index f2d6d4e93c..662be1d096 100644
--- a/modules/dnn/src/layers/concat_layer.cpp
+++ b/modules/dnn/src/layers/concat_layer.cpp
@@ -94,6 +94,78 @@ public:
                backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1;  // By channels
     }
 
+    class ChannelConcatInvoker : public ParallelLoopBody
+    {
+    public:
+        std::vector<Mat*>* inputs;
+        Mat* output;
+        int nstripes;
+        std::vector<const float*> chptrs;
+
+        static void run(std::vector<Mat*>& inputs, Mat& output, int nstripes)
+        {
+            ChannelConcatInvoker cc;
+            cc.inputs = &inputs;
+            cc.output = &output;
+            cc.nstripes = nstripes;
+
+            size_t i, ninputs = inputs.size();
+            int nchannels = 0, batchsz = output.size[0];
+            for( i = 0; i < ninputs; i++ )
+            {
+                Mat& inp = *inputs[i];
+                CV_Assert( inp.isContinuous() && inp.type() == CV_32F &&
+                           inp.dims == 4 && inp.size[0] == output.size[0] &&
+                           inp.size[2] == output.size[2] &&
+                           inp.size[3] == output.size[3] );
+                nchannels += inp.size[1];
+            }
+            CV_Assert( nchannels == output.size[1] );
+            CV_Assert( output.isContinuous() && output.type() == CV_32F );
+
+            cc.chptrs.resize(nchannels*batchsz);
+
+            int ofs = 0;
+            for( i = 0; i < ninputs; i++)
+            {
+                Mat& inp = *inputs[i];
+                for( int j = 0; j < batchsz; j++ )
+                    for( int k = 0; k < inp.size[1]; k++ )
+                    {
+                        const float* ptr = inp.ptr<float>(j, k);
+                        cc.chptrs[ofs + j*nchannels + k] = ptr;
+                    }
+                ofs += inp.size[1];
+            }
+
+            parallel_for_(Range(0, nstripes), cc, nstripes);
+        }
+
+        ChannelConcatInvoker() {}
+
+        void operator()(const Range& r) const
+        {
+            size_t planeSize = (size_t)output->size[2]*output->size[3];
+            size_t nch = chptrs.size();
+            size_t total = nch*planeSize;
+            size_t stripeSize = (total + nstripes - 1)/nstripes;
+            size_t stripeStart = r.start*stripeSize;
+            size_t stripeEnd = std::min(total, r.end*stripeSize);
+            const float** ptrs = (const float**)&chptrs[0];
+            float* outptr = output->ptr<float>();
+            size_t blockSize0 = 1 << 16;
+
+            for( size_t ofs0 = stripeStart; ofs0 < stripeEnd; )
+            {
+                size_t ch = ofs0/planeSize;
+                size_t ofs = ofs0 - ch*planeSize;
+                size_t blockSize = std::min(blockSize0, planeSize - ofs);
+                memcpy(outptr + ofs0, ptrs[ch] + ofs, blockSize*sizeof(outptr[0]));
+                ofs0 += blockSize;
+            }
+        }
+    };
+
     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
     {
         CV_TRACE_FUNCTION();
@@ -101,14 +173,23 @@ public:
         int cAxis = clamp(axis, inputs[0]->dims);
         Mat& outMat = outputs[0];
 
-        std::vector<Range> ranges(outputs[0].dims, Range::all());
-
-        ranges[cAxis].start = 0;
-        for (size_t i = 0; i < inputs.size(); i++)
+        if( cAxis == 1 && outMat.dims == 4 )
+        {
+            int nstripes = getNumThreads();
+            ChannelConcatInvoker::run(inputs, outMat, nstripes);
+        }
+        else
         {
-            ranges[cAxis].end = ranges[cAxis].start + inputs[i]->size[cAxis];
-            inputs[i]->copyTo(outMat(&ranges[0]));
-            ranges[cAxis].start = ranges[cAxis].end;
+            std::vector<Range> ranges(outputs[0].dims, Range::all());
+
+            ranges[cAxis].start = 0;
+            for (size_t i = 0; i < inputs.size(); i++)
+            {
+                ranges[cAxis].end = ranges[cAxis].start + inputs[i]->size[cAxis];
+                inputs[i]->copyTo(outMat(&ranges[0]));
+                ranges[cAxis].start = ranges[cAxis].end;
+            }
         }
     }
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 6e09c8ca98..3dd63a3c36 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -148,6 +148,7 @@ public:
     std::vector<float> reluslope;
     Ptr<ActivationLayer> activ;
    Ptr<BatchNormLayer> bnorm;
+    Ptr<ScaleLayer> scaleLayer;
 
     MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const
     {
@@ -202,6 +203,9 @@ public:
 
     bool setBatchNorm(const Ptr<BatchNormLayer>& layer )
     {
+        // for now the scale layer followed by the batch norm cannot be fused, only vice versa.
+        if( !scaleLayer.empty() )
+            return false;
         bnorm = layer;
         // we will need to re-compute the weights with the batch
         // norm coefficients taken into account
         weightsMat.release();
         return !bnorm.empty();
     }
 
+    bool setScale(const Ptr<ScaleLayer>& layer)
+    {
+        scaleLayer = layer;
+        // we will need to re-compute the weights with the scaling
+        // coefficients taken into account
+        weightsMat.release();
+        return !scaleLayer.empty();
+    }
+
     virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
     {
 #ifdef HAVE_HALIDE
@@ -678,32 +691,56 @@ public:
                     biasvec[k] = biasMat.at<float>(k);
             }
 
-            if( !bnorm.empty() )
+            if( !bnorm.empty() || !scaleLayer.empty() )
             {
-                Mat scale, shift;
-                bnorm->getScaleShift(scale, shift);
+                Mat scale, shift, scale2, shift2;
+                const float *scaleptr = 0, *shiftptr = 0;
+                const float *scaleptr2 = 0, *shiftptr2 = 0;
 
-                CV_Assert( scale.isContinuous() && shift.isContinuous() &&
-                           scale.type() == CV_32F && shift.type() == CV_32F &&
-                           scale.total() == (size_t)outCn &&
-                           shift.total() == (size_t)outCn );
+                if( !bnorm.empty() )
+                {
+                    bnorm->getScaleShift(scale, shift);
+                    CV_Assert( scale.isContinuous() && shift.isContinuous() &&
+                               scale.type() == CV_32F && shift.type() == CV_32F &&
+                               scale.total() == (size_t)outCn &&
+                               shift.total() == (size_t)outCn );
+                    scaleptr = scale.ptr<float>();
+                    shiftptr = shift.ptr<float>();
+                }
+                if( !scaleLayer.empty() )
+                {
+                    scale2 = scaleLayer->blobs[0];
+                    CV_Assert( scale2.isContinuous() && scale2.type() == CV_32F &&
+                               scale2.total() == (size_t)outCn );
+                    scaleptr2 = scale2.ptr<float>();
+                    if( scaleLayer->hasBias )
+                    {
+                        shift2 = scaleLayer->blobs[1];
+                        CV_Assert( shift2.isContinuous() && shift2.type() == CV_32F &&
+                                   shift2.total() == (size_t)outCn );
+                        shiftptr2 = shift2.ptr<float>();
+                    }
+                }
 
                 for( int i = 0; i < outCn; i++ )
                 {
-                    float s = scale.at<float>(i);
-                    float delta = shift.at<float>(i);
+                    float s1 = scaleptr ? scaleptr[i] : 1.f;
+                    float delta1 = shiftptr ? shiftptr[i] : 0.f;
+                    float s2 = scaleptr2 ? scaleptr2[i] : 1.f;
+                    float delta2 = shiftptr2 ? shiftptr2[i] : 0.f;
                     float* w_i = weightsMat.ptr<float>(i);
                     int j, wcols = weightsMat.cols;
 
                     for( j = 0; j < wcols; j++ )
-                        w_i[j] *= s;
+                        w_i[j] *= (s1*s2);
 
-                    biasvec[i] = biasvec[i]*s + delta;
+                    biasvec[i] = biasvec[i]*(s1*s2) + (delta1*s2 + delta2);
                 }
             }
 
             biasvec[outCn] = biasvec[outCn+1] = biasvec[outCn-1];
         }
 
+        reluslope.clear();
         if( activ )
         {
             Ptr<ReLULayer> activ_relu = activ.dynamicCast<ReLULayer>();
diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp
index b2edf3af93..6801a7cba7 100644
--- a/modules/dnn/test/test_halide_layers.cpp
+++ b/modules/dnn/test/test_halide_layers.cpp
@@ -517,7 +517,8 @@ TEST_P(Concat, Accuracy)
 
     Net net;
 
-    std::vector<int> convLayerIds(numChannels.channels);
+    std::vector<int> convLayerIds;
+    convLayerIds.reserve(numChannels.channels);
     for (int i = 0, n = numChannels.channels; i < n; ++i)
     {
         if (!numChannels[i])
@@ -537,8 +538,9 @@ TEST_P(Concat, Accuracy)
         convParam.name = ss.str();
         convParam.blobs.push_back(weights);
 
-        convLayerIds[i] = net.addLayer(convParam.name, convParam.type, convParam);
-        net.connect(0, 0, convLayerIds[i], 0);
+        int layerId = net.addLayer(convParam.name, convParam.type, convParam);
+        convLayerIds.push_back(layerId);
+        net.connect(0, 0, layerId, 0);
     }
 
     LayerParams concatParam;
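
---

Note for reviewers (not part of the patch): a minimal standalone sketch of the idea behind optimization #3. With batch size 1 and axis == 1, a channel range of the concat output is a continuous view, so each input layer can write its result straight into that view and the Concat layer itself can be skipped. The 1x5x4x4 shape below is made up for illustration.

    #include <opencv2/core.hpp>
    #include <cstdio>

    using namespace cv;

    int main()
    {
        // NCHW output of a hypothetical concat of a 2-channel and a 3-channel input
        const int dims[] = {1, 5, 4, 4};
        Mat output(4, dims, CV_32F, Scalar(0));

        // carve out the first input's channel slice, like chrange[1] in fuseLayers
        Range chrange[] = { Range::all(), Range(0, 2), Range::all(), Range::all() };
        Mat slice = output(chrange);

        // with size[0] == 1 the slice is continuous, which is what the patch asserts
        printf("slice continuous: %d\n", (int)slice.isContinuous());

        // writing through the slice fills 'output' directly - no copy at concat time
        slice.setTo(Scalar(1.0f));
        printf("output(0,0,0,0) = %.1f\n", output.ptr<float>(0, 0)[0]);
        return 0;
    }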
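And a sketch of how the new Net::enableFusion() switch can be exercised end to end when chasing fusion-related regressions like the Halide test failures above; the model files, the image and the 224x224 geometry are placeholders, not something this patch ships:

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgcodecs.hpp>
    #include <cstdio>

    using namespace cv;
    using namespace cv::dnn;

    int main()
    {
        // hypothetical Caffe model; nets with conv+bn+scale and concat layers benefit most
        Net net = readNetFromCaffe("model.prototxt", "model.caffemodel");
        Mat blob = blobFromImage(imread("image.jpg"), 1.0, Size(224, 224));

        net.setInput(blob);
        Mat outFused = net.forward().clone();  // fusion is enabled by default

        net.enableFusion(false);  // invalidates the allocated net; it is set up again on the next forward()
        net.setInput(blob);
        Mat outPlain = net.forward();

        // the fused and non-fused paths should agree up to float rounding
        printf("max abs diff: %g\n", norm(outFused, outPlain, NORM_INF));
        return 0;
    }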