From 09b73b2dc79aeb9fc16ab25c8b0cd019e00e89e1 Mon Sep 17 00:00:00 2001 From: Aleksandr Rybnikov Date: Fri, 9 Jun 2017 21:36:19 +0300 Subject: [PATCH] Blobs reuse improvement (#1205) * Reuse deep learning output blobs * Changed order for iterating through blobs while seeking memory. Refactored a little. --- modules/dnn/include/opencv2/dnn/dnn.hpp | 15 + modules/dnn/misc/python/pyopencv_dnn.hpp | 1 + modules/dnn/src/dnn.cpp | 382 ++++++++++++++---- modules/dnn/src/layers/batch_norm_layer.cpp | 9 + modules/dnn/src/layers/blank_layer.cpp | 7 +- modules/dnn/src/layers/elementwise_layers.cpp | 14 +- modules/dnn/src/layers/reshape_layer.cpp | 7 +- modules/dnn/src/layers/scale_layer.cpp | 9 + modules/dnn/src/layers/split_layer.cpp | 10 +- modules/dnn/test/test_caffe_importer.cpp | 4 + 10 files changed, 374 insertions(+), 84 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index ce671a8b7..384bcb530 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -369,6 +369,21 @@ namespace dnn //! This namespace is used for dnn module functionlaity. CV_WRAP void getMemoryConsumption(const int layerId, const MatShape& netInputShape, size_t& weights, size_t& blobs) const; + + /** @brief Computes bytes number which are required to store + * all weights and intermediate blobs for each layer. + * @param netInputShapes vector of shapes for all net inputs. + * @param layerIds output vector to save layer IDs. + * @param weights output parameter to store resulting bytes for weights. + * @param blobs output parameter to store resulting bytes for intermediate blobs. 
+ */ + CV_WRAP void getMemoryConsumption(const std::vector& netInputShapes, + std::vector& layerIds, std::vector& weights, + std::vector& blobs) const; + /** @overload */ + CV_WRAP void getMemoryConsumption(const MatShape& netInputShape, + std::vector& layerIds, std::vector& weights, + std::vector& blobs) const; private: struct Impl; diff --git a/modules/dnn/misc/python/pyopencv_dnn.hpp b/modules/dnn/misc/python/pyopencv_dnn.hpp index 15365d896..3ab5a3cd2 100644 --- a/modules/dnn/misc/python/pyopencv_dnn.hpp +++ b/modules/dnn/misc/python/pyopencv_dnn.hpp @@ -2,6 +2,7 @@ typedef dnn::DictValue LayerId; typedef std::vector vector_MatShape; typedef std::vector > vector_vector_MatShape; +typedef std::vector vector_size_t; template<> bool pyopencv_to(PyObject *o, dnn::DictValue &dv, const char *name) diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 0fa9a2f8f..cecf5aa95 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -55,6 +55,22 @@ using std::map; using std::make_pair; using std::set; +namespace +{ + typedef std::vector ShapesVec; + + struct LayerShapes + { + ShapesVec in, out, internal; + // No guarantees that layers which support in-place computations + // will be computed in-place (input.data_ptr == output.data_ptr). + // If layer said that it could work in-place and layers after it + // no longer use input blob, we'll set output = input. + bool supportInPlace; + LayerShapes() {supportInPlace = false;} + }; +} + namespace cv { namespace dnn @@ -154,6 +170,11 @@ struct LayerPin { return (lid == r.lid && oid == r.oid); } + + bool operator<(const LayerPin &r) const + { + return lid < r.lid || lid == r.lid && oid < r.oid; + } }; struct LayerData @@ -219,16 +240,222 @@ private: std::vector outNames; }; -struct Net::Impl +struct BlobManager { - typedef std::vector ShapesVec; - struct LayerShapes +public: + // Increase references counter to layer output. 
+ void addReference(const LayerPin& lp) { - ShapesVec in, out, internal; - bool inplace; - LayerShapes() {inplace = false;} - }; + std::map::iterator it = refCounter.find(lp); + if (it == refCounter.end()) + refCounter[lp] = 1; + else + it->second += 1; + } + void addReferences(const std::vector& pins) + { + for (int i = 0; i < pins.size(); i++) + { + addReference(pins[i]); + } + } + + // Returns number of references to allocated memory that is used in specific + // layer blob. + int numReferences(const LayerPin& lp) + { + std::map::iterator mapIt = reuseMap.find(lp); + CV_Assert(mapIt != reuseMap.end()); + LayerPin memHost = mapIt->second; + + std::map::iterator refIt = refCounter.find(memHost); + CV_Assert(refIt != refCounter.end()); + return refIt->second; + } + + // Reuse data allocated inside the blob. + void reuse(const LayerPin& host, const LayerPin& user) + { + CV_Assert(reuseMap.find(user) == reuseMap.end()); + CV_Assert(reuseMap.find(host) != reuseMap.end()); + LayerPin memHost = reuseMap[host]; + reuseMap[user] = memHost; + if (refCounter.find(memHost) != refCounter.end()) + { + std::map::iterator userRefIt = refCounter.find(user); + if (userRefIt != refCounter.end()) + { + refCounter[memHost] += userRefIt->second; + refCounter.erase(userRefIt); + } + else + refCounter[memHost] += 1; + } + } + + // Decrease references counter to allocated memory inside specific blob. 
+ void releaseReference(const LayerPin& lp) + { + std::map::iterator mapIt = reuseMap.find(lp); + CV_Assert(mapIt != reuseMap.end()); + + std::map::iterator refIt = refCounter.find(mapIt->second); + CV_Assert(refIt != refCounter.end()); + CV_Assert(refIt->second > 0); + refIt->second -= 1; + } + + void releaseReferences(const std::vector& pins) + { + for (int i = 0; i < pins.size(); i++) + { + releaseReference(pins[i]); + } + } + + void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst) + { + std::map::iterator hostIt; + std::map::iterator refIt; + + const int targetTotal = total(shape); + Mat bestBlob; + int bestBlobTotal = INT_MAX; + LayerPin bestBlobPin; + for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt) + { + refIt = refCounter.find(hostIt->first); + // Use only blobs that had references before because if not, + // it might be used as output. + if (refIt != refCounter.end() && refIt->second == 0) + { + Mat& unusedBlob = hostIt->second; + if (unusedBlob.total() >= targetTotal && + unusedBlob.total() < bestBlobTotal) + { + bestBlobPin = hostIt->first; + bestBlob = unusedBlob; + bestBlobTotal = unusedBlob.total(); + } + } + } + if (!bestBlob.empty()) + { + reuse(bestBlobPin, lp); + dst = Mat(shape, CV_32F, bestBlob.data); + } + else + { + dst.create(shape, CV_32F); + addHost(lp, dst); + } + } + + void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes, + std::vector& pinsForInternalBlobs) + { + pinsForInternalBlobs.clear(); + + std::vector& outputBlobs = ld.outputBlobs, + &internalBlobs = ld.internals; + + const ShapesVec& outShapes = layerShapes.out, + internalShapes = layerShapes.internal; + + outputBlobs.resize(std::max((size_t)1, outShapes.size())); // layer produces at least one output blob + internalBlobs.resize(internalShapes.size()); + + CV_Assert(ld.requiredOutputs.size() <= outShapes.size()); + + // Check that layer could work in-place. 
+ bool inPlace = false; + if (layerShapes.supportInPlace) + { + if (ld.inputBlobs.size() == 1) + { + // Get number of references to the input memory. + int numRef = numReferences(ld.inputBlobsId[0]); + // If current layer is one and only customer of this blob. + inPlace = numRef == 1; + } + } + + ShapesVec shapes(outShapes); + shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end()); + std::vector blobs; + for(int i = 0; i < outputBlobs.size(); i++) + { + blobs.push_back(&outputBlobs[i]); + } + + for(int i = 0; i < internalBlobs.size(); i++) + { + blobs.push_back(&internalBlobs[i]); + if (total(internalShapes[i])) + { + pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i)); + } + } + + addReferences(pinsForInternalBlobs); + + std::map > idxSizes; + for(int i = 0; i < shapes.size(); i++) + { + idxSizes[total(shapes[i])].push_back(i); + } + + std::map >::reverse_iterator it; + for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++) + { + for(int j = 0; j < it->second.size(); j++) + { + int index = it->second[j]; + if (total(shapes[index])) + { + LayerPin blobPin(ld.id, index); + if (index < outShapes.size() && inPlace) + { + CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index])); + ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]); + reuse(ld.inputBlobsId[0], blobPin); + } + else + { + reuseOrCreate(shapes[index], blobPin, *blobs[index]); + } + } + } + } + } + + // Clear internal state. Called before every reallocation. + void reset() + { + refCounter.clear(); + reuseMap.clear(); + memHosts.clear(); + } + +private: + // Registers allocated memory. + void addHost(const LayerPin& lp, const Mat& mat) + { + CV_Assert(memHosts.find(lp) == memHosts.end()); + reuseMap[lp] = lp; + memHosts[lp] = mat; + } + + std::map refCounter; + // Maps pin to origin blob (for which memory was allocated first). + // For origin blobs key == value. 
+ std::map reuseMap; + std::map memHosts; +}; + +struct Net::Impl +{ typedef std::map LayersShapesMap; typedef std::map MapIdToLayerData; @@ -252,6 +479,7 @@ struct Net::Impl MapIdToLayerData layers; std::map layerNameToId; + BlobManager blobManager; int lastLayerId; @@ -469,37 +697,11 @@ struct Net::Impl LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid); CV_Assert(layerShapesIt != layersShapes.end()); - const ShapesVec& outShapes = layerShapesIt->second.out; - CV_Assert(ld.requiredOutputs.size() <= outShapes.size()); - - ld.outputBlobs.resize(std::max((size_t)1, outShapes.size())); //layer produce at least one output blob - for(int i = 0; i < outShapes.size(); i++) - { - if (shape(ld.outputBlobs[i]) != outShapes[i]) - { - if (layerShapesIt->second.inplace) - { - CV_Assert(ld.inputBlobs.size() == ld.outputBlobs.size()); - CV_Assert(ld.inputBlobs[i]->total() == total(outShapes[i])); - ld.outputBlobs[i] = ld.inputBlobs[i]->reshape(1, outShapes[i]); - } - else - { - ld.outputBlobs[i].create(outShapes[i], CV_32F); - } - } - } - const ShapesVec& intShapes = layerShapesIt->second.internal; - ld.internals.resize(intShapes.size()); - for(int i = 0; i < intShapes.size(); i++) - { - if (shape(ld.internals[i]) != intShapes[i] && total(intShapes[i])) - ld.internals[i].create(intShapes[i], CV_32F); - } + std::vector pinsForInternalBlobs; + blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs); Ptr layerPtr = ld.getLayerInstance(); - //try { layerPtr->finalize(ld.inputBlobs, ld.outputBlobs); #if 0 std::cout << "\n"; #endif } - /*catch (const cv::Exception &err) - { - CV_RETHROW_ERROR(err, format("The following error occured while making allocate() for layer \"%s\": %s", ld.name.c_str(), err.err.c_str())); - }*/ + + // After allocation of layer, we decrease counters to its input blobs. 
+ blobManager.releaseReferences(ld.inputBlobsId); + blobManager.releaseReferences(pinsForInternalBlobs); ld.flag = 1; } @@ -536,6 +738,13 @@ struct Net::Impl LayersShapesMap layersShapes; getLayersShapes(inputShapes, layersShapes); + blobManager.reset(); + for (it = layers.begin(); it != layers.end(); ++it) + { + const LayerData& ld = it->second; + blobManager.addReferences(ld.inputBlobsId); + } + for (it = layers.begin(); it != layers.end(); it++) { int lid = it->first; @@ -609,7 +818,7 @@ struct Net::Impl ShapesVec& os = inOutShapes[id].out; ShapesVec& ints = inOutShapes[id].internal; int requiredOutputs = layers[id].requiredOutputs.size(); - inOutShapes[id].inplace = + inOutShapes[id].supportInPlace = layers[id].getLayerInstance()->getMemoryShapes(is, requiredOutputs, os, ints); } @@ -718,9 +927,13 @@ void Net::setBlob(String outputName, const Mat &blob_) LayerData &ld = impl->layers[pin.lid]; ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) ); MatShape prevShape = shape(ld.outputBlobs[pin.oid]); - ld.outputBlobs[pin.oid] = blob_.clone(); + bool oldShape = prevShape == shape(blob_); + if (oldShape) + blob_.copyTo(ld.outputBlobs[pin.oid]); + else + ld.outputBlobs[pin.oid] = blob_.clone(); - impl->netWasAllocated = impl->netWasAllocated && prevShape == shape(blob_); + impl->netWasAllocated = impl->netWasAllocated && oldShape; } Mat Net::getBlob(String outputName) @@ -827,10 +1040,10 @@ std::vector Net::getUnconnectedOutLayers() const return layersIds; } -void Net::getLayersShapes(const Net::Impl::ShapesVec& netInputShapes, +void Net::getLayersShapes(const ShapesVec& netInputShapes, std::vector* layersIds, - std::vector* inLayersShapes, - std::vector* outLayersShapes) const + std::vector* inLayersShapes, + std::vector* outLayersShapes) const { if ((layersIds || inLayersShapes || outLayersShapes) == false) return; @@ -856,29 +1069,29 @@ void Net::getLayersShapes(const Net::Impl::ShapesVec& netInputShapes, void Net::getLayersShapes(const 
MatShape& netInputShape, std::vector* layerIds, - std::vector* inLayersShapes, - std::vector* outLayersShapes) const + std::vector* inLayersShapes, + std::vector* outLayersShapes) const { - getLayersShapes(Net::Impl::ShapesVec(1, netInputShape), + getLayersShapes(ShapesVec(1, netInputShape), layerIds, inLayersShapes, outLayersShapes); } void Net::getLayerShapes(const MatShape& netInputShape, const int layerId, - Net::Impl::ShapesVec* inLayerShapes, - Net::Impl::ShapesVec* outLayerShapes) const + ShapesVec* inLayerShapes, + ShapesVec* outLayerShapes) const { - getLayerShapes(Net::Impl::ShapesVec(1, netInputShape), + getLayerShapes(ShapesVec(1, netInputShape), layerId, inLayerShapes, outLayerShapes); } -void Net::getLayerShapes(const Net::Impl::ShapesVec& netInputShapes, +void Net::getLayerShapes(const ShapesVec& netInputShapes, const int layerId, - Net::Impl::ShapesVec* inLayerShapes, - Net::Impl::ShapesVec* outLayerShapes) const + ShapesVec* inLayerShapes, + ShapesVec* outLayerShapes) const { - Impl::LayerShapes shapes; + LayerShapes shapes; impl->getLayerShapes(netInputShapes, layerId, shapes); if (inLayerShapes) *inLayerShapes = shapes.in; @@ -915,7 +1128,7 @@ int64 Net::getFLOPS(const int layerId, Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId); CV_Assert(layer != impl->layers.end()); - Impl::LayerShapes shapes; + LayerShapes shapes; impl->getLayerShapes(netInputShapes, layerId, shapes); return layer->second.getLayerInstance()->getFLOPS(shapes.in, shapes.out); @@ -986,41 +1199,70 @@ void Net::getMemoryConsumption(const std::vector& netInputShapes, size_t& weights, size_t& blobs) const { std::vector layerIds; + std::vector w, b; + getMemoryConsumption(netInputShapes, layerIds, w, b); + + weights = blobs = 0; + for(int i = 0; i < layerIds.size(); i++) + { + weights += w[i]; + blobs += b[i]; + } +} + +void Net::getMemoryConsumption(const int layerId, + const MatShape& netInputShape, + size_t& weights, size_t& blobs) const +{ + 
getMemoryConsumption(layerId, std::vector(1, netInputShape), + weights, blobs); +} + +void Net::getMemoryConsumption(const MatShape& netInputShape, + size_t& weights, size_t& blobs) const +{ + getMemoryConsumption(std::vector(1, netInputShape), + weights, blobs); +} + +void Net::getMemoryConsumption(const std::vector& netInputShapes, + std::vector& layerIds, std::vector& weights, + std::vector& blobs) const +{ + layerIds.clear(); + weights.clear(); + blobs.clear(); + std::vector > outLayerShapes; getLayersShapes(netInputShapes, &layerIds, 0, &outLayerShapes); - weights = blobs = 0; for(int i = 0; i < layerIds.size(); i++) { + int w = 0, b = 0; Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerIds[i]); CV_Assert(layer != impl->layers.end()); for(int j = 0; j < layer->second.params.blobs.size(); j++) { const Mat& weightsBlob = layer->second.params.blobs[j]; - weights += weightsBlob.total()*weightsBlob.elemSize(); + w += weightsBlob.total()*weightsBlob.elemSize(); } for(int j = 0; j < outLayerShapes[i].size(); j++) { - blobs += total(outLayerShapes[i][j]) * sizeof(float); + b += total(outLayerShapes[i][j]) * sizeof(float); } - } -} -void Net::getMemoryConsumption(const int layerId, - const MatShape& netInputShape, - size_t& weights, size_t& blobs) const -{ - getMemoryConsumption(layerId, std::vector(1, netInputShape), - weights, blobs); + weights.push_back(w); + blobs.push_back(b); + } } -void Net::getMemoryConsumption(const MatShape& netInputShape, - size_t& weights, size_t& blobs) const +void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector& layerIds, + std::vector& weights, std::vector& blobs) const { - getMemoryConsumption(std::vector(1, netInputShape), + getMemoryConsumption(std::vector(1, netInputShape), layerIds, weights, blobs); } diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index 0b05b9345..e5f5b68a0 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ 
b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -30,6 +30,15 @@ public: epsilon = params.get("eps", 1E-5); } + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const + { + Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals); + return true; + } + void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) { CV_Assert(blobs.size() >= 2); diff --git a/modules/dnn/src/layers/blank_layer.cpp b/modules/dnn/src/layers/blank_layer.cpp index 6b2a7dea1..f90f238c9 100644 --- a/modules/dnn/src/layers/blank_layer.cpp +++ b/modules/dnn/src/layers/blank_layer.cpp @@ -61,7 +61,12 @@ public: return true; } - void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) {} + void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) + { + for (int i = 0, n = outputs.size(); i < n; ++i) + if (outputs[i].data != inputs[i]->data) + inputs[i]->copyTo(outputs[i]); + } }; Ptr BlankLayer::create(const LayerParams& params) diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index e114b797b..87e2d6908 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -20,17 +20,17 @@ public: class PBody : public cv::ParallelLoopBody { Func &func; - Dtype *data; + Dtype *src, *dst; public: - PBody(Mat &mat, Func &func_) : - func(func_), data(mat.ptr()) + PBody(Mat &src, Mat &dst, Func &func_) : + func(func_), src(src.ptr()), dst(dst.ptr()) {} void operator()(const Range &r) const { for (int i = r.start; i < r.end; i++) - data[i] = func(data[i]); + dst[i] = func(src[i]); } }; @@ -49,13 +49,13 @@ public: { for (size_t i = 0; i < inputs.size(); i++) { - const Mat &src = *inputs[i]; + Mat &src = *inputs[i]; Mat &dst = outputs[i]; - CV_Assert(src.ptr() == dst.ptr() && src.isContinuous()); + CV_Assert(src.isContinuous() && dst.isContinuous()); 
Range sizeRange = Range(0, dst.total()); CV_Assert(src.type() == CV_32F); - PBody body(dst, func); + PBody body(src, dst, func); if( run_parallel ) cv::parallel_for_(sizeRange, body); else diff --git a/modules/dnn/src/layers/reshape_layer.cpp b/modules/dnn/src/layers/reshape_layer.cpp index a98e4e962..4fa089e64 100644 --- a/modules/dnn/src/layers/reshape_layer.cpp +++ b/modules/dnn/src/layers/reshape_layer.cpp @@ -178,7 +178,7 @@ public: for (size_t i = 0; i < inputs.size(); i++) { Mat srcBlob = *inputs[i]; - MatShape inputShape = shape(srcBlob); + MatShape inputShape = shape(srcBlob), outShape = shape(outputs[i]); if (performReordering) { @@ -204,6 +204,11 @@ public: } internals[i].copyTo(outputs[i]); } + else + { + if (outputs[i].data != srcBlob.data) + srcBlob.reshape(1, outShape).copyTo(outputs[i]); + } } } diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp index 37db031f3..473b1b38b 100644 --- a/modules/dnn/src/layers/scale_layer.cpp +++ b/modules/dnn/src/layers/scale_layer.cpp @@ -27,6 +27,15 @@ public: hasBias = params.get("bias_term", false); } + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const + { + Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals); + return true; + } + void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) { CV_Assert(blobs.size() == 1 + hasBias); diff --git a/modules/dnn/src/layers/split_layer.cpp b/modules/dnn/src/layers/split_layer.cpp index d15702aa2..975230173 100644 --- a/modules/dnn/src/layers/split_layer.cpp +++ b/modules/dnn/src/layers/split_layer.cpp @@ -72,17 +72,17 @@ public: { CV_Assert(inputs.size() == 1); - outputs.resize(outputsCount >= 0 ? outputsCount : requiredOutputs, - inputs[0]); - - return false; + Layer::getMemoryShapes(inputs, outputsCount >= 0 ? 
outputsCount : requiredOutputs, + outputs, internals); + return true; } void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) { for (size_t i = 0; i < outputs.size(); i++) { - inputs[0]->copyTo(outputs[i]); + if (outputs[i].data != inputs[0]->data) + inputs[0]->copyTo(outputs[i]); } } }; diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 52869badf..8b8a4e7fe 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -121,6 +121,10 @@ TEST(Reproducibility_FCN, Accuracy) if (sample.size() != inputSize) resize(sample, sample, inputSize); + std::vector layerIds; + std::vector weights, blobs; + net.getMemoryConsumption(shape(1,3,227,227), layerIds, weights, blobs); + net.setBlob(".data", blobFromImage(sample, 1.)); net.forward();