/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "op_halide.hpp"
#include "op_inf_engine.hpp"
#include "halide_scheduler.hpp"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace cv {
namespace dnn {
CV__DNN_EXPERIMENTAL_NS_BEGIN

// This option is useful for running memory error detection under Valgrind.
static bool DNN_DISABLE_MEMORY_OPTIMIZATIONS = utils::getConfigurationParameterBool("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", false);

using std::vector;
using std::map;
using std::make_pair;
using std::set;

namespace
{
    typedef std::vector<MatShape> ShapesVec;

    struct LayerShapes
    {
        ShapesVec in, out, internal;
        // No guarantees that a layer which supports in-place computation
        // will actually be computed in-place (input.data_ptr == output.data_ptr).
        // If the layer reports that it can work in-place and the layers after it
        // no longer use the input blob, we'll set output = input.
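        // Illustrative note (an assumption added for clarity, not from the original
        // sources): a simple activation such as ReLU typically reports
        // supportInPlace = true; if that activation is the only consumer of its
        // input blob (reference count of 1 in BlobManager below), the allocator
        // aliases the output to the input instead of creating a second buffer.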
bool supportInPlace; LayerShapes() {supportInPlace = false;} }; } Mat blobFromImage(InputArray image, double scalefactor, const Size& size, const Scalar& mean, bool swapRB, bool crop) { CV_TRACE_FUNCTION(); Mat blob; blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop); return blob; } void blobFromImage(InputArray image, OutputArray blob, double scalefactor, const Size& size, const Scalar& mean, bool swapRB, bool crop) { CV_TRACE_FUNCTION(); std::vector images(1, image.getMat()); blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop); } Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size, const Scalar& mean, bool swapRB, bool crop) { CV_TRACE_FUNCTION(); Mat blob; blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop); return blob; } void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor, Size size, const Scalar& mean_, bool swapRB, bool crop) { CV_TRACE_FUNCTION(); std::vector images; images_.getMatVector(images); CV_Assert(!images.empty()); for (int i = 0; i < images.size(); i++) { Size imgSize = images[i].size(); if (size == Size()) size = imgSize; if (size != imgSize) { if(crop) { float resizeFactor = std::max(size.width / (float)imgSize.width, size.height / (float)imgSize.height); resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR); Rect crop(Point(0.5 * (images[i].cols - size.width), 0.5 * (images[i].rows - size.height)), size); images[i] = images[i](crop); } else resize(images[i], images[i], size, 0, 0, INTER_LINEAR); } if(images[i].depth() == CV_8U) images[i].convertTo(images[i], CV_32F); Scalar mean = mean_; if (swapRB) std::swap(mean[0], mean[2]); images[i] -= mean; images[i] *= scalefactor; } size_t i, nimages = images.size(); Mat image0 = images[0]; int nch = image0.channels(); CV_Assert(image0.dims == 2); Mat image; if (nch == 3 || nch == 4) { int sz[] = { (int)nimages, nch, image0.rows, image0.cols }; blob_.create(4, sz, CV_32F); Mat blob = blob_.getMat(); Mat ch[4]; for( i = 0; i < nimages; i++ ) { image = images[i]; CV_Assert(image.depth() == CV_32F); nch = image.channels(); CV_Assert(image.dims == 2 && (nch == 3 || nch == 4)); CV_Assert(image.size() == image0.size()); for( int j = 0; j < nch; j++ ) ch[j] = Mat(image.rows, image.cols, CV_32F, blob.ptr((int)i, j)); if(swapRB) std::swap(ch[0], ch[2]); split(image, ch); } } else { CV_Assert(nch == 1); int sz[] = { (int)nimages, 1, image0.rows, image0.cols }; blob_.create(4, sz, CV_32F); Mat blob = blob_.getMat(); for( i = 0; i < nimages; i++ ) { Mat image = images[i]; CV_Assert(image.depth() == CV_32F); nch = image.channels(); CV_Assert(image.dims == 2 && (nch == 1)); CV_Assert(image.size() == image0.size()); image.copyTo(Mat(image.rows, image.cols, CV_32F, blob.ptr((int)i, 0))); } } } void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_) { CV_TRACE_FUNCTION(); //A blob is a 4 dimensional matrix in floating point precision //blob_[0] = batchSize = nbOfImages //blob_[1] = nbOfChannels //blob_[2] = height //blob_[3] = width CV_Assert(blob_.depth() == CV_32F); CV_Assert(blob_.dims == 4); images_.create(cv::Size(1, blob_.size[0]), blob_.depth()); std::vector vectorOfChannels(blob_.size[1]); for (int n = 0; n < blob_.size[0]; ++n) { for (int c = 0; c < blob_.size[1]; ++c) { vectorOfChannels[c] = getPlane(blob_, n, c); } cv::merge(vectorOfChannels, images_.getMatRef(n)); } } class OpenCLBackendWrapper : public BackendWrapper { public: OpenCLBackendWrapper(Mat& m) : 
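// Usage sketch for the blobFromImage()/blobFromImages() functions defined above
// (illustrative only; the file name and normalization constants are assumptions,
// not part of the original sources):
//
//     Mat img = imread("image.jpg");
//     // 1x3x224x224 CV_32F blob: scale to [0,1], swap BGR->RGB, resize without cropping
//     Mat blob = blobFromImage(img, 1.0 / 255.0, Size(224, 224), Scalar(), true, false);
//
//     // Batched variant: N images -> Nx3x224x224 blob
//     std::vector<Mat> imgs(2, img);
//     Mat batch = blobFromImages(imgs, 1.0 / 255.0, Size(224, 224), Scalar(), true, false);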
BackendWrapper(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL) { m.copyTo(umat); host = &m; hostDirty = false; } OpenCLBackendWrapper(const Ptr& baseBuffer, Mat& m) : BackendWrapper(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL) { Ptr base = baseBuffer.dynamicCast(); CV_Assert(!base.empty()); host = &m; int shape[] = {1, (int)base->umat.total()}; umat = base->umat.reshape(1, 2, &shape[0]) .colRange(0, host->total()) .reshape(1, host->dims, &host->size[0]); hostDirty = false; } static Ptr create(Mat& m) { return Ptr(new OpenCLBackendWrapper(m)); } static Ptr create(const Ptr& baseBuffer, Mat& m) { return Ptr(new OpenCLBackendWrapper(baseBuffer, m)); } static std::vector getUMatVector(const std::vector >& wrappers) { const int numWrappers = wrappers.size(); std::vector mats(wrappers.size()); for (int i = 0; i < numWrappers; ++i) { Ptr umatWrapper = wrappers[i].dynamicCast(); CV_Assert(!umatWrapper.empty()); umatWrapper->copyToDevice(); mats[i] = umatWrapper->umat; } return mats; } // Replaces all umats in wrappers to specific ones. static void update(const std::vector >& wrappers, const std::vector& umats) { CV_Assert(wrappers.size() == umats.size()); for (int i = 0, n = umats.size(); i < n; ++i) { Ptr umatWrapper = wrappers[i].dynamicCast(); CV_Assert(!umatWrapper.empty()); umatWrapper->umat = umats[i]; } } ~OpenCLBackendWrapper() {} // Copies data from device to a host memory. virtual void copyToHost() { umat.copyTo(*host); } virtual void setHostDirty() { hostDirty = true; }; void copyToDevice() { if (hostDirty) { host->copyTo(umat); hostDirty = false; } } private: UMat umat; Mat* host; bool hostDirty; }; struct LayerPin { int lid; int oid; LayerPin(int layerId = -1, int outputId = -1) : lid(layerId), oid(outputId) {} bool valid() const { return (lid >= 0 && oid >= 0); } bool equal(const LayerPin &r) const { return (lid == r.lid && oid == r.oid); } bool operator<(const LayerPin &r) const { return lid < r.lid || lid == r.lid && oid < r.oid; } bool operator ==(const LayerPin &r) const { return lid == r.lid && oid == r.oid; } }; struct LayerData { LayerData() : id(-1), skip(false), flag(0) {} LayerData(int _id, const String &_name, const String &_type, LayerParams &_params) : id(_id), name(_name), type(_type), params(_params), skip(false), flag(0) { CV_TRACE_FUNCTION(); //add logging info params.name = name; params.type = type; } int id; String name; String type; LayerParams params; std::vector inputBlobsId; std::set inputLayersId; std::set requiredOutputs; std::vector consumers; std::vector > outputBlobsWrappers; std::vector > inputBlobsWrappers; std::vector > internalBlobsWrappers; Ptr layerInstance; std::vector outputBlobs; std::vector inputBlobs; std::vector internals; // Computation nodes of implemented backends (except DEFAULT). std::map > backendNodes; // Flag for skip layer computation for specific backend. 
bool skip; int flag; Ptr getLayerInstance() { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(type, "type", type.c_str()); if (layerInstance) return layerInstance; layerInstance = LayerFactory::createLayerInstance(type, params); if (!layerInstance) { CV_Error(Error::StsError, "Can't create layer \"" + name + "\" of type \"" + type + "\""); } return layerInstance; } }; //fake layer containing network input blobs struct DataLayer : public Layer { void finalize(const std::vector&, std::vector&) {} void forward(std::vector&, std::vector&, std::vector &) {} void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) {} int outputNameToIndex(const String& tgtName) { int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin()); return (idx < (int)outNames.size()) ? idx : -1; } void setNames(const std::vector &names) { outNames.assign(names.begin(), names.end()); } bool getMemoryShapes(const std::vector &inputs, const int requiredOutputs, std::vector &outputs, std::vector &internals) const { CV_Assert(inputs.size() == requiredOutputs); outputs.assign(inputs.begin(), inputs.end()); return false; } std::vector outNames; }; struct BlobManager { public: // Increase references counter to layer output. void addReference(const LayerPin& lp) { std::map::iterator it = refCounter.find(lp); if (it == refCounter.end()) refCounter[lp] = 1; else it->second += 1; } void addReferences(const std::vector& pins) { for (int i = 0; i < pins.size(); i++) { addReference(pins[i]); } } // Returns number of references to allocated memory that used in specific // layer blob. int numReferences(const LayerPin& lp) { std::map::iterator mapIt = reuseMap.find(lp); CV_Assert(mapIt != reuseMap.end()); LayerPin memHost = mapIt->second; std::map::iterator refIt = refCounter.find(memHost); CV_Assert(refIt != refCounter.end()); return refIt->second; } // Reuse data allocated in inside the blob. void reuse(const LayerPin& host, const LayerPin& user) { CV_Assert(reuseMap.find(user) == reuseMap.end()); CV_Assert(reuseMap.find(host) != reuseMap.end()); LayerPin memHost = reuseMap[host]; reuseMap[user] = memHost; if (refCounter.find(memHost) != refCounter.end()) { std::map::iterator userRefIt = refCounter.find(user); if (userRefIt != refCounter.end()) { refCounter[memHost] += userRefIt->second; refCounter.erase(userRefIt); } else refCounter[memHost] += 1; } } // Decrease references counter to allocated memory inside specific blob. void releaseReference(const LayerPin& lp) { std::map::iterator mapIt = reuseMap.find(lp); CV_Assert(mapIt != reuseMap.end()); std::map::iterator refIt = refCounter.find(mapIt->second); CV_Assert(refIt != refCounter.end()); CV_Assert(refIt->second > 0); refIt->second -= 1; } void releaseReferences(const std::vector& pins) { for (int i = 0; i < pins.size(); i++) { releaseReference(pins[i]); } } void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool forceCreate) { if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS && !forceCreate) { Mat bestBlob; LayerPin bestBlobPin; std::map::iterator hostIt; std::map::iterator refIt; const int targetTotal = total(shape); int bestBlobTotal = INT_MAX; for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt) { refIt = refCounter.find(hostIt->first); // Use only blobs that had references before because if not, // it might be used as output. 
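            // Illustrative note (not from the original sources): the scan below is a
            // best-fit search. For example, if the layer needs total(shape) == 1000
            // floats and two unused host blobs of 1024 and 4096 elements are free,
            // the 1024-element one is picked (the smallest blob that still fits) and
            // is then reshaped to the requested shape further down.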
if (refIt != refCounter.end() && refIt->second == 0) { Mat& unusedBlob = hostIt->second; if (unusedBlob.total() >= targetTotal && unusedBlob.total() < bestBlobTotal) { bestBlobPin = hostIt->first; bestBlob = unusedBlob; bestBlobTotal = unusedBlob.total(); } } } if (!bestBlob.empty()) { reuse(bestBlobPin, lp); dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape); return; } } { // if dst already has been allocated with total(shape) elements, // it won't be recrreated and pointer of dst.data remains the same. dst.create(shape, CV_32F); addHost(lp, dst); } } void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes, std::vector& pinsForInternalBlobs, bool forceCreate = false) { CV_TRACE_FUNCTION(); pinsForInternalBlobs.clear(); std::vector& outputBlobs = ld.outputBlobs, &internalBlobs = ld.internals; const ShapesVec& outShapes = layerShapes.out, internalShapes = layerShapes.internal; outputBlobs.resize(std::max((size_t)1, outShapes.size())); //layer produce at least one output blob internalBlobs.resize(internalShapes.size()); CV_Assert(ld.requiredOutputs.size() <= outShapes.size()); // Check that layer could work in-place. bool inPlace = false; if (layerShapes.supportInPlace) { if (ld.inputBlobs.size() == 1) { // Get number of references to the input memory. int numRef = numReferences(ld.inputBlobsId[0]); // If current layer is one and only customer of this blob. inPlace = numRef == 1; } } ShapesVec shapes(outShapes); shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end()); std::vector blobs; for(int i = 0; i < outputBlobs.size(); i++) { blobs.push_back(&outputBlobs[i]); } for(int i = 0; i < internalBlobs.size(); i++) { blobs.push_back(&internalBlobs[i]); if (total(internalShapes[i])) { pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i)); } } addReferences(pinsForInternalBlobs); std::map > idxSizes; for(int i = 0; i < shapes.size(); i++) { idxSizes[total(shapes[i])].push_back(i); } std::map >::reverse_iterator it; for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++) { for(int j = 0; j < it->second.size(); j++) { int index = it->second[j]; if (total(shapes[index])) { LayerPin blobPin(ld.id, index); if (index < outShapes.size() && inPlace) { CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index])); ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]); reuse(ld.inputBlobsId[0], blobPin); } else reuseOrCreate(shapes[index], blobPin, *blobs[index], forceCreate); } } } } // Clear internal state. Calls before an every reallocation. void reset() { CV_TRACE_FUNCTION(); refCounter.clear(); reuseMap.clear(); memHosts.clear(); } private: // Register allocated memory. void addHost(const LayerPin& lp, const Mat& mat) { CV_Assert(memHosts.find(lp) == memHosts.end()); reuseMap[lp] = lp; memHosts[lp] = mat; } std::map refCounter; // Maps pin to origin blob (for whom memory was allocated firstly). // For origin blobs key == value. 
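    // Illustrative note (not from the original sources): if output 0 of layer 5
    // reuses the memory originally allocated for output 0 of layer 3, then
    // reuseMap[LayerPin(5, 0)] == LayerPin(3, 0), while the origin entry stays
    // reuseMap[LayerPin(3, 0)] == LayerPin(3, 0); refCounter is always indexed
    // by the origin pin.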
std::map reuseMap; std::map memHosts; }; static Ptr wrapMat(int backendId, int targetId, cv::Mat& m) { if (backendId == DNN_BACKEND_DEFAULT) { if (targetId == DNN_TARGET_CPU) return Ptr(); else if (targetId == DNN_TARGET_OPENCL) return OpenCLBackendWrapper::create(m); else CV_Error(Error::StsNotImplemented, "Unknown target identifier"); } else if (backendId == DNN_BACKEND_HALIDE) { CV_Assert(haveHalide()); #ifdef HAVE_HALIDE return Ptr(new HalideBackendWrapper(targetId, m)); #endif // HAVE_HALIDE } else if (backendId == DNN_BACKEND_INFERENCE_ENGINE) { CV_Assert(haveInfEngine()); #ifdef HAVE_INF_ENGINE return Ptr(new InfEngineBackendWrapper(targetId, m)); #endif // HAVE_INF_ENGINE } else CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); return Ptr(); } struct Net::Impl { typedef std::map LayersShapesMap; typedef std::map MapIdToLayerData; Impl() { //allocate fake net input layer netInputLayer = Ptr(new DataLayer()); LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second; inpl.id = 0; inpl.name = "_input"; inpl.type = "__NetInputLayer__"; inpl.layerInstance = netInputLayer; layerNameToId.insert(std::make_pair(inpl.name, inpl.id)); lastLayerId = 0; netWasAllocated = false; fusion = true; preferableBackend = DNN_BACKEND_DEFAULT; preferableTarget = DNN_TARGET_CPU; skipInfEngineInit = false; } Ptr netInputLayer; std::vector blobsToKeep; MapIdToLayerData layers; std::map layerNameToId; BlobManager blobManager; int preferableBackend; int preferableTarget; String halideConfigFile; bool skipInfEngineInit; // Map host data to backend specific wrapper. std::map > backendWrappers; int lastLayerId; bool netWasAllocated; bool fusion; std::vector layersTimings; Ptr wrap(Mat& host) { if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_CPU) return Ptr(); MatShape shape(host.dims); for (int i = 0; i < host.dims; ++i) shape[i] = host.size[i]; void* data = host.data; if (backendWrappers.find(data) != backendWrappers.end()) { Ptr baseBuffer = backendWrappers[data]; if (preferableBackend == DNN_BACKEND_DEFAULT) { CV_Assert(preferableTarget == DNN_TARGET_OPENCL); return OpenCLBackendWrapper::create(baseBuffer, host); } else if (preferableBackend == DNN_BACKEND_HALIDE) { CV_Assert(haveHalide()); #ifdef HAVE_HALIDE return Ptr(new HalideBackendWrapper(baseBuffer, shape)); #endif // HAVE_HALIDE } else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE) { return wrapMat(preferableBackend, preferableTarget, host); } else CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); } Ptr wrapper = wrapMat(preferableBackend, preferableTarget, host); backendWrappers[data] = wrapper; return wrapper; } #ifdef HAVE_HALIDE void compileHalide() { CV_TRACE_FUNCTION(); CV_Assert(preferableBackend == DNN_BACKEND_HALIDE); HalideScheduler scheduler(halideConfigFile); std::vector< std::reference_wrapper > compileList; compileList.reserve(64); for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) { LayerData &ld = it->second; Ptr layer = ld.layerInstance; if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip) { CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty()); bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]); if (!scheduled) { // Use automatic scheduling provided by layer. 
layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE], ld.inputBlobs, ld.outputBlobs, preferableTarget); } compileList.emplace_back(ld); } } std::atomic progress(0); auto fn = ([&] () -> void { for (;;) { int id = progress.fetch_add(1); if ((size_t)id >= compileList.size()) return; const LayerData& ld = compileList[id].get(); Ptr node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second; dnn::compileHalide(ld.outputBlobs, node, preferableTarget); } }); size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency()); num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads)); std::vector threads(num_threads - 1); for (auto& t: threads) t = std::thread(fn); fn(); // process own tasks for (auto& t: threads) t.join(); } #endif void clear() { CV_TRACE_FUNCTION(); MapIdToLayerData::iterator it; for (it = layers.begin(); it != layers.end(); it++) { if (it->second.id != 0) { it->second.inputBlobs.clear(); it->second.outputBlobs.clear(); it->second.internals.clear(); } it->second.skip = false; //it->second.consumers.clear(); Ptr currLayer = it->second.layerInstance; if( currLayer.empty() ) continue; currLayer->unsetAttached(); Ptr poolingLayer = currLayer.dynamicCast(); if( !poolingLayer.empty() ) { poolingLayer->computeMaxIdx = true; } } it = layers.find(0); CV_Assert(it != layers.end()); it->second.skip = true; layersTimings.clear(); } void setUpNet(const std::vector& blobsToKeep_ = std::vector()) { CV_TRACE_FUNCTION(); if (!netWasAllocated || this->blobsToKeep != blobsToKeep_) { #ifndef HAVE_OPENCL if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL) { CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU.") preferableTarget = DNN_TARGET_CPU; } #endif clear(); allocateLayers(blobsToKeep_); initBackend(); if (!netWasAllocated ) { #ifdef HAVE_HALIDE if (preferableBackend == DNN_BACKEND_HALIDE) compileHalide(); #else CV_Assert(preferableBackend != DNN_BACKEND_HALIDE); #endif } netWasAllocated = true; this->blobsToKeep = blobsToKeep_; } } int getLayerId(const String &layerName) { std::map::iterator it = layerNameToId.find(layerName); return (it != layerNameToId.end()) ? it->second : -1; } int getLayerId(int id) { MapIdToLayerData::iterator it = layers.find(id); return (it != layers.end()) ? id : -1; } int getLayerId(DictValue &layerDesc) { if (layerDesc.isInt()) return getLayerId(layerDesc.get()); else if (layerDesc.isString()) return getLayerId(layerDesc.get()); CV_Assert(layerDesc.isInt() || layerDesc.isString()); return -1; } String getLayerName(int id) { MapIdToLayerData::iterator it = layers.find(id); return (it != layers.end()) ? 
it->second.name : "(unknown layer)"; } LayerData& getLayerData(int id) { MapIdToLayerData::iterator it = layers.find(id); if (it == layers.end()) CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id)); return it->second; } LayerData& getLayerData(const String &layerName) { int id = getLayerId(layerName); if (id < 0) CV_Error(Error::StsError, "Requested layer \"" + layerName + "\" not found"); return getLayerData(id); } LayerData& getLayerData(const DictValue &layerDesc) { CV_Assert(layerDesc.isInt() || layerDesc.isString()); if (layerDesc.isInt()) return getLayerData(layerDesc.get()); else /*if (layerDesc.isString())*/ return getLayerData(layerDesc.get()); } static void addLayerInput(LayerData &ld, int inNum, LayerPin from) { if ((int)ld.inputBlobsId.size() <= inNum) { ld.inputBlobsId.resize(inNum + 1); } else { LayerPin storedFrom = ld.inputBlobsId[inNum]; if (storedFrom.valid() && !storedFrom.equal(from)) CV_Error(Error::StsError, format("Input #%d of layer \"%s\" already was connected", inNum, ld.name.c_str())); } ld.inputBlobsId[inNum] = from; } static void splitPin(const String &pinAlias, String &layerName, String &outName) { size_t delimPos = pinAlias.find('.'); layerName = pinAlias.substr(0, delimPos); outName = (delimPos == String::npos) ? String() : pinAlias.substr(delimPos + 1); } int resolvePinOutputName(LayerData &ld, const String &outName) { if (outName.empty()) return 0; if (std::isdigit(outName[0])) { char *lastChar; long inum = std::strtol(outName.c_str(), &lastChar, 10); if (*lastChar == 0) { CV_Assert(inum == (int)inum); return (int)inum; } } return ld.getLayerInstance()->outputNameToIndex(outName); } LayerPin getPinByAlias(const String &pinAlias) { LayerPin pin; String layerName, outName; splitPin(pinAlias, layerName, outName); pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName); if (pin.lid >= 0) pin.oid = resolvePinOutputName(getLayerData(pin.lid), outName); return pin; } std::vector getLayerOutPins(const String &pinAlias) { String layerName, outName; splitPin(pinAlias, layerName, outName); int lid = (layerName.empty()) ? 0 : getLayerId(layerName); std::vector pins; for (int i = 0; i < layers[lid].outputBlobs.size(); i++) { pins.push_back(LayerPin(lid, i)); } return pins; } void connect(int outLayerId, int outNum, int inLayerId, int inNum) { CV_Assert(outLayerId < inLayerId); LayerData &ldOut = getLayerData(outLayerId); LayerData &ldInp = getLayerData(inLayerId); addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum)); ldOut.requiredOutputs.insert(outNum); ldOut.consumers.push_back(LayerPin(inLayerId, outNum)); } void initBackend() { CV_TRACE_FUNCTION(); if (preferableBackend == DNN_BACKEND_DEFAULT) CV_Assert(preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL); else if (preferableBackend == DNN_BACKEND_HALIDE) initHalideBackend(); else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE) initInfEngineBackend(); else CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); } void initHalideBackend() { CV_TRACE_FUNCTION(); CV_Assert(preferableBackend == DNN_BACKEND_HALIDE, haveHalide()); // Iterator to current layer. MapIdToLayerData::iterator it = layers.begin(); // Iterator to base layer for fusion. In example, in case of conv+bn+relu // it'll be a conv layer. 
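        // Background note (added for clarity; the formulas below are the standard
        // batch-norm folding identity and are not spelled out in the original
        // sources): fusing a BatchNorm with parameters (mean, var, gamma, beta, eps)
        // into the preceding convolution is safe because
        //     w' = w * gamma / sqrt(var + eps)
        //     b' = (b - mean) * gamma / sqrt(var + eps) + beta
        // makes the fused convolution alone reproduce conv followed by batch norm.
        // The folding itself (or, for backend nodes, attaching the extra op to the
        // fused node) is done by the layer implementations via tryAttach()/tryFuse();
        // the loops here and in fuseLayers() further down only detect the pattern and
        // mark the absorbed layers as skipped.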
MapIdToLayerData::iterator baseIt = layers.begin(); for (; it != layers.end(); it++) { LayerData &ldTop = it->second; Ptr layerTop = ldTop.layerInstance; if (!layerTop->supportBackend(preferableBackend)) { // Move base iterator to layer that don't support preferable // backend to prevent fusion over layer of different backend. baseIt = it; continue; } // Try to do layers fusion. LayerData &ldBot = baseIt->second; Ptr layerBot = ldBot.layerInstance; // 1. Check that bottom and top from the same backends. if (it != layers.begin() && layerBot->supportBackend(preferableBackend)) { // 2. Check that current layer works in-place. bool inPlace = ldTop.inputBlobs.size() == 1 && ldBot.outputBlobs.size() == 1 && ldTop.inputBlobs[0]->data == ldBot.outputBlobs[0].data; if (inPlace) { // 3. Try to attach node. CV_Assert(!ldBot.backendNodes[preferableBackend].empty()); Ptr fusedNode = layerTop->tryAttach(ldBot.backendNodes[preferableBackend]); if (!fusedNode.empty()) { ldTop.skip = true; ldBot.backendNodes[preferableBackend] = fusedNode; ldBot.outputBlobsWrappers = ldTop.outputBlobsWrappers; continue; } } } // No layers fusion. ldTop.skip = false; ldTop.backendNodes[DNN_BACKEND_HALIDE] = layerTop->initHalide(ldTop.inputBlobsWrappers); baseIt = it; } } #ifdef HAVE_INF_ENGINE // Before launching Inference Engine graph we need to specify output blobs. // This function requests output blobs based on inputs references of // layers from default backend or layers from different graphs. void addInfEngineNetOutputs(LayerData &ld) { Ptr layerNet; if (ld.backendNodes.find(preferableBackend) != ld.backendNodes.end()) { Ptr node = ld.backendNodes[preferableBackend]; if (!node.empty()) { Ptr ieNode = node.dynamicCast(); CV_Assert(!ieNode.empty(), !ieNode->net.empty()); layerNet = ieNode->net; } } // For an every input reference we check that it belongs to one of // the Inference Engine backend graphs. Request an output blob if it is. // Do nothing if layer's input is from the same graph. for (int i = 0; i < ld.inputBlobsId.size(); ++i) { LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; Ptr inpNode = inpLd.backendNodes[preferableBackend]; if (!inpNode.empty()) { Ptr ieInpNode = inpNode.dynamicCast(); CV_Assert(!ieInpNode.empty(), !ieInpNode->net.empty()); if (layerNet != ieInpNode->net) { // layerNet is empty or nodes are from different graphs. ieInpNode->net->addOutput(inpLd.name); } } } } #endif // HAVE_INF_ENGINE void initInfEngineBackend() { CV_TRACE_FUNCTION(); CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE, haveInfEngine()); #ifdef HAVE_INF_ENGINE MapIdToLayerData::iterator it; Ptr net; if (skipInfEngineInit) { Ptr node = layers[lastLayerId].backendNodes[preferableBackend]; CV_Assert(!node.empty()); Ptr ieNode = node.dynamicCast(); CV_Assert(!ieNode.empty()); for (it = layers.begin(); it != layers.end(); ++it) { LayerData &ld = it->second; for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) { InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]); dataPtr->name = ld.id == 0 ? netInputLayer->outNames[i] : ld.name; } ieNode->net->addBlobs(ld.inputBlobsWrappers); ieNode->net->addBlobs(ld.outputBlobsWrappers); ld.skip = true; } layers[lastLayerId].skip = false; ieNode->net->init(); return; } // Build Inference Engine networks from sets of layers that support this // backend. Split a whole model on several Inference Engine networks if // some of layers is not implemented. // Set of all input and output blobs wrappers for current network. 
std::map > netBlobsWrappers; for (it = layers.begin(); it != layers.end(); ++it) { LayerData &ld = it->second; ld.skip = true; // Initially skip all Inference Engine supported layers. Ptr layer = ld.layerInstance; if (!layer->supportBackend(preferableBackend)) { addInfEngineNetOutputs(ld); ld.skip = false; net = Ptr(); netBlobsWrappers.clear(); continue; } // Create a new network if one of inputs from different Inference Engine graph. for (int i = 0; i < ld.inputBlobsId.size(); ++i) { LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; Ptr inpNode = inpLd.backendNodes[preferableBackend]; if (!inpNode.empty()) { Ptr ieInpNode = inpNode.dynamicCast(); CV_Assert(!ieInpNode.empty(), !ieInpNode->net.empty()); if (ieInpNode->net != net) { net = Ptr(); netBlobsWrappers.clear(); break; } } } // The same blobs wrappers cannot be shared between two Inference Engine // networks because of explicit references between layers and blobs. // So we need to rewrap all the external blobs. for (int i = 0; i < ld.inputBlobsId.size(); ++i) { int lid = ld.inputBlobsId[i].lid; LayerData &inpLd = layers[lid]; auto it = netBlobsWrappers.find(lid); if (it == netBlobsWrappers.end()) { ld.inputBlobsWrappers[i] = wrap(*ld.inputBlobs[i]); auto dataPtr = infEngineDataNode(ld.inputBlobsWrappers[i]); dataPtr->name = inpLd.name; netBlobsWrappers[lid] = ld.inputBlobsWrappers[i]; } else ld.inputBlobsWrappers[i] = it->second; } netBlobsWrappers[ld.id] = ld.outputBlobsWrappers[0]; bool fused = false; Ptr node; if (!net.empty()) { // Try to fuse. bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 && ld.inputBlobs[0]->data == ld.outputBlobs[0].data; if (inPlace) { node = layer->tryAttach(layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend]); fused = !node.empty(); if (fused) ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers; } } else net = Ptr(new InfEngineBackendNet()); if (!fused) { node = layer->initInfEngine(ld.inputBlobsWrappers); } CV_Assert(!node.empty()); ld.backendNodes[preferableBackend] = node; Ptr ieNode = node.dynamicCast(); CV_Assert(!ieNode.empty()); ieNode->net = net; ieNode->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers); net->addBlobs(ld.inputBlobsWrappers); net->addBlobs(ld.outputBlobsWrappers); if (!fused) net->addLayer(ieNode->layer); addInfEngineNetOutputs(ld); } // Initialize all networks. std::set initializedNets; for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it) { LayerData &ld = it->second; if (ld.backendNodes.find(preferableBackend) == ld.backendNodes.end()) continue; Ptr node = ld.backendNodes[preferableBackend]; if (node.empty()) continue; Ptr ieNode = node.dynamicCast(); if (ieNode.empty()) continue; CV_Assert(!ieNode->net.empty()); if (!ieNode->net->isInitialized()) { ieNode->net->init(); ld.skip = false; } } #endif // HAVE_INF_ENGINE } void allocateLayer(int lid, const LayersShapesMap& layersShapes) { CV_TRACE_FUNCTION(); LayerData &ld = layers[lid]; //already allocated if (ld.flag) return; size_t ninputs = ld.inputBlobsId.size(); #if 0 printf("layer %s:", ld.name.c_str()); for (size_t i = 0; i < ninputs; i++) { int inp_lid = ld.inputBlobsId[i].lid; LayerData &inp_ld = layers[inp_lid]; int inp_outputs = (int)inp_ld.outputBlobs.size(); std::cout << " " << inp_ld.name << "(" << inp_outputs; for( int j = 0; j < inp_outputs; j++ ) { std::cout << (j == 0 ? 
": " : ", ") << inp_ld.outputBlobs[j].size; } std::cout << ")"; } printf("\n"); #endif //determine parent layers for (size_t i = 0; i < ninputs; i++) ld.inputLayersId.insert(ld.inputBlobsId[i].lid); //allocate parents for (set::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++) allocateLayer(*i, layersShapes); //bind inputs ld.inputBlobs.resize(ninputs); ld.inputBlobsWrappers.resize(ninputs); for (size_t i = 0; i < ninputs; i++) { LayerPin from = ld.inputBlobsId[i]; CV_Assert(from.valid()); CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid); ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid]; ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid]; } LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid); CV_Assert(layerShapesIt != layersShapes.end()); std::vector pinsForInternalBlobs; blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, preferableBackend == DNN_BACKEND_INFERENCE_ENGINE); ld.outputBlobsWrappers.resize(ld.outputBlobs.size()); for (int i = 0; i < ld.outputBlobs.size(); ++i) { ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]); } ld.internalBlobsWrappers.resize(ld.internals.size()); for (int i = 0; i < ld.internals.size(); ++i) { ld.internalBlobsWrappers[i] = wrap(ld.internals[i]); } Ptr layerPtr = ld.getLayerInstance(); { layerPtr->finalize(ld.inputBlobs, ld.outputBlobs); layerPtr->preferableTarget = preferableTarget; #if 0 std::cout << "\toutputs:"; size_t noutputs = ld.outputBlobs.size(); for (size_t j = 0; j < noutputs; j++) { std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size; } std::cout << "\n"; #endif } // After allocation of layer, we decrease counters to it's input blobs. blobManager.releaseReferences(ld.inputBlobsId); blobManager.releaseReferences(pinsForInternalBlobs); ld.flag = 1; } #if 0 #define printf_(args) printf args #else #define printf_(args) #endif void fuseLayers(const std::vector& blobsToKeep_) { if( !fusion || preferableBackend != DNN_BACKEND_DEFAULT) return; CV_TRACE_FUNCTION(); // scan through all the layers. If there is convolution layer followed by the activation layer, // we try to embed this activation into the convolution and disable separate execution of the activation std::set pinsToKeep(blobsToKeep_.begin(), blobsToKeep_.end()); MapIdToLayerData::iterator it; for (it = layers.begin(); it != layers.end(); it++) { int lid = it->first; LayerData& ld = layers[lid]; if( ld.skip ) { printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str())); continue; } printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str())); // the optimization #1. try to fuse batch norm, scaling and/or activation layers // with the current layer if they follow it. Normally, the are fused with the convolution layer, // but some of them (like activation) may be fused with fully-connected, elemwise (+) and // some other layers. // TODO: OpenCL target support more fusion styles. 
if ( preferableTarget == DNN_TARGET_OPENCL && (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" && ld.layerInstance->type != "MVN")) ) continue; Ptr& currLayer = ld.layerInstance; if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 ) { LayerData* nextData = &layers[ld.consumers[0].lid]; LayerPin lpNext(ld.consumers[0].lid, 0); while (nextData) { Ptr nextLayer = nextData->layerInstance; if (currLayer->tryFuse(nextLayer)) { printf_(("\tfused with %s\n", nextLayer->name.c_str())); nextData->skip = true; ld.outputBlobs = layers[lpNext.lid].outputBlobs; ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers; if (nextData->consumers.size() == 1) { int nextLayerId = nextData->consumers[0].lid; nextData = &layers[nextLayerId]; lpNext = LayerPin(nextLayerId, 0); } else { nextData = 0; break; } } else break; } // For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh if ( preferableTarget != DNN_TARGET_OPENCL || (preferableTarget == DNN_TARGET_OPENCL && nextData && ((nextData->type == "ReLU") || (nextData->type == "ChannelsPReLU") || (nextData->type == "ReLU6") || (nextData->type == "TanH") || (nextData->type == "Power"))) ) { Ptr nextActivLayer; if( nextData ) nextActivLayer = nextData->layerInstance.dynamicCast(); if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0 && currLayer->setActivation(nextActivLayer) ) { LayerData *activData = nextData; printf_(("\tfused with %s\n", nextActivLayer->name.c_str())); activData->skip = true; ld.outputBlobs = layers[lpNext.lid].outputBlobs; ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers; if ( preferableTarget == DNN_TARGET_OPENCL ) { if ( !activData->consumers.empty() ) { nextData = &layers[activData->consumers[0].lid]; lpNext = LayerPin(activData->consumers[0].lid, 0); } } } } // fuse convlution layer followed by eltwise + relu if ( preferableTarget == DNN_TARGET_OPENCL ) { Ptr nextEltwiseLayer; if( nextData ) nextEltwiseLayer = nextData->layerInstance.dynamicCast(); if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 ) { LayerData *eltwiseData = nextData; // go down from the second input and find the first non-skipped layer. LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[1].lid]; while (downLayerData->skip) { downLayerData = &layers[downLayerData->inputBlobsId[0].lid]; } // second input layer is current layer. 
if ( ld.id == downLayerData->id ) { // go down from the first input and find the first non-skipped layer downLayerData = &layers[eltwiseData->inputBlobsId[0].lid]; while (downLayerData->skip) { if ( !downLayerData->type.compare("Eltwise") ) downLayerData = &layers[downLayerData->inputBlobsId[1].lid]; else downLayerData = &layers[downLayerData->inputBlobsId[0].lid]; } Ptr convLayer; if( downLayerData ) convLayer = downLayerData->layerInstance.dynamicCast(); // first input layer is convolution layer if( !convLayer.empty() ) { // fuse eltwise + activation layer LayerData *firstConvLayerData = downLayerData; { CV_Assert(eltwiseData->consumers.size() == 1); nextData = &layers[eltwiseData->consumers[0].lid]; lpNext = LayerPin(eltwiseData->consumers[0].lid, 0); Ptr nextActivLayer; if( nextData ) nextActivLayer = nextData->layerInstance.dynamicCast(); if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0 && (!nextData->type.compare("ReLU") || !nextData->type.compare("ChannelsPReLU") || !nextData->type.compare("Power")) && currLayer->setActivation(nextActivLayer) ) { CV_Assert(firstConvLayerData->outputBlobsWrappers.size() == 1 && ld.inputBlobsWrappers.size() == 1); ld.inputBlobsWrappers.push_back(firstConvLayerData->outputBlobsWrappers[0]); printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str())); printf_(("\tfused with %s\n", nextActivLayer->name.c_str())); eltwiseData->skip = true; nextData->skip = true; // This optimization for cases like // some_layer conv // | | // +-- eltwise --+ // | // activ // This way all the element-wise computations // (i.e. some_layer+conv or some_layer*conv) // would be done at [conv] layer. So we need to // replace [conv]'s output blob to [eltwise]'s one // considering that [activ] is an in-place layer. // Also we need to move all the consumers' references. // To prevent memory collisions (i.e. when input of // [conv] and output of [eltwise] is the same blob) // we allocate a new blob. CV_Assert(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1); ld.outputBlobs[0] = ld.outputBlobs[0].clone(); ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]); eltwiseData->outputBlobs = ld.outputBlobs; nextData->outputBlobs = ld.outputBlobs; eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers; nextData->outputBlobsWrappers = ld.outputBlobsWrappers; // Move references of [activ] layer consumers to the newly allocated blob. for (int i = 0; i < nextData->consumers.size(); ++i) { LayerData& consumer = layers[nextData->consumers[i].lid]; for (int j = 0; j < consumer.inputBlobsId.size(); ++j) { if (consumer.inputBlobsId[j].lid == lpNext.lid) { consumer.inputBlobs[j] = &ld.outputBlobs[0]; consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0]; break; } } } } } } } } } } // the optimization #2. if there is no layer that takes max pooling layer's computed // max indices (and only some semantical segmentation networks might need this; // many others only take the maximum values), then we switch the max pooling // layer to the faster operating mode. Ptr poolingLayer = ld.layerInstance.dynamicCast(); if( !poolingLayer.empty() && !ld.consumers.empty() ) { size_t i = 0, nconsumers = ld.consumers.size(); for( ; i < nconsumers; i++ ) if( ld.consumers[i].oid > 0 ) break; // if there is no layer that takes the second output pin of the pooling layer // on input then we don't need to compute the indices if( i >= nconsumers ) { poolingLayer->computeMaxIdx = false; printf_(("\tsimplified pooling layer %s\n", poolingLayer->name.c_str())); } } // the optimization #3. 
if there is concat layer that concatenates channels // from the inputs together (i.e. axis == 1) then we make the inputs of // the concat layer to write to the concatetion output buffer // (and so we eliminate the concatenation layer, because the channels // are concatenated implicitly). Ptr concatLayer = ld.layerInstance.dynamicCast(); if( !concatLayer.empty() && concatLayer->axis == 1 && !concatLayer->padding && ld.outputBlobs.size() == 1 ) { Mat& output = ld.outputBlobs[0]; // TODO: in general, this optimization can always be done, but // many layers currently check that the input/output blobs are // continuous arrays. Unfortunately, this is not true when // the concatenation optimization is applied with batch_size > 1. // so, for now, we only apply this optimization in the most popular // case batch_size == 1. if( output.dims == 4 && output.size[0] == 1 ) { size_t i, ninputs = ld.inputBlobsId.size(); std::vector realinputs(ninputs); for( i = 0; i < ninputs; i++ ) { LayerPin pin = ld.inputBlobsId[i]; LayerData* inp_i_data = &layers[pin.lid]; while(inp_i_data->skip && inp_i_data->inputBlobsId.size() == 1 && inp_i_data->consumers.size() == 1) { pin = inp_i_data->inputBlobsId[0]; inp_i_data = &layers[pin.lid]; } printf_(("\treal input for %s is %s\n", layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(), inp_i_data->getLayerInstance()->name.c_str())); if(inp_i_data->skip || inp_i_data->consumers.size() != 1) break; realinputs[i] = pin; } if( i >= ninputs ) { // Allocate new memory to prevent collisions during memory // reusing (see https://github.com/opencv/opencv/pull/10456). output = output.clone(); Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() }; int ofs = 0; for( i = 0; i < ninputs; i++ ) { LayerPin pin = realinputs[i]; LayerData* inp_i_data = &layers[pin.lid]; int channels_i = ld.inputBlobs[i]->size[1]; chrange[1] = Range(ofs, ofs + channels_i); printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(), pin.oid, ofs, ofs + channels_i)); ofs += channels_i; Mat output_slice = output(chrange); Mat& curr_output = inp_i_data->outputBlobs[pin.oid]; CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size); Mat* oldPtr = &curr_output; curr_output = output_slice; // Layers that refer old input Mat will refer to the // new data but the same Mat object. CV_Assert(curr_output.data == output_slice.data, oldPtr == &curr_output); } ld.skip = true; printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str())); } } } } } void allocateLayers(const std::vector& blobsToKeep_) { CV_TRACE_FUNCTION(); MapIdToLayerData::iterator it; for (it = layers.begin(); it != layers.end(); it++) it->second.flag = 0; CV_Assert(!layers[0].outputBlobs.empty()); ShapesVec inputShapes; for(int i = 0; i < layers[0].outputBlobs.size(); i++) { CV_Assert(layers[0].outputBlobs[i].total()); inputShapes.push_back(shape(layers[0].outputBlobs[i])); } LayersShapesMap layersShapes; getLayersShapes(inputShapes, layersShapes); blobManager.reset(); backendWrappers.clear(); // Fake references to input blobs. 
for (int i = 0; i < layers[0].outputBlobs.size(); ++i) blobManager.addReference(LayerPin(0, i)); for (it = layers.begin(); it != layers.end(); ++it) { const LayerData& ld = it->second; blobManager.addReferences(ld.inputBlobsId); } for (int i = 0; i < blobsToKeep_.size(); i++) { blobManager.addReference(blobsToKeep_[i]); } for (it = layers.begin(); it != layers.end(); it++) { int lid = it->first; allocateLayer(lid, layersShapes); } layersTimings.resize(lastLayerId + 1, 0); fuseLayers(blobsToKeep_); } void forwardLayer(LayerData &ld) { CV_TRACE_FUNCTION(); Ptr layer = ld.layerInstance; TickMeter tm; tm.start(); if (preferableBackend == DNN_BACKEND_DEFAULT || !layer->supportBackend(preferableBackend)) { if( !ld.skip ) { if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL) { std::vector umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); layer->forward(OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers), umat_outputBlobs, OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers)); OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs); } else { for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i) { if (!ld.inputBlobsWrappers[i].empty()) ld.inputBlobsWrappers[i]->copyToHost(); } layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals); for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i) { if (!ld.outputBlobsWrappers[i].empty()) ld.outputBlobsWrappers[i]->setHostDirty(); } } } else tm.reset(); } else if (!ld.skip) { Ptr node = ld.backendNodes[preferableBackend]; if (preferableBackend == DNN_BACKEND_HALIDE) { forwardHalide(ld.outputBlobsWrappers, node); } else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE) { forwardInfEngine(node); } else { CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); } } tm.stop(); layersTimings[ld.id] = tm.getTimeTicks(); ld.flag = 1; } void forwardToLayer(LayerData &ld, bool clearFlags = true) { CV_TRACE_FUNCTION(); if (clearFlags) { MapIdToLayerData::iterator it; for (it = layers.begin(); it != layers.end(); it++) it->second.flag = 0; } //already was forwarded if (ld.flag) return; //forward parents MapIdToLayerData::iterator it; for (it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it) { LayerData &ld = it->second; if (ld.flag) continue; forwardLayer(ld); } //forward itself forwardLayer(ld); } void forwardAll() { CV_TRACE_FUNCTION(); MapIdToLayerData::reverse_iterator last_layer = layers.rbegin(); CV_Assert(last_layer != layers.rend()); forwardToLayer(last_layer->second, true); } void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes) { std::vector& inputLayerIds = layers[id].inputBlobsId; if (inOutShapes[id].in.empty()) { for(int i = 0; i < inputLayerIds.size(); i++) { int layerId = inputLayerIds[i].lid; LayersShapesMap::iterator it = inOutShapes.find(layerId); if(it == inOutShapes.end() || it->second.out.empty()) { getLayerShapesRecursively(layerId, inOutShapes); } const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid]; inOutShapes[id].in.push_back(shape); } } const ShapesVec& is = inOutShapes[id].in; ShapesVec& os = inOutShapes[id].out; ShapesVec& ints = inOutShapes[id].internal; int requiredOutputs = layers[id].requiredOutputs.size(); inOutShapes[id].supportInPlace = layers[id].getLayerInstance()->getMemoryShapes(is, requiredOutputs, os, ints); } void getLayersShapes(const ShapesVec& netInputShapes, LayersShapesMap& inOutShapes) { inOutShapes.clear(); inOutShapes[0].in = 
netInputShapes; //insert shape for first input layer for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) { getLayerShapesRecursively(it->first, inOutShapes); } } void getLayerShapes(const ShapesVec& netInputShapes, const int layerId, LayerShapes& shapes) { LayersShapesMap inOutShapes; inOutShapes[0].in = netInputShapes; //insert shape for first input layer getLayerShapesRecursively(layerId, inOutShapes); shapes = inOutShapes[layerId]; } LayerPin getLatestLayerPin(const std::vector& pins) { return *std::max_element(pins.begin(), pins.end()); } Mat getBlob(const LayerPin& pin) { CV_TRACE_FUNCTION(); if (!pin.valid()) CV_Error(Error::StsObjectNotFound, "Requested blob not found"); LayerData &ld = layers[pin.lid]; if ((size_t)pin.oid >= ld.outputBlobs.size()) { CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %d outputs, " "the #%d was requested", ld.name.c_str(), ld.outputBlobs.size(), pin.oid)); } if (preferableTarget != DNN_TARGET_CPU) { CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty()); // Transfer data to CPU if it's require. ld.outputBlobsWrappers[pin.oid]->copyToHost(); } return ld.outputBlobs[pin.oid]; } Mat getBlob(String outputName) { return getBlob(getPinByAlias(outputName)); } }; Net::Net() : impl(new Net::Impl) { } Net Net::readFromModelOptimizer(const String& xml, const String& bin) { #ifndef HAVE_INF_ENGINE CV_ErrorNoReturn(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); #else InferenceEngine::CNNNetReader reader; reader.ReadNetwork(xml); reader.ReadWeights(bin); InferenceEngine::CNNNetwork ieNet = reader.getNetwork(); std::vector inputsNames; for (auto& it : ieNet.getInputsInfo()) { inputsNames.push_back(it.first); } Net cvNet; cvNet.setInputsNames(inputsNames); Ptr backendNode(new InfEngineBackendNode(0)); backendNode->net = Ptr(new InfEngineBackendNet(ieNet)); for (auto& it : ieNet.getOutputsInfo()) { LayerParams lp; int lid = cvNet.addLayer(it.first, "", lp); LayerData& ld = cvNet.impl->layers[lid]; ld.layerInstance = Ptr(new InfEngineBackendLayer(it.second)); ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE] = backendNode; cvNet.connect(0, 0, lid, 0); } cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE); cvNet.impl->skipInfEngineInit = true; return cvNet; #endif // HAVE_INF_ENGINE } Net::~Net() { } int Net::addLayer(const String &name, const String &type, LayerParams ¶ms) { CV_TRACE_FUNCTION(); if (name.find('.') != String::npos) { CV_Error(Error::StsBadArg, "Added layer name \"" + name + "\" must not contain dot symbol"); return -1; } if (impl->getLayerId(name) >= 0) { CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net"); return -1; } int id = ++impl->lastLayerId; impl->layerNameToId.insert(std::make_pair(name, id)); impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params))); return id; } int Net::addLayerToPrev(const String &name, const String &type, LayerParams ¶ms) { CV_TRACE_FUNCTION(); int prvLid = impl->lastLayerId; int newLid = this->addLayer(name, type, params); this->connect(prvLid, 0, newLid, 0); return newLid; } void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum) { CV_TRACE_FUNCTION(); impl->connect(outLayerId, outNum, inpLayerId, inpNum); } void Net::connect(String _outPin, String _inPin) { CV_TRACE_FUNCTION(); LayerPin outPin = impl->getPinByAlias(_outPin); LayerPin inpPin = impl->getPinByAlias(_inPin); CV_Assert(outPin.valid() && inpPin.valid()); 
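// Usage sketch for the graph-building API above (illustrative only; the layer
// names, types and LayerParams contents are assumptions, not from the original
// sources):
//
//     Net net;
//     std::vector<String> inputs(1, "data");
//     net.setInputsNames(inputs);
//     LayerParams lp;                        // would normally carry the layer parameters
//     int conv = net.addLayerToPrev("conv1", "Convolution", lp);
//     int relu = net.addLayerToPrev("relu1", "ReLU", lp);
//     // addLayerToPrev() wires output 0 of the previously added layer to input 0 of
//     // the new one; explicit wiring is also possible:
//     //     net.connect(conv, 0, relu, 0);   or   net.connect("conv1", "relu1");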
impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid); } Mat Net::forward(const String& outputName) { CV_TRACE_FUNCTION(); String layerName = outputName; if (layerName.empty()) layerName = getLayerNames().back(); impl->setUpNet(); impl->forwardToLayer(impl->getLayerData(layerName)); return impl->getBlob(layerName); } void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName) { CV_TRACE_FUNCTION(); impl->setUpNet(); String layerName = outputName; if (layerName.empty()) layerName = getLayerNames().back(); impl->forwardToLayer(impl->getLayerData(layerName)); LayerPin pin = impl->getPinByAlias(layerName); LayerData &ld = impl->layers[pin.lid]; if (outputBlobs.isUMat()) { outputBlobs.assign(ld.outputBlobs[pin.oid].getUMat(ACCESS_RW)); } else if (outputBlobs.isMat()) { outputBlobs.assign(impl->getBlob(layerName)); } else if (outputBlobs.isMatVector()) { if (impl->preferableTarget != DNN_TARGET_CPU) { for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) { CV_Assert(!ld.outputBlobsWrappers[i].empty()); ld.outputBlobsWrappers[i]->copyToHost(); } } std::vector & outputvec = *(std::vector *)outputBlobs.getObj(); outputvec = ld.outputBlobs; } else if (outputBlobs.isUMatVector()) { std::vector & outputvec = *(std::vector *)outputBlobs.getObj(); if (impl->preferableBackend == DNN_BACKEND_DEFAULT && impl->preferableTarget == DNN_TARGET_OPENCL) { outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); } else { outputvec.resize(ld.outputBlobs.size()); for (int i = 0; i < outputvec.size(); ++i) outputvec[i] = ld.outputBlobs[i].getUMat(ACCESS_RW); } } } void Net::forward(OutputArrayOfArrays outputBlobs, const std::vector& outBlobNames) { CV_TRACE_FUNCTION(); std::vector pins; for (int i = 0; i < outBlobNames.size(); i++) { pins.push_back(impl->getPinByAlias(outBlobNames[i])); } impl->setUpNet(pins); LayerPin out = impl->getLatestLayerPin(pins); impl->forwardToLayer(impl->getLayerData(out.lid)); std::vector matvec; for (int i = 0; i < pins.size(); i++) { matvec.push_back(impl->getBlob(pins[i])); } std::vector & outputvec = *(std::vector *)outputBlobs.getObj(); outputvec = matvec; } void Net::forward(std::vector >& outputBlobs, const std::vector& outBlobNames) { CV_TRACE_FUNCTION(); std::vector pins; for (int i = 0; i < outBlobNames.size(); i++) { std::vector lp = impl->getLayerOutPins(outBlobNames[i]); pins.insert(pins.end(), lp.begin(), lp.end()); } impl->setUpNet(pins); LayerPin out = impl->getLatestLayerPin(pins); impl->forwardToLayer(impl->getLayerData(out.lid)); outputBlobs.resize(outBlobNames.size()); for (int i = 0; i < outBlobNames.size(); i++) { std::vector lp = impl->getLayerOutPins(outBlobNames[i]); for (int i = 0; i < lp.size(); i++) { outputBlobs[i].push_back(impl->getBlob(lp[i])); } } } void Net::setPreferableBackend(int backendId) { CV_TRACE_FUNCTION(); CV_TRACE_ARG(backendId); if( impl->preferableBackend != backendId ) { impl->preferableBackend = backendId; impl->netWasAllocated = false; impl->clear(); } } void Net::setPreferableTarget(int targetId) { CV_TRACE_FUNCTION(); CV_TRACE_ARG(targetId); if( impl->preferableTarget != targetId ) { impl->preferableTarget = targetId; impl->netWasAllocated = false; impl->clear(); } } void Net::setInputsNames(const std::vector &inputBlobNames) { CV_TRACE_FUNCTION(); impl->netInputLayer->setNames(inputBlobNames); } void Net::setInput(InputArray blob, const String& name) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); LayerPin pin; pin.lid = 0; pin.oid = 
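// Usage sketch for the inference API above, i.e. Net::setInput() together with the
// Net::forward() overloads (illustrative only; the model files and blob usage are
// assumptions, not from the original sources):
//
//     Net net = readNetFromCaffe("model.prototxt", "model.caffemodel");
//     net.setInput(blob);                    // blob produced by blobFromImage()
//     Mat prob = net.forward();              // run up to the last layer
//
//     // Or fetch several named outputs in one pass:
//     std::vector<Mat> outs;
//     std::vector<String> outNames;          // e.g. names of unconnected output layers
//     net.forward(outs, outNames);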
impl->resolvePinOutputName(impl->getLayerData(pin.lid), name); if (!pin.valid()) CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found"); LayerData &ld = impl->layers[pin.lid]; ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) ); ld.outputBlobsWrappers.resize(ld.outputBlobs.size()); MatShape prevShape = shape(ld.outputBlobs[pin.oid]); Mat blob_ = blob.getMat(); bool oldShape = prevShape == shape(blob_); if (oldShape) { blob_.copyTo(ld.outputBlobs[pin.oid]); } else { ld.outputBlobs[pin.oid] = blob_.clone(); } if (!ld.outputBlobsWrappers[pin.oid].empty()) { ld.outputBlobsWrappers[pin.oid]->setHostDirty(); } impl->netWasAllocated = impl->netWasAllocated && oldShape; } Mat Net::getParam(LayerId layer, int numParam) { LayerData &ld = impl->getLayerData(layer); std::vector &layerBlobs = ld.layerInstance->blobs; CV_Assert(numParam < (int)layerBlobs.size()); return layerBlobs[numParam]; } void Net::setParam(LayerId layer, int numParam, const Mat &blob) { LayerData &ld = impl->getLayerData(layer); std::vector &layerBlobs = ld.layerInstance->blobs; CV_Assert(numParam < (int)layerBlobs.size()); //we don't make strong checks, use this function carefully layerBlobs[numParam] = blob; } int Net::getLayerId(const String &layer) { return impl->getLayerId(layer); } void Net::deleteLayer(LayerId) { CV_Error(Error::StsNotImplemented, ""); } Ptr Net::getLayer(LayerId layerId) { LayerData &ld = impl->getLayerData(layerId); return ld.getLayerInstance(); } std::vector > Net::getLayerInputs(LayerId layerId) { LayerData &ld = impl->getLayerData(layerId); if (!ld.layerInstance) CV_Error(Error::StsNullPtr, format("Requested layer \"%s\" was not initialized", ld.name.c_str())); std::vector > inputLayers; inputLayers.reserve(ld.inputLayersId.size()); std::set::iterator it; for (it = ld.inputLayersId.begin(); it != ld.inputLayersId.end(); ++it) { inputLayers.push_back(getLayer(*it)); } return inputLayers; } std::vector Net::getLayerNames() const { std::vector res; res.reserve(impl->layers.size()); Impl::MapIdToLayerData::iterator it; for (it = impl->layers.begin(); it != impl->layers.end(); it++) { if (it->second.id) //skip Data layer res.push_back(it->second.name); } return res; } bool Net::empty() const { return impl->layers.size() <= 1; //first layer is default Data layer } std::vector Net::getUnconnectedOutLayers() const { std::vector layersIds; Impl::MapIdToLayerData::iterator it; for (it = impl->layers.begin(); it != impl->layers.end(); it++) { int lid = it->first; LayerData &ld = it->second; if (ld.requiredOutputs.size() == 0) layersIds.push_back(lid); } return layersIds; } void Net::getLayersShapes(const ShapesVec& netInputShapes, std::vector& layersIds, std::vector& inLayersShapes, std::vector& outLayersShapes) const { layersIds.clear(); inLayersShapes.clear(); outLayersShapes.clear(); Impl::LayersShapesMap inOutShapes; impl->getLayersShapes(netInputShapes, inOutShapes); for(Impl::LayersShapesMap::const_iterator it = inOutShapes.begin(); it != inOutShapes.end(); it++) { layersIds.push_back(it->first); inLayersShapes.push_back(it->second.in); outLayersShapes.push_back(it->second.out); } } void Net::getLayersShapes(const MatShape& netInputShape, std::vector& layerIds, std::vector& inLayersShapes, std::vector& outLayersShapes) const { getLayersShapes(ShapesVec(1, netInputShape), layerIds, inLayersShapes, outLayersShapes); } void Net::getLayerShapes(const MatShape& netInputShape, const int layerId, ShapesVec& inLayerShapes, ShapesVec& outLayerShapes) const { 
void Net::getLayerShapes(const MatShape& netInputShape,
                         const int layerId,
                         ShapesVec& inLayerShapes,
                         ShapesVec& outLayerShapes) const
{
    getLayerShapes(ShapesVec(1, netInputShape),
                   layerId, inLayerShapes, outLayerShapes);
}

void Net::getLayerShapes(const ShapesVec& netInputShapes,
                         const int layerId,
                         ShapesVec& inLayerShapes,
                         ShapesVec& outLayerShapes) const
{
    LayerShapes shapes;
    impl->getLayerShapes(netInputShapes, layerId, shapes);
    inLayerShapes = shapes.in;
    outLayerShapes = shapes.out;
}

int64 Net::getFLOPS(const std::vector<MatShape>& netInputShapes) const
{
    CV_TRACE_FUNCTION();

    int64 flops = 0;
    std::vector<int> ids;
    std::vector<std::vector<MatShape> > inShapes, outShapes;
    getLayersShapes(netInputShapes, ids, inShapes, outShapes);
    CV_Assert(inShapes.size() == outShapes.size());
    CV_Assert(inShapes.size() == ids.size());

    for(int i = 0; i < ids.size(); i++)
    {
        flops += impl->layers[ids[i]].getLayerInstance()->getFLOPS(inShapes[i],
                                                                   outShapes[i]);
    }

    return flops;
}

int64 Net::getFLOPS(const MatShape& netInputShape) const
{
    return getFLOPS(std::vector<MatShape>(1, netInputShape));
}

int64 Net::getFLOPS(const int layerId,
                    const std::vector<MatShape>& netInputShapes) const
{
    Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
    CV_Assert(layer != impl->layers.end());

    LayerShapes shapes;
    impl->getLayerShapes(netInputShapes, layerId, shapes);

    return layer->second.getLayerInstance()->getFLOPS(shapes.in, shapes.out);
}

int64 Net::getFLOPS(const int layerId,
                    const MatShape& netInputShape) const
{
    return getFLOPS(layerId, std::vector<MatShape>(1, netInputShape));
}

void Net::getLayerTypes(std::vector<String>& layersTypes) const
{
    layersTypes.clear();

    std::map<String, int> layers;
    for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
         it != impl->layers.end(); it++)
    {
        if (layers.find(it->second.type) == layers.end())
            layers[it->second.type] = 0;
        layers[it->second.type]++;
    }

    for (std::map<String, int>::iterator it = layers.begin();
         it != layers.end(); it++)
    {
        layersTypes.push_back(it->first);
    }
}

int Net::getLayersCount(const String& layerType) const
{
    int count = 0;
    for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
         it != impl->layers.end(); it++)
    {
        if (it->second.type == layerType)
            count++;
    }
    return count;
}

void Net::getMemoryConsumption(const int layerId,
                               const std::vector<MatShape>& netInputShapes,
                               size_t& weights, size_t& blobs) const
{
    CV_TRACE_FUNCTION();

    Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
    CV_Assert(layer != impl->layers.end());

    weights = blobs = 0;

    for(int i = 0; i < layer->second.params.blobs.size(); i++)
    {
        const Mat& weightsBlob = layer->second.params.blobs[i];
        weights += weightsBlob.total()*weightsBlob.elemSize();
    }

    ShapesVec inLayerShapes, outLayerShapes;
    getLayerShapes(netInputShapes, layerId, inLayerShapes, outLayerShapes);

    for(int i = 0; i < outLayerShapes.size(); i++)
    {
        blobs += total(outLayerShapes[i]) * sizeof(float);
    }
}

void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
                               size_t& weights, size_t& blobs) const
{
    CV_TRACE_FUNCTION();

    std::vector<int> layerIds;
    std::vector<size_t> w, b;
    getMemoryConsumption(netInputShapes, layerIds, w, b);

    weights = blobs = 0;
    for(int i = 0; i < layerIds.size(); i++)
    {
        weights += w[i];
        blobs += b[i];
    }
}

void Net::getMemoryConsumption(const int layerId,
                               const MatShape& netInputShape,
                               size_t& weights, size_t& blobs) const
{
    getMemoryConsumption(layerId, std::vector<MatShape>(1, netInputShape),
                         weights, blobs);
}

void Net::getMemoryConsumption(const MatShape& netInputShape,
                               size_t& weights, size_t& blobs) const
{
    getMemoryConsumption(std::vector<MatShape>(1, netInputShape),
                         weights, blobs);
}
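/* Illustrative sketch: combining the complexity and memory queries above into a quick
 * model report. The model files and input shape are placeholders.
 *
 *   Net net = readNetFromCaffe("deploy.prototxt", "model.caffemodel");
 *   MatShape inputShape = shape(1, 3, 224, 224);
 *
 *   int64 flops = net.getFLOPS(inputShape);
 *   size_t weights = 0, blobs = 0;
 *   net.getMemoryConsumption(inputShape, weights, blobs);
 *
 *   std::cout << "FLOPS:   " << flops * 1e-9 << " G" << std::endl;
 *   std::cout << "weights: " << weights / (1024.0 * 1024.0) << " MB" << std::endl;
 *   std::cout << "blobs:   " << blobs   / (1024.0 * 1024.0) << " MB" << std::endl;
 */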
void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
                               std::vector<int>& layerIds, std::vector<size_t>& weights,
                               std::vector<size_t>& blobs) const
{
    CV_TRACE_FUNCTION();

    layerIds.clear();
    weights.clear();
    blobs.clear();

    std::vector<std::vector<MatShape> > inLayerShapes, outLayerShapes;

    getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes);

    for(int i = 0; i < layerIds.size(); i++)
    {
        // accumulate in size_t to avoid overflow on large models
        size_t w = 0, b = 0;
        Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerIds[i]);
        CV_Assert(layer != impl->layers.end());

        for(int j = 0; j < layer->second.params.blobs.size(); j++)
        {
            const Mat& weightsBlob = layer->second.params.blobs[j];
            w += weightsBlob.total()*weightsBlob.elemSize();
        }

        for(int j = 0; j < outLayerShapes[i].size(); j++)
        {
            b += total(outLayerShapes[i][j]) * sizeof(float);
        }

        weights.push_back(w);
        blobs.push_back(b);
    }
}

void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector<int>& layerIds,
                               std::vector<size_t>& weights, std::vector<size_t>& blobs) const
{
    getMemoryConsumption(std::vector<MatShape>(1, netInputShape), layerIds,
                         weights, blobs);
}

void Net::enableFusion(bool fusion)
{
    if( impl->fusion != fusion )
    {
        impl->fusion = fusion;
        impl->netWasAllocated = false;
        impl->clear();
    }
}

void Net::setHalideScheduler(const String& scheduler)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(scheduler, "scheduler", scheduler.c_str());

    impl->halideConfigFile = scheduler;
}

int64 Net::getPerfProfile(std::vector<double>& timings)
{
    timings = std::vector<double>(impl->layersTimings.begin() + 1, impl->layersTimings.end());
    // accumulate in double to avoid per-element truncation of the tick counts
    int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0);
    return total;
}
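/* Illustrative sketch: reading back per-layer timings with getPerfProfile() after a
 * forward pass. Timings are reported in ticks; divide by getTickFrequency() to convert.
 * The names from getLayerNames() are roughly aligned with the timing entries (both skip
 * the input layer). Model files and the input blob are placeholders.
 *
 *   net.setInput(blob);
 *   net.forward();
 *
 *   std::vector<double> layerTimes;
 *   int64 totalTicks = net.getPerfProfile(layerTimes);
 *   double ticksPerMs = getTickFrequency() / 1000.0;
 *   std::cout << "total: " << totalTicks / ticksPerMs << " ms" << std::endl;
 *
 *   std::vector<String> names = net.getLayerNames();
 *   for (size_t i = 0; i < layerTimes.size() && i < names.size(); i++)
 *       std::cout << names[i] << ": " << layerTimes[i] / ticksPerMs << " ms" << std::endl;
 */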
//////////////////////////////////////////////////////////////////////////

Layer::Layer() { preferableTarget = DNN_TARGET_CPU; }

Layer::Layer(const LayerParams &params)
    : blobs(params.blobs), name(params.name), type(params.type)
{
    preferableTarget = DNN_TARGET_CPU;
}

void Layer::setParamsFrom(const LayerParams &params)
{
    blobs = params.blobs;
    name = params.name;
    type = params.type;
}

int Layer::inputNameToIndex(String)
{
    return -1;
}

int Layer::outputNameToIndex(const String&)
{
    return -1;
}

bool Layer::supportBackend(int backendId)
{
    return backendId == DNN_BACKEND_DEFAULT;
}

Ptr<BackendNode> Layer::initHalide(const std::vector<Ptr<BackendWrapper> > &)
{
    CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type +
                                       " layers is not defined.");
    return Ptr<BackendNode>();
}

Ptr<BackendNode> Layer::initInfEngine(const std::vector<Ptr<BackendWrapper> > &)
{
    CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type +
                                       " layers is not defined.");
    return Ptr<BackendNode>();
}

void Layer::applyHalideScheduler(Ptr<BackendNode>& node, const std::vector<Mat*> &inputs,
                                 const std::vector<Mat> &outputs, int targetId) const
{
#ifdef HAVE_HALIDE
    CV_TRACE_FUNCTION();

    Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"),
                xo("xo"), xi("xi"), yo("yo"), yi("yi"), tile("tile");
    Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs.back();

    int outW, outH, outC, outN;
    getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);

    if (targetId == DNN_TARGET_CPU)
    {
        if (outW == 1 && outH == 1)
        {
            if (outC + outN == 1)
                return;

            if (outC > 8)
                top.split(c, co, ci, 8)
                   .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
                   .parallel(tile)
                   .vectorize(ci, 8);
            else
                top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile)
                   .parallel(tile);
        }
        else
        {
            if (outH > 2)
            {
                top.reorder(x, c, y)
                   .split(y, yo, yi, 2)
                   .fuse(yo, n, tile)
                   .parallel(tile)
                   .unroll(yi)
                   .vectorize(x, outW >= 16 ? 16 : outW);
            }
        }
    }
    else if (targetId == DNN_TARGET_OPENCL)
    {
        int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC;
        if (outW == 1 && outH == 1)
        {
            top.split(c, co, ci, c_split)
               .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
               .gpu_blocks(tile)
               .gpu_threads(ci);
        }
        else
        {
            int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW;
            int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH;
            top.split(x, xo, xi, x_split).split(y, yo, yi, y_split)
               .split(c, co, ci, c_split)
               .gpu_blocks(xo, yo, co)
               .gpu_threads(xi, yi)
               .reorder(xi, yi, ci, xo, yo, co)
               .vectorize(ci);
        }
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown target identifier");
#endif  // HAVE_HALIDE
}

Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
{
    return Ptr<BackendNode>();
}

bool Layer::setActivation(const Ptr<ActivationLayer>&) { return false; }
bool Layer::tryFuse(Ptr<Layer>&) { return false; }
void Layer::getScaleShift(Mat& scale, Mat& shift) const
{
    scale = Mat();
    shift = Mat();
}

void Layer::unsetAttached()
{
    setActivation(Ptr<ActivationLayer>());
}

template <typename T>
static void vecToPVec(const std::vector<T> &v, std::vector<T*> &pv)
{
    pv.resize(v.size());
    for (size_t i = 0; i < v.size(); i++)
        pv[i] = const_cast<T*>(&v[i]);
}

void Layer::finalize(const std::vector<Mat> &inputs, std::vector<Mat> &outputs)
{
    CV_TRACE_FUNCTION();

    std::vector<Mat*> inputsp;
    vecToPVec(inputs, inputsp);
    this->finalize(inputsp, outputs);
}

void Layer::finalize(const std::vector<Mat*> &input, std::vector<Mat> &output)
{
    (void)input; (void)output;
}

std::vector<Mat> Layer::finalize(const std::vector<Mat> &inputs)
{
    CV_TRACE_FUNCTION();

    std::vector<Mat> outputs;
    this->finalize(inputs, outputs);
    return outputs;
}

void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    std::vector<Mat> inpvec;
    std::vector<Mat> outputs;
    std::vector<Mat> internals;

    inputs_arr.getMatVector(inpvec);
    outputs_arr.getMatVector(outputs);
    internals_arr.getMatVector(internals);

    std::vector<Mat*> inputs(inpvec.size());
    for (int i = 0; i < inpvec.size(); i++)
        inputs[i] = &inpvec[i];

    this->forward(inputs, outputs, internals);

    // sync results back
    outputs_arr.assign(outputs);
    internals_arr.assign(internals);
}

void Layer::run(const std::vector<Mat> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
    CV_TRACE_FUNCTION();

    std::vector<Mat*> inputsp;
    vecToPVec(inputs, inputsp);
    this->finalize(inputsp, outputs);
    this->forward(inputsp, outputs, internals);
}

Layer::~Layer() {}

bool Layer::getMemoryShapes(const std::vector<MatShape> &inputs,
                            const int requiredOutputs,
                            std::vector<MatShape> &outputs,
                            std::vector<MatShape> &internals) const
{
    CV_Assert(inputs.size());
    outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]);
    return false;
}
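/* Illustrative sketch (not part of the library): a minimal user-defined layer built on
 * the Layer interface implemented above, registered through LayerFactory (next section).
 * The class name "MyReLULayer" and the type string "MyReLU" are hypothetical; depending
 * on the exact OpenCV version, the InputArrayOfArrays forward() overload may also have
 * to be overridden, which is done here by delegating to forward_fallback().
 *
 *   class MyReLULayer : public cv::dnn::Layer
 *   {
 *   public:
 *       MyReLULayer(const cv::dnn::LayerParams &params) : Layer(params) {}
 *
 *       static cv::Ptr<cv::dnn::Layer> create(cv::dnn::LayerParams& params)
 *       {
 *           return cv::Ptr<cv::dnn::Layer>(new MyReLULayer(params));
 *       }
 *
 *       // The default getMemoryShapes() above maps every input to a same-shaped output.
 *
 *       void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs,
 *                    std::vector<cv::Mat> &internals)
 *       {
 *           for (size_t i = 0; i < inputs.size(); i++)
 *           {
 *               const float* src = inputs[i]->ptr<float>();
 *               float* dst = outputs[i].ptr<float>();
 *               for (size_t j = 0; j < inputs[i]->total(); j++)
 *                   dst[j] = src[j] > 0.f ? src[j] : 0.f;   // elementwise ReLU
 *           }
 *       }
 *
 *       void forward(cv::InputArrayOfArrays inputs, cv::OutputArrayOfArrays outputs,
 *                    cv::OutputArrayOfArrays internals)
 *       {
 *           forward_fallback(inputs, outputs, internals);
 *       }
 *   };
 *
 *   // Registration uses the Constuctor signature expected by registerLayer() below:
 *   //   cv::dnn::LayerFactory::registerLayer("MyReLU", MyReLULayer::create);
 */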
//////////////////////////////////////////////////////////////////////////

static Mutex& getLayerFactoryMutex()
{
    static Mutex* volatile instance = NULL;
    if (instance == NULL)
    {
        cv::AutoLock lock(getInitializationMutex());
        if (instance == NULL)
            instance = new Mutex();
    }
    return *instance;
}

typedef std::map<String, LayerFactory::Constuctor> LayerFactory_Impl;

static LayerFactory_Impl& getLayerFactoryImpl_()
{
    static LayerFactory_Impl impl;
    return impl;
}

static LayerFactory_Impl& getLayerFactoryImpl()
{
    static LayerFactory_Impl* volatile instance = NULL;
    if (instance == NULL)
    {
        cv::AutoLock lock(getLayerFactoryMutex());
        if (instance == NULL)
        {
            instance = &getLayerFactoryImpl_();
            initializeLayerFactory();
        }
    }
    return *instance;
}

void LayerFactory::registerLayer(const String &type, Constuctor constructor)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());
    String type_ = type.toLowerCase();
    LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type_);

    if (it != getLayerFactoryImpl().end() && it->second != constructor)
    {
        CV_Error(cv::Error::StsBadArg, "Layer \"" + type_ + "\" already was registered");
    }
    getLayerFactoryImpl().insert(std::make_pair(type_, constructor));
}

void LayerFactory::unregisterLayer(const String &type)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());
    String type_ = type.toLowerCase();
    getLayerFactoryImpl().erase(type_);
}

Ptr<Layer> LayerFactory::createLayerInstance(const String &type, LayerParams& params)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());
    String type_ = type.toLowerCase();
    LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type_);

    if (it != getLayerFactoryImpl().end())
    {
        return it->second(params);
    }
    else
    {
        return Ptr<Layer>(); //NULL
    }
}

BackendNode::BackendNode(int backendId) : backendId(backendId) {}

BackendNode::~BackendNode() {}

BackendWrapper::BackendWrapper(int backendId, int targetId)
    : backendId(backendId), targetId(targetId) {}

BackendWrapper::BackendWrapper(int targetId, const cv::Mat& m)
{
    CV_Error(Error::StsNotImplemented,
             "Constructor of backend wrapper must be implemented");
}

BackendWrapper::BackendWrapper(const Ptr<BackendWrapper>& base, const MatShape& shape)
{
    CV_Error(Error::StsNotImplemented,
             "Constructor of backend wrapper must be implemented");
}

BackendWrapper::~BackendWrapper() {}

Net readNet(const String& _model, const String& _config, const String& _framework)
{
    String framework = _framework.toLowerCase();
    String model = _model;
    String config = _config;
    const std::string modelExt = model.substr(model.rfind('.') + 1);
    const std::string configExt = config.substr(config.rfind('.') + 1);
    if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" ||
                                modelExt == "prototxt" || configExt == "prototxt")
    {
        if (modelExt == "prototxt" || configExt == "caffemodel")
            std::swap(model, config);
        return readNetFromCaffe(config, model);
    }
    if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" ||
                                     modelExt == "pbtxt" || configExt == "pbtxt")
    {
        if (modelExt == "pbtxt" || configExt == "pb")
            std::swap(model, config);
        return readNetFromTensorflow(model, config);
    }
    if (framework == "torch" || modelExt == "t7" || modelExt == "net" ||
                                configExt == "t7" || configExt == "net")
    {
        return readNetFromTorch(model.empty() ? config : model);
    }
    if (framework == "darknet" || modelExt == "weights" || configExt == "weights" ||
                                  modelExt == "cfg" || configExt == "cfg")
    {
        if (modelExt == "cfg" || configExt == "weights")
            std::swap(model, config);
        return readNetFromDarknet(config, model);
    }
    if (framework == "dldt" || modelExt == "bin" || configExt == "bin" ||
                               modelExt == "xml" || configExt == "xml")
    {
        if (modelExt == "xml" || configExt == "bin")
            std::swap(model, config);
        return readNetFromModelOptimizer(config, model);
    }
    CV_ErrorNoReturn(Error::StsError, "Cannot determine an origin framework of files: " +
                                      model + (config.empty() ? "" : ", " + config));
}

Net readNetFromModelOptimizer(const String &xml, const String &bin)
{
    return Net::readFromModelOptimizer(xml, bin);
}

CV__DNN_EXPERIMENTAL_NS_END
}} // namespace
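/* Illustrative sketch: readNet() above selects the importer from the explicit framework
 * tag or from the file extensions, and swaps model/config when they are passed in the
 * opposite order. All file names below are placeholders.
 *
 *   Net a = readNet("bvlc_googlenet.caffemodel", "bvlc_googlenet.prototxt"); // Caffe
 *   Net b = readNet("frozen_graph.pb", "graph.pbtxt");                       // TensorFlow
 *   Net c = readNet("yolov2.cfg", "yolov2.weights");                         // Darknet, order fixed up
 *   Net d = readNet("model.xml", "model.bin", "dldt");                       // Model Optimizer IR
 */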