diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index 834a9ebe84..051044af8d 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -180,6 +180,96 @@ Mat blobFromImages(const std::vector<Mat>& images_, double scalefactor, Size siz
     return blob;
 }
 
+class OpenCLBackendWrapper : public BackendWrapper
+{
+public:
+    OpenCLBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL)
+    {
+        m.copyTo(umat);
+        host = &m;
+        hostDirty = false;
+    }
+
+    OpenCLBackendWrapper(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
+        : BackendWrapper(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL)
+    {
+        Ptr<OpenCLBackendWrapper> base = baseBuffer.dynamicCast<OpenCLBackendWrapper>();
+        CV_Assert(!base.empty());
+
+        host = &m;
+
+        int shape[] = {1, (int)base->umat.total()};
+        umat = base->umat.reshape(1, 2, &shape[0])
+                         .colRange(0, host->total())
+                         .reshape(1, host->dims, &host->size[0]);
+        hostDirty = false;
+    }
+
+    static Ptr<BackendWrapper> create(Mat& m)
+    {
+        return Ptr<BackendWrapper>(new OpenCLBackendWrapper(m));
+    }
+
+    static Ptr<BackendWrapper> create(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
+    {
+        return Ptr<BackendWrapper>(new OpenCLBackendWrapper(baseBuffer, m));
+    }
+
+    static std::vector<UMat> getUMatVector(const std::vector<Ptr<BackendWrapper> >& wrappers)
+    {
+        const int numWrappers = wrappers.size();
+        std::vector<UMat> mats(wrappers.size());
+        for (int i = 0; i < numWrappers; ++i)
+        {
+            Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
+            CV_Assert(!umatWrapper.empty());
+            umatWrapper->copyToDevice();
+            mats[i] = umatWrapper->umat;
+        }
+        return mats;
+    }
+
+    // Replaces all umats in wrappers with the specific ones.
+    static void update(const std::vector<Ptr<BackendWrapper> >& wrappers,
+                       const std::vector<UMat>& umats)
+    {
+        CV_Assert(wrappers.size() == umats.size());
+        for (int i = 0, n = umats.size(); i < n; ++i)
+        {
+            Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
+            CV_Assert(!umatWrapper.empty());
+            umatWrapper->umat = umats[i];
+        }
+    }
+
+    ~OpenCLBackendWrapper() {}
+
+    // Copies data from device to host memory.
+    virtual void copyToHost()
+    {
+        umat.copyTo(*host);
+    }
+
+    virtual void setHostDirty()
+    {
+        hostDirty = true;
+    }
+
+    void copyToDevice()
+    {
+        if (hostDirty)
+        {
+            host->copyTo(umat);
+            hostDirty = false;
+        }
+    }
+
+private:
+    UMat umat;
+    Mat* host;
+    bool hostDirty;
+};
+
 struct LayerPin
 {
     int lid;
@@ -233,14 +323,12 @@ struct LayerData
     std::vector<LayerPin> consumers;
     std::vector<Ptr<BackendWrapper> > outputBlobsWrappers;
     std::vector<Ptr<BackendWrapper> > inputBlobsWrappers;
+    std::vector<Ptr<BackendWrapper> > internalBlobsWrappers;
 
     Ptr<Layer> layerInstance;
     std::vector<Mat> outputBlobs;
    std::vector<Mat*> inputBlobs;
     std::vector<Mat> internals;
-    std::vector<UMat> umat_outputBlobs;
-    std::vector<UMat> umat_inputBlobs;
-    std::vector<UMat> umat_internals;
     // Computation nodes of implemented backends (except DEFAULT).
     std::map<int, Ptr<BackendNode> > backendNodes;
     // Flag for skip layer computation for specific backend.
@@ -418,77 +506,21 @@ public:
         }
     }
 
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst)
-    {
-        if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS)
-        {
-            UMat bestBlob;
-            LayerPin bestBlobPin;
-
-            std::map<LayerPin, UMat>::iterator hostIt;
-            std::map<LayerPin, int>::iterator refIt;
-
-            const int targetTotal = total(shape);
-            int bestBlobTotal = INT_MAX;
-
-            for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
-            {
-                refIt = refCounter.find(hostIt->first);
-                // Use only blobs that had references before because if not,
-                // it might be used as output.
-                if (refIt != refCounter.end() && refIt->second == 0)
-                {
-                    UMat& unusedBlob = hostIt->second;
-                    if (unusedBlob.total() >= targetTotal &&
-                        unusedBlob.total() < bestBlobTotal)
-                    {
-                        bestBlobPin = hostIt->first;
-                        bestBlob = unusedBlob;
-                        bestBlobTotal = unusedBlob.total();
-                    }
-                }
-            }
-            if (!bestBlob.empty())
-            {
-                reuse(bestBlobPin, lp);
-                umat_dst.create(shape, CV_32F);
-                return;
-            }
-        }
-
-        {
-            // if dst already has been allocated with total(shape) elements,
-            // it won't be recrreated and pointer of dst.data remains the same.
-            umat_dst.create(shape, CV_32F);
-            addHost(lp, umat_dst);
-        }
-    }
-
     void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
                                std::vector<LayerPin>& pinsForInternalBlobs)
     {
         CV_TRACE_FUNCTION();
 
-        bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
-                         preferableTarget == DNN_TARGET_OPENCL);
         pinsForInternalBlobs.clear();
 
         std::vector<Mat>& outputBlobs = ld.outputBlobs,
                 &internalBlobs = ld.internals;
-        std::vector<UMat>& umat_outputBlobs = ld.umat_outputBlobs,
-                &umat_internalBlobs = ld.umat_internals;
 
         const ShapesVec& outShapes = layerShapes.out,
                 internalShapes = layerShapes.internal;
 
         outputBlobs.resize(std::max((size_t)1, outShapes.size())); //layer produce at least one output blob
         internalBlobs.resize(internalShapes.size());
-        if (use_umat)
-        {
-            umat_outputBlobs.resize(std::max((size_t)1, outShapes.size()));
-            umat_internalBlobs.resize(internalShapes.size());
-        }
 
         CV_Assert(ld.requiredOutputs.size() <= outShapes.size());
@@ -508,19 +540,14 @@ public:
         ShapesVec shapes(outShapes);
         shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());
         std::vector<Mat*> blobs;
-        std::vector<UMat*> umat_blobs;
         for(int i = 0; i < outputBlobs.size(); i++)
         {
             blobs.push_back(&outputBlobs[i]);
-            if (use_umat)
-                umat_blobs.push_back(&umat_outputBlobs[i]);
         }
 
         for(int i = 0; i < internalBlobs.size(); i++)
         {
             blobs.push_back(&internalBlobs[i]);
-            if (use_umat)
-                umat_blobs.push_back(&umat_internalBlobs[i]);
             if (total(internalShapes[i]))
             {
                 pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i));
@@ -546,27 +573,12 @@ public:
                 LayerPin blobPin(ld.id, index);
                 if (index < outShapes.size() && inPlace)
                 {
-                    if (use_umat)
-                    {
-                        CV_Assert(ld.umat_inputBlobs[0].total() == total(shapes[index]));
-                        ld.umat_outputBlobs[index] =
-                            ld.umat_inputBlobs[0].reshape(1, shapes[index].size(),
-                                                          &shapes[index][0]);
-                    }
-                    else
-                    {
-                        CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
-                        ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
-                    }
+                    CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
+                    ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
                     reuse(ld.inputBlobsId[0], blobPin);
                 }
                 else
-                {
-                    if (use_umat)
-                        reuseOrCreate(shapes[index], blobPin, *umat_blobs[index]);
-                    else
-                        reuseOrCreate(shapes[index], blobPin, *blobs[index]);
-                }
+                    reuseOrCreate(shapes[index], blobPin, *blobs[index]);
             }
         }
     }
@@ -580,19 +592,6 @@ public:
         refCounter.clear();
         reuseMap.clear();
         memHosts.clear();
-        umat_memHosts.clear();
-        preferableTarget = DNN_TARGET_CPU;
-        preferableBackend = DNN_BACKEND_DEFAULT;
-    }
-
-    void setPreferableTarget(int targetId)
-    {
-        preferableTarget = targetId;
-    }
-
-    void setPreferableBackend(int backendId)
-    {
-        preferableBackend = backendId;
     }
 
 private:
@@ -604,28 +603,23 @@ private:
         memHosts[lp] = mat;
     }
 
-    void addHost(const LayerPin& lp, const UMat& umat)
-    {
-        CV_Assert(umat_memHosts.find(lp) == umat_memHosts.end());
-        reuseMap[lp] = lp;
-        umat_memHosts[lp] = umat;
-    }
-
     std::map<LayerPin, int> refCounter;
     // Maps pin to origin blob (for whom memory was allocated firstly).
     // For origin blobs key == value.
     std::map<LayerPin, LayerPin> reuseMap;
     std::map<LayerPin, Mat> memHosts;
-    std::map<LayerPin, UMat> umat_memHosts;
-    int preferableTarget;
-    int preferableBackend;
 };
 
-static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, const cv::Mat& m)
+static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
 {
     if (backendId == DNN_BACKEND_DEFAULT)
     {
-        return Ptr<BackendWrapper>();
+        if (targetId == DNN_TARGET_CPU)
+            return Ptr<BackendWrapper>();
+        else if (targetId == DNN_TARGET_OPENCL)
+            return OpenCLBackendWrapper::create(m);
+        else
+            CV_Error(Error::StsNotImplemented, "Unknown target identifier");
    }
     else if (backendId == DNN_BACKEND_HALIDE)
     {
@@ -660,8 +654,6 @@ struct Net::Impl
         fusion = true;
         preferableBackend = DNN_BACKEND_DEFAULT;
         preferableTarget = DNN_TARGET_CPU;
-        blobManager.setPreferableBackend(DNN_BACKEND_DEFAULT);
-        blobManager.setPreferableTarget(DNN_TARGET_CPU);
     }
 
     Ptr<DataLayer> netInputLayer;
@@ -682,9 +674,9 @@ struct Net::Impl
     bool fusion;
     std::vector<int64> layersTimings;
 
-    Ptr<BackendWrapper> wrap(const Mat& host)
+    Ptr<BackendWrapper> wrap(Mat& host)
     {
-        if (preferableBackend == DNN_BACKEND_DEFAULT)
+        if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_CPU)
             return Ptr<BackendWrapper>();
 
         MatShape shape(host.dims);
@@ -695,7 +687,12 @@ struct Net::Impl
         if (backendWrappers.find(data) != backendWrappers.end())
         {
             Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
-            if (preferableBackend == DNN_BACKEND_HALIDE)
+            if (preferableBackend == DNN_BACKEND_DEFAULT)
+            {
+                CV_Assert(preferableTarget == DNN_TARGET_OPENCL);
+                return OpenCLBackendWrapper::create(baseBuffer, host);
+            }
+            else if (preferableBackend == DNN_BACKEND_HALIDE)
             {
                 CV_Assert(haveHalide());
 #ifdef HAVE_HALIDE
@@ -771,9 +768,6 @@ struct Net::Impl
                 it->second.inputBlobs.clear();
                 it->second.outputBlobs.clear();
                 it->second.internals.clear();
-                it->second.umat_inputBlobs.clear();
-                it->second.umat_outputBlobs.clear();
-                it->second.umat_internals.clear();
             }
             it->second.skipFlags.clear();
             //it->second.consumers.clear();
@@ -1094,11 +1088,7 @@ struct Net::Impl
             allocateLayer(*i, layersShapes);
 
         //bind inputs
-        bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
-                         preferableTarget == DNN_TARGET_OPENCL);
         ld.inputBlobs.resize(ninputs);
-        if (use_umat)
-            ld.umat_inputBlobs.resize(ninputs);
         ld.inputBlobsWrappers.resize(ninputs);
         for (size_t i = 0; i < ninputs; i++)
         {
@@ -1106,8 +1096,6 @@ struct Net::Impl
             CV_Assert(from.valid());
             CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
             ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
-            if (use_umat)
-                ld.umat_inputBlobs[i] = layers[from.lid].umat_outputBlobs[from.oid];
             ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
         }
 
@@ -1122,29 +1110,15 @@ struct Net::Impl
        {
             ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);
         }
+        ld.internalBlobsWrappers.resize(ld.internals.size());
+        for (int i = 0; i < ld.internals.size(); ++i)
+        {
+            ld.internalBlobsWrappers[i] = wrap(ld.internals[i]);
+        }
 
         Ptr<Layer> layerPtr = ld.getLayerInstance();
         {
-            if (use_umat)
-            {
-                std::vector<Mat> input_mats(ld.umat_inputBlobs.size());;
-                std::vector<Mat*> inputs(ld.umat_inputBlobs.size());;
-                std::vector<Mat> outputs(ld.umat_outputBlobs.size());
-                for (int i = 0; i < inputs.size(); i++)
-                {
-                    input_mats[i] = ld.umat_inputBlobs[i].getMat(ACCESS_READ);
-                    inputs[i] = &input_mats[i];
-                }
-                for (int i = 0; i < outputs.size(); i++)
-                {
-                    outputs[i] = ld.umat_outputBlobs[i].getMat(ACCESS_READ);
-                }
-                layerPtr->finalize(inputs, outputs);
-            }
-            else
-            {
-                layerPtr->finalize(ld.inputBlobs, ld.outputBlobs);
-            }
+            layerPtr->finalize(ld.inputBlobs, ld.outputBlobs);
             layerPtr->preferableTarget = preferableTarget;
 #if 0
             std::cout << "\toutputs:";
@@ -1221,10 +1195,8 @@ struct Net::Impl
                     {
                         printf_(("\tfused with %s\n", nextBNormLayer->name.c_str()));
                         bnormData->skipFlags[DNN_BACKEND_DEFAULT] = true;
-                        if ( preferableTarget == DNN_TARGET_OPENCL )
-                            ld.umat_outputBlobs = layers[lpNext.lid].umat_outputBlobs;
-                        else
-                            ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                        ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                        ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
                         if( bnormData->consumers.size() == 1 )
                         {
                             nextData = &layers[bnormData->consumers[0].lid];
@@ -1244,10 +1216,8 @@ struct Net::Impl
                    {
                         printf_(("\tfused with %s\n", nextScaleLayer->name.c_str()));
                         scaleData->skipFlags[DNN_BACKEND_DEFAULT] = true;
-                        if ( preferableTarget == DNN_TARGET_OPENCL )
-                            ld.umat_outputBlobs = layers[lpNext.lid].umat_outputBlobs;
-                        else
-                            ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                        ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                        ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
                         if( scaleData->consumers.size() == 1 )
                         {
                             nextData = &layers[scaleData->consumers[0].lid];
@@ -1276,10 +1246,8 @@ struct Net::Impl
                         LayerData *activData = nextData;
                         printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
                         activData->skipFlags[DNN_BACKEND_DEFAULT] = true;
-                        if ( preferableTarget == DNN_TARGET_OPENCL )
-                            ld.umat_outputBlobs = layers[lpNext.lid].umat_outputBlobs;
-                        else
-                            ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                        ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                        ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
 
                         if ( preferableTarget == DNN_TARGET_OPENCL )
                         {
@@ -1329,6 +1297,7 @@ struct Net::Impl
                     // fuse eltwise + activation layer
                     LayerData *firstConvLayerData = downLayerData;
                     {
+                        CV_Assert(eltwiseData->consumers.size() == 1);
                        nextData = &layers[eltwiseData->consumers[0].lid];
                         lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
                         Ptr<ActivationLayer> nextActivLayer;
@@ -1341,13 +1310,50 @@ struct Net::Impl
                             !nextData->type.compare("Power")) &&
                             currLayer->setActivation(nextActivLayer) )
                         {
-                            CV_Assert(firstConvLayerData->umat_outputBlobs.size() == 1 && ld.umat_inputBlobs.size() == 1);
-                            ld.umat_inputBlobs.push_back(firstConvLayerData->umat_outputBlobs[0]);
+                            CV_Assert(firstConvLayerData->outputBlobsWrappers.size() == 1 && ld.inputBlobsWrappers.size() == 1);
+                            ld.inputBlobsWrappers.push_back(firstConvLayerData->outputBlobsWrappers[0]);
                             printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
                             printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
                             eltwiseData->skipFlags[DNN_BACKEND_DEFAULT] = true;
                             nextData->skipFlags[DNN_BACKEND_DEFAULT] = true;
-                            ld.umat_outputBlobs = layers[lpNext.lid].umat_outputBlobs;
+                            // This optimization is for cases like
+                            // some_layer   conv
+                            //        |      |
+                            //        +-- eltwise --+
+                            //               |
+                            //             activ
+                            // This way all the element-wise computations
+                            // (i.e. some_layer+conv or some_layer*conv)
+                            // are done at the [conv] layer. So we need to
+                            // replace [conv]'s output blob with [eltwise]'s one,
+                            // considering that [activ] is an in-place layer.
+                            // We also need to move all the consumers' references.
+                            // To prevent memory collisions (i.e. when the input of
+                            // [conv] and the output of [eltwise] is the same blob)
+                            // we allocate a new blob.
+                            CV_Assert(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
+                            ld.outputBlobs[0] = ld.outputBlobs[0].clone();
+                            ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);
+
+                            eltwiseData->outputBlobs = ld.outputBlobs;
+                            nextData->outputBlobs = ld.outputBlobs;
+                            eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;
+                            nextData->outputBlobsWrappers = ld.outputBlobsWrappers;
+
+                            // Move references of [activ] layer consumers to the newly allocated blob.
+                            for (int i = 0; i < nextData->consumers.size(); ++i)
+                            {
+                                LayerData& consumer = layers[nextData->consumers[i].lid];
+                                for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
+                                {
+                                    if (consumer.inputBlobsId[j].lid == lpNext.lid)
+                                    {
+                                        consumer.inputBlobs[j] = &ld.outputBlobs[0];
+                                        consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
+                                        break;
+                                    }
+                                }
+                            }
                         }
                     }
                 }
@@ -1469,8 +1475,6 @@ struct Net::Impl
         getLayersShapes(inputShapes, layersShapes);
 
         blobManager.reset();
-        blobManager.setPreferableTarget(preferableTarget);
-        blobManager.setPreferableBackend(preferableBackend);
         backendWrappers.clear();
         // Fake references to input blobs.
         for (int i = 0; i < layers[0].outputBlobs.size(); ++i)
@@ -1510,19 +1514,29 @@ struct Net::Impl
        {
             if( !ld.skipFlags[DNN_BACKEND_DEFAULT] )
             {
-                for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
+                if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL)
                 {
-                    if (!ld.inputBlobsWrappers[i].empty())
-                        ld.inputBlobsWrappers[i]->copyToHost();
+                    std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+                    layer->forward(OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers),
+                                   umat_outputBlobs,
+                                   OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers));
+                    OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
                 }
-                if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL)
-                    layer->forward(ld.umat_inputBlobs, ld.umat_outputBlobs, ld.umat_internals);
                 else
-                    layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals);
-                for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
                 {
-                    if (!ld.outputBlobsWrappers[i].empty())
-                        ld.outputBlobsWrappers[i]->setHostDirty();
+                    for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
+                    {
+                        if (!ld.inputBlobsWrappers[i].empty())
+                            ld.inputBlobsWrappers[i]->copyToHost();
+                    }
+
+                    layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals);
+
+                    for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
+                    {
+                        if (!ld.outputBlobsWrappers[i].empty())
+                            ld.outputBlobsWrappers[i]->setHostDirty();
+                    }
                 }
             }
             else
@@ -1654,51 +1668,19 @@ struct Net::Impl
             CV_Error(Error::StsOutOfRange, "Layer \"" + ld.name + "\" produce only " + toString(ld.outputBlobs.size()) +
                                            " outputs, the #" + toString(pin.oid) + " was requsted");
         }
-        if (preferableBackend != DNN_BACKEND_DEFAULT)
+        if (preferableTarget != DNN_TARGET_CPU)
         {
+            CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
             // Transfer data to CPU if it's require.
            ld.outputBlobsWrappers[pin.oid]->copyToHost();
         }
-        else
-        {
-            CV_Assert(preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL);
-        }
-
-        if (ld.umat_outputBlobs.size() > 0 && !ld.umat_outputBlobs[pin.oid].empty())
-            ld.umat_outputBlobs[pin.oid].copyTo(ld.outputBlobs[pin.oid]);
-
         return ld.outputBlobs[pin.oid];
     }
 
-    void getBlob(UMat& umat, const LayerPin& pin)
-    {
-        CV_TRACE_FUNCTION();
-
-        if (!pin.valid())
-            CV_Error(Error::StsObjectNotFound, "Requested blob not found");
-
-        LayerData &ld = layers[pin.lid];
-        if ((size_t)pin.oid >= ld.outputBlobs.size())
-        {
-            CV_Error(Error::StsOutOfRange, "Layer \"" + ld.name + "\" produce only " + toString(ld.outputBlobs.size()) +
-                                           " outputs, the #" + toString(pin.oid) + " was requsted");
-        }
-
-        if (ld.umat_outputBlobs.size() > 0 && !ld.umat_outputBlobs[pin.oid].empty())
-            umat = ld.umat_outputBlobs[pin.oid];
-        else
-            umat = UMat();
-    }
-
     Mat getBlob(String outputName)
     {
         return getBlob(getPinByAlias(outputName));
     }
-
-    void getBlob(UMat& umat, String outputName)
-    {
-        getBlob(umat, getPinByAlias(outputName));
-    }
 };
 
 Net::Net() : impl(new Net::Impl)
@@ -1794,12 +1776,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
 
     if (outputBlobs.isUMat())
     {
-        if (ld.umat_outputBlobs.size() > 0)
-        {
-            UMat umat;
-            impl->getBlob(umat, layerName);
-            outputBlobs.assign(umat);
-        }
+        outputBlobs.assign(ld.outputBlobs[pin.oid].getUMat(ACCESS_RW));
     }
     else if (outputBlobs.isMat())
     {
@@ -1807,20 +1784,31 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
     }
     else if (outputBlobs.isMatVector())
     {
-        if (ld.umat_outputBlobs.size() > 0)
+        if (impl->preferableTarget != DNN_TARGET_CPU)
         {
-            for (int i = 0; i < ld.umat_outputBlobs.size(); i++)
-                ld.umat_outputBlobs[i].copyTo(ld.outputBlobs[i]);
+            for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
+            {
+                CV_Assert(!ld.outputBlobsWrappers[i].empty());
+                ld.outputBlobsWrappers[i]->copyToHost();
+            }
         }
         std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
         outputvec = ld.outputBlobs;
     }
     else if (outputBlobs.isUMatVector())
     {
-        if (ld.umat_outputBlobs.size() > 0)
+        std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();
+
+        if (impl->preferableBackend == DNN_BACKEND_DEFAULT &&
+            impl->preferableTarget == DNN_TARGET_OPENCL)
+        {
+            outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+        }
+        else
         {
-            std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();
-            outputvec = ld.umat_outputBlobs;
+            outputvec.resize(ld.outputBlobs.size());
+            for (int i = 0; i < outputvec.size(); ++i)
+                outputvec[i] = ld.outputBlobs[i].getUMat(ACCESS_RW);
         }
     }
 }
@@ -1889,7 +1877,6 @@ void Net::setPreferableBackend(int backendId)
     if( impl->preferableBackend != backendId )
     {
         impl->preferableBackend = backendId;
-        impl->blobManager.setPreferableBackend(backendId);
         impl->netWasAllocated = false;
         impl->clear();
     }
@@ -1903,7 +1890,6 @@ void Net::setPreferableTarget(int targetId)
     if( impl->preferableTarget != targetId )
     {
         impl->preferableTarget = targetId;
-        impl->blobManager.setPreferableTarget(targetId);
        impl->netWasAllocated = false;
         impl->clear();
     }
@@ -1930,10 +1916,6 @@ void Net::setInput(InputArray blob, const String& name)
 
     LayerData &ld = impl->layers[pin.lid];
     ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
-    bool use_umat = (impl->preferableBackend == DNN_BACKEND_DEFAULT &&
-                     impl->preferableTarget == DNN_TARGET_OPENCL);
-    if (use_umat)
-        ld.umat_outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
     ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
     MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
     Mat blob_ = blob.getMat();
@@ -1941,14 +1923,10 @@ void Net::setInput(InputArray blob, const String& name)
     if (oldShape)
     {
         blob_.copyTo(ld.outputBlobs[pin.oid]);
-        if (use_umat)
-            blob_.copyTo(ld.umat_outputBlobs[pin.oid]);
     }
     else
     {
         ld.outputBlobs[pin.oid] = blob_.clone();
-        if (use_umat)
-            blob_.copyTo(ld.umat_outputBlobs[pin.oid]);
     }
 
     if (!ld.outputBlobsWrappers[pin.oid].empty())
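The `OpenCLBackendWrapper` introduced above is the heart of this change: a host write no longer triggers an immediate upload; `setHostDirty()` merely marks the device copy stale, and `copyToDevice()` performs the transfer lazily the next time a kernel needs the data. A minimal standalone sketch of that dirty-flag contract follows (illustrative names such as `LazyDeviceBuffer`, not OpenCV internals):

```cpp
#include <opencv2/core.hpp>

// Sketch of the wrapper's synchronization contract; names are illustrative.
struct LazyDeviceBuffer
{
    cv::Mat* host;     // CPU blob owned by the network
    cv::UMat umat;     // device-side copy
    bool hostDirty;    // true when `host` has newer data than `umat`

    explicit LazyDeviceBuffer(cv::Mat& m) : host(&m), hostDirty(false)
    {
        m.copyTo(umat);                    // initial upload
    }
    void setHostDirty() { hostDirty = true; }   // cheap: called after CPU writes
    cv::UMat& device()                     // called right before a GPU read
    {
        if (hostDirty)
        {
            host->copyTo(umat);            // deferred upload, only when needed
            hostDirty = false;
        }
        return umat;
    }
    void copyToHost() { umat.copyTo(*host); }   // explicit download on demand
};

int main()
{
    cv::Mat blob(1, 16, CV_32F, cv::Scalar(1));
    LazyDeviceBuffer buf(blob);
    blob.setTo(2);                // CPU write...
    buf.setHostDirty();           // ...only marks the device copy stale
    cv::UMat& u = buf.device();   // the upload happens here, exactly once
    (void)u;
    return 0;
}
```

Deferring the upload this way is what lets the OpenCL branch of `forwardLayer` run whole chains of layers without a host round trip until the caller actually asks for a `Mat`.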
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index cd02799b62..ba6a8a287d 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -709,6 +709,10 @@ public:
         inps.getUMatVector(inputs);
         outs.getUMatVector(outputs);
 
+        CV_Assert(outputs.size() == 1);
+        for (int i = 0; i < inputs.size(); ++i)
+            CV_Assert(inputs[i].u != outputs[0].u);
+
         int group = inputs[0].size[1] / umat_blobs[0].size[1];
 
         if (convolutionOp.empty())
@@ -913,7 +917,9 @@ public:
                name.c_str(), inputs[0]->size[0], inputs[0]->size[1], inputs[0]->size[2], inputs[0]->size[3],
                kernel.width, kernel.height, pad.width, pad.height,
               stride.width, stride.height, dilation.width, dilation.height);*/
-        CV_Assert(inputs.size() == (size_t)1 && inputs[0]->size[1] % blobs[0].size[1] == 0);
+        CV_Assert(inputs.size() == (size_t)1, inputs[0]->size[1] % blobs[0].size[1] == 0,
+                  outputs.size() == 1, inputs[0]->data != outputs[0].data);
+
         int ngroups = inputs[0]->size[1]/blobs[0].size[1];
         CV_Assert(outputs[0].size[1] % ngroups == 0);
         int k, outCn = blobs[0].size[0];
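The new asserts reject in-place convolution by comparing allocations rather than headers: every `UMat` view of one buffer shares the same `UMatData*` through the public `u` field, so `inputs[i].u != outputs[0].u` catches aliasing even across reshapes. A small illustration of that check, assuming nothing beyond core OpenCV:

```cpp
#include <opencv2/core.hpp>
#include <iostream>

int main()
{
    cv::UMat a(4, 4, CV_32F, cv::Scalar(0));
    cv::UMat view = a.reshape(1, 16);   // different header, same buffer
    cv::UMat b(4, 4, CV_32F, cv::Scalar(0));

    std::cout << (a.u == view.u) << std::endl; // 1: aliased, would be rejected
    std::cout << (a.u == b.u) << std::endl;    // 0: distinct buffers, allowed
    return 0;
}
```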
diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp
index 09c33c69e3..febf0357c3 100644
--- a/modules/dnn/test/test_caffe_importer.cpp
+++ b/modules/dnn/test/test_caffe_importer.cpp
@@ -303,6 +303,14 @@ OCL_TEST(Reproducibility_ResNet50, Accuracy)
 
     Mat ref = blobFromNPY(_tf("resnet50_prob.npy"));
     normAssert(ref, out);
+
+    UMat out_umat;
+    net.forward(out_umat);
+    normAssert(ref, out_umat, "out_umat");
+
+    std::vector<UMat> out_umats;
+    net.forward(out_umats);
+    normAssert(ref, out_umats[0], "out_umat_vector");
 }
 
 TEST(Reproducibility_SqueezeNet_v1_1, Accuracy)
@@ -331,9 +339,14 @@ OCL_TEST(Reproducibility_SqueezeNet_v1_1, Accuracy)
     Mat input = blobFromImage(imread(_tf("googlenet_0.png")), 1.0f, Size(227,227), Scalar(), false);
     ASSERT_TRUE(!input.empty());
 
-    net.setInput(input);
+    // First set a wrong input blob and run the model to get a wrong output.
+    net.setInput(input * 2.0f);
     Mat out = net.forward();
 
+    // Then set the correct input blob to check that CPU->GPU synchronization works well.
+    net.setInput(input);
+    out = net.forward();
+
     Mat ref = blobFromNPY(_tf("squeezenet_v1.1_prob.npy"));
     normAssert(ref, out);
 }
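For context, the user-facing flow these tests exercise looks roughly as follows. This is a hedged end-to-end sketch: the model and image file names are placeholders, not files shipped with this patch.

```cpp
#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    // Placeholder model files; any Caffe model works the same way.
    cv::dnn::Net net = cv::dnn::readNetFromCaffe("deploy.prototxt", "model.caffemodel");
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);

    cv::Mat img = cv::imread("input.png");  // placeholder image
    net.setInput(cv::dnn::blobFromImage(img, 1.0, cv::Size(224, 224)));

    // Outputs can now be requested as UMat (or std::vector<UMat>) directly;
    // with the OpenCL target the data stays on the device until downloaded.
    cv::UMat out;
    net.forward(out);
    return 0;
}
```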