diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index da0a407262..ec1cf11661 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -146,6 +146,11 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
          */
         virtual void copyToHost() = 0;
 
+        /**
+         * @brief Indicate that actual data is on CPU.
+         */
+        virtual void setHostDirty() = 0;
+
         int backendId;  //!< Backend identifier.
         int targetId;   //!< Target identifier.
     };
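Note: together with copyToHost(), the new setHostDirty() turns every BackendWrapper into a two-copy cache with dirty flags: copyToHost() downloads device data before a CPU consumer reads the blob, and setHostDirty() records that the host copy was rewritten, so the device copy is stale. A minimal sketch of a conforming wrapper, assuming only the public interface above (the class name and flags below are illustrative, not part of the patch):

#include <opencv2/dnn.hpp>

// Illustrative only: a "device" that is really backed by host memory.
class ToyWrapper : public cv::dnn::BackendWrapper
{
public:
    ToyWrapper() : BackendWrapper(cv::dnn::DNN_BACKEND_DEFAULT, cv::dnn::DNN_TARGET_CPU),
                   hostDirty(false), deviceDirty(false) {}

    // The engine calls this before a CPU layer reads the blob:
    // download only if the device holds the newest data.
    virtual void copyToHost()
    {
        if (deviceDirty)
        {
            // ... download device -> host here ...
            deviceDirty = false;
        }
    }

    // The engine calls this after the host copy was rewritten
    // (CPU layer output, Net::setInput): the device copy is stale now.
    virtual void setHostDirty()
    {
        deviceDirty = false;  // whatever the device held is superseded
        hostDirty = true;     // upload before the next device-side read
    }

private:
    bool hostDirty, deviceDirty;
};

int main()
{
    ToyWrapper w;
    w.setHostDirty();  // host rewritten, e.g. by Net::setInput
    w.copyToHost();    // no-op: the device holds nothing newer
    return 0;
}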
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index ee05de1604..8ddf761d2c 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -199,125 +199,6 @@ struct LayerPin
     }
 };
 
-// Objects of this class manages wrappers. For every CPU memory pointer and shape
-// one and only wrapper. Now it support wrapping for single backend and target.
-class BackendWrapManager
-{
-public:
-    Ptr<BackendWrapper> wrap(const Mat& m, int backendId, int targetId)
-    {
-        CV_TRACE_FUNCTION();
-
-        CV_Assert(backendId != DNN_BACKEND_DEFAULT);
-
-        std::map<void*, Ptr<BackendWrapper> >::iterator hostsIt;
-        // Check that the same CPU memory was previously wrapped.
-        hostsIt = hostWrappers.find(m.data);
-        if (hostsIt == hostWrappers.end())
-        {
-            // If not wrapped before.
-            return (hostWrappers[m.data] = wrapHost(m, backendId, targetId));
-        }
-        else
-        {
-            // Find if wrapper of this host and shape was created before.
-            std::map<std::pair<void*, MatSize>, Ptr<BackendWrapper> >::iterator it;
-            std::pair<void*, MatSize> key(m.data, m.size);
-            it = extraWrappers.find(key);
-            if (it == extraWrappers.end())
-            {
-                MatShape shape(m.dims);
-                for (int i = 0; i < m.dims; ++i)
-                    shape[i] = m.size.p[i];
-                return (extraWrappers[key] = wrapUser(hostsIt->second, shape));
-            }
-            else
-                return it->second;
-        }
-    }
-
-    std::vector<Ptr<BackendWrapper> > wrap(const std::vector<Mat*>& mats,
-                                           int backendId, int targetId)
-    {
-        const int num = mats.size();
-        std::vector<Ptr<BackendWrapper> > dst(num);
-        for (int i = 0; i < num; ++i)
-        {
-            dst[i] = wrap(*mats[i], backendId, targetId);
-        }
-        return dst;
-    }
-
-    std::vector<Ptr<BackendWrapper> > wrap(const std::vector<Mat>& mats,
-                                           int backendId, int targetId)
-    {
-        const int num = mats.size();
-        std::vector<Ptr<BackendWrapper> > dst(num);
-        for (int i = 0; i < num; ++i)
-        {
-            dst[i] = wrap(mats[i], backendId, targetId);
-        }
-        return dst;
-    }
-
-    void reset()
-    {
-        CV_TRACE_FUNCTION();
-
-        hostWrappers.clear();
-        extraWrappers.clear();
-    }
-
-private:
-    // Backend-specific wrapping function.
-    Ptr<BackendWrapper> wrapHost(const Mat& m, int backendId, int targetId)
-    {
-        if (backendId == DNN_BACKEND_DEFAULT)
-        {
-            return Ptr<BackendWrapper>();
-        }
-        else if (backendId == DNN_BACKEND_HALIDE)
-        {
-            CV_Assert(haveHalide());
-#ifdef HAVE_HALIDE
-            return Ptr<BackendWrapper>(new HalideBackendWrapper(targetId, m));
-#endif  // HAVE_HALIDE
-        }
-        else
-        {
-            CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
-        }
-        return Ptr<BackendWrapper>();
-    }
-
-    // Backend-specific wrapping function.
-    Ptr<BackendWrapper> wrapUser(const Ptr<BackendWrapper>& host, const MatShape& shape)
-    {
-        int backendId = host->backendId;
-        if (backendId == DNN_BACKEND_DEFAULT)
-        {
-            return Ptr<BackendWrapper>();
-        }
-        else if (backendId == DNN_BACKEND_HALIDE)
-        {
-            CV_Assert(haveHalide());
-#ifdef HAVE_HALIDE
-            return Ptr<BackendWrapper>(new HalideBackendWrapper(host, shape));
-#endif  // HAVE_HALIDE
-        }
-        else
-        {
-            CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
-        }
-        return Ptr<BackendWrapper>();
-    }
-
-    // Wrappers that initialized for memory hosts (first wrapping of CPU data).
-    std::map<void*, Ptr<BackendWrapper> > hostWrappers;
-    // The rest of wrappers. They initialized for non-host cv::Mat.
-    std::map<std::pair<void*, MatSize>, Ptr<BackendWrapper> > extraWrappers;
-};
-
 struct LayerData
 {
     LayerData() : id(-1), flag(0) {}
@@ -340,6 +221,8 @@ struct LayerData
     std::set<int> inputLayersId;
     std::set<int> requiredOutputs;
     std::vector<LayerPin> consumers;
+    std::vector<Ptr<BackendWrapper> > outputBlobsWrappers;
+    std::vector<Ptr<BackendWrapper> > inputBlobsWrappers;
 
     Ptr<Layer> layerInstance;
     std::vector<Mat> outputBlobs;
@@ -618,6 +501,24 @@ private:
     std::map<LayerPin, Mat> memHosts;
 };
 
+static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, const cv::Mat& m)
+{
+    if (backendId == DNN_BACKEND_DEFAULT)
+    {
+        return Ptr<BackendWrapper>();
+    }
+    else if (backendId == DNN_BACKEND_HALIDE)
+    {
+        CV_Assert(haveHalide());
+#ifdef HAVE_HALIDE
+        return Ptr<BackendWrapper>(new HalideBackendWrapper(targetId, m));
+#endif  // HAVE_HALIDE
+    }
+    else
+        CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
+    return Ptr<BackendWrapper>();
+}
+
 struct Net::Impl
 {
     typedef std::map<int, LayerShapes> LayersShapesMap;
@@ -650,8 +551,8 @@ struct Net::Impl
     int preferableBackend;
     int preferableTarget;
     String halideConfigFile;
-    // Backend-specific wrapping manager.
-    BackendWrapManager backendWrapper;
+    // Map host data to backend-specific wrapper.
+    std::map<void*, Ptr<BackendWrapper> > backendWrappers;
 
     int lastLayerId;
@@ -659,6 +560,62 @@ struct Net::Impl
     bool fusion;
     std::vector<int64> layersTimings;
 
+    Ptr<BackendWrapper> wrap(const Mat& host)
+    {
+        if (preferableBackend == DNN_BACKEND_DEFAULT)
+            return Ptr<BackendWrapper>();
+
+        MatShape shape(host.dims);
+        for (int i = 0; i < host.dims; ++i)
+            shape[i] = host.size[i];
+
+        void* data = host.data;
+        if (backendWrappers.find(data) != backendWrappers.end())
+        {
+            Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
+            if (preferableBackend == DNN_BACKEND_HALIDE)
+            {
+                CV_Assert(haveHalide());
+#ifdef HAVE_HALIDE
+                return Ptr<BackendWrapper>(new HalideBackendWrapper(baseBuffer, shape));
+#endif  // HAVE_HALIDE
+            }
+            else
+                CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
+        }
+
+        Ptr<BackendWrapper> wrapper = wrapMat(preferableBackend, preferableTarget, host);
+        backendWrappers[data] = wrapper;
+        return wrapper;
+    }
+
+    class HalideCompiler : public ParallelLoopBody
+    {
+    public:
+        HalideCompiler(const MapIdToLayerData& layers_, int preferableTarget_)
+            : layers(&layers_), preferableTarget(preferableTarget_) {}
+
+        void operator()(const Range& r) const
+        {
+            MapIdToLayerData::const_iterator it = layers->begin();
+            for (int i = 0; i < r.start && it != layers->end(); ++i, ++it) {}
+            for (int i = r.start; i < r.end && it != layers->end(); ++i, ++it)
+            {
+                const LayerData &ld = it->second;
+                Ptr<Layer> layer = ld.layerInstance;
+                bool skip = ld.skipFlags.find(DNN_BACKEND_HALIDE)->second;
+                if (layer->supportBackend(DNN_BACKEND_HALIDE) && !skip)
+                {
+                    Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
+                    dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
+                }
+            }
+        }
+    private:
+        const MapIdToLayerData* layers;
+        int preferableTarget;
+    };
+
     void compileHalide()
     {
         CV_TRACE_FUNCTION();
@@ -682,10 +639,9 @@ struct Net::Impl
                                                 ld.inputBlobs, ld.outputBlobs,
                                                 preferableTarget);
                 }
-                dnn::compileHalide(ld.outputBlobs, ld.backendNodes[DNN_BACKEND_HALIDE],
-                                   preferableTarget);
             }
         }
+        parallel_for_(Range(0, layers.size()), HalideCompiler(layers, preferableTarget));
     }
 
     void clear()
@@ -917,7 +873,6 @@ struct Net::Impl
     {
         CV_TRACE_FUNCTION();
 
-        backendWrapper.reset();
         if (preferableBackend == DNN_BACKEND_DEFAULT)
        {
             CV_Assert(preferableTarget == DNN_TARGET_CPU);
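Note: HalideCompiler above parallelizes dnn::compileHalide over the layers map, which is valid because each layer's pipeline compiles independently. Since std::map has no random access, every chunk first walks an iterator from begin() to its range start. A self-contained sketch of the same pattern (the worker and data below are illustrative, not patch code):

#include <cstdio>
#include <map>
#include <string>
#include <opencv2/core/utility.hpp>

class MapWorker : public cv::ParallelLoopBody
{
public:
    explicit MapWorker(const std::map<int, std::string>& items_) : items(&items_) {}

    virtual void operator()(const cv::Range& r) const
    {
        std::map<int, std::string>::const_iterator it = items->begin();
        // Advance to the first element of this chunk (no random access in std::map).
        for (int i = 0; i < r.start && it != items->end(); ++i, ++it) {}
        for (int i = r.start; i < r.end && it != items->end(); ++i, ++it)
            std::printf("compiling %s\n", it->second.c_str());  // independent per-element work
    }
private:
    const std::map<int, std::string>* items;
};

int main()
{
    std::map<int, std::string> layers;
    layers[0] = "conv"; layers[1] = "relu"; layers[2] = "lrn";
    cv::parallel_for_(cv::Range(0, (int)layers.size()), MapWorker(layers));
    return 0;
}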
@@ -967,12 +922,10 @@ struct Net::Impl
             }
             // No layers fusion.
             ldTop.skipFlags[preferableBackend] = false;
-            std::vector<Ptr<BackendWrapper> > inputs =
-                backendWrapper.wrap(ldTop.inputBlobs, preferableBackend,
-                                    preferableTarget);
             if (preferableBackend == DNN_BACKEND_HALIDE)
             {
-                ldTop.backendNodes[DNN_BACKEND_HALIDE] = layerTop->initHalide(inputs);
+                ldTop.backendNodes[DNN_BACKEND_HALIDE] =
+                    layerTop->initHalide(ldTop.inputBlobsWrappers);
                 baseIt = it;
             }
             else
@@ -1021,12 +974,14 @@ struct Net::Impl
 
         //bind inputs
         ld.inputBlobs.resize(ninputs);
+        ld.inputBlobsWrappers.resize(ninputs);
         for (size_t i = 0; i < ninputs; i++)
         {
             LayerPin from = ld.inputBlobsId[i];
             CV_Assert(from.valid());
             CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
             ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
+            ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
         }
 
         LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid);
@@ -1036,6 +991,11 @@ struct Net::Impl
         std::vector<LayerPin> pinsForInternalBlobs;
         bool maximizeReuse = preferableBackend == DNN_BACKEND_HALIDE;
         blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, maximizeReuse);
+        ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
+        for (int i = 0; i < ld.outputBlobs.size(); ++i)
+        {
+            ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);
+        }
 
         Ptr<Layer> layerPtr = ld.getLayerInstance();
         {
@@ -1256,6 +1216,8 @@ struct Net::Impl
         getLayersShapes(inputShapes, layersShapes);
 
         blobManager.reset();
+        backendWrappers.clear();
+        blobManager.addReference(LayerPin(0, 0));
         for (it = layers.begin(); it != layers.end(); ++it)
         {
             const LayerData& ld = it->second;
@@ -1291,18 +1253,28 @@ struct Net::Impl
             !layer->supportBackend(preferableBackend))
         {
             if( !ld.skipFlags[DNN_BACKEND_DEFAULT] )
+            {
+                for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
+                {
+                    if (!ld.inputBlobsWrappers[i].empty())
+                        ld.inputBlobsWrappers[i]->copyToHost();
+                }
                 layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals);
+                for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
+                {
+                    if (!ld.outputBlobsWrappers[i].empty())
+                        ld.outputBlobsWrappers[i]->setHostDirty();
+                }
+            }
             else
                 tm.reset();
         }
         else if (!ld.skipFlags[preferableBackend])
         {
-            std::vector<Ptr<BackendWrapper> > outputs =
-                backendWrapper.wrap(ld.outputBlobs, preferableBackend, preferableTarget);
             Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
             if (preferableBackend == DNN_BACKEND_HALIDE)
             {
-                forwardHalide(outputs, node);
+                forwardHalide(ld.outputBlobsWrappers, node);
             }
             else
             {
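Note: the forward-pass hunk above is what makes mixed nets work. A CPU-only layer sandwiched between Halide layers must first pull its inputs down to host memory and then invalidate the device copies of its outputs. A self-contained sketch of that ordering, using an illustrative stand-in for a wrapped blob (not patch code):

#include <cassert>

struct Blob
{
    Blob() : deviceDirty(false), hostDirty(false) {}
    void copyToHost()   { if (deviceDirty) deviceDirty = false; }   // download
    void setHostDirty() { deviceDirty = false; hostDirty = true; }  // invalidate device
    bool deviceDirty, hostDirty;
};

int main()
{
    Blob lrnOut;                 // written on the device by a Halide LRN layer
    lrnOut.deviceDirty = true;

    // CPU fallback (e.g. MVN): inputs are synced before Layer::forward...
    lrnOut.copyToHost();
    assert(!lrnOut.deviceDirty); // the host now sees the LRN result

    Blob mvnOut;                 // ...and outputs are flagged afterwards, so the
    mvnOut.setHostDirty();       // next Halide layer re-uploads them before reading.
    assert(mvnOut.hostDirty);
    return 0;
}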
@@ -1423,11 +1395,10 @@ struct Net::Impl
             CV_Error(Error::StsOutOfRange, "Layer \"" + ld.name + "\" produce only " + toString(ld.outputBlobs.size()) +
                                            " outputs, the #" + toString(pin.oid) + " was requsted");
         }
-        if (preferableBackend != DNN_BACKEND_DEFAULT)
+        if (preferableTarget != DNN_TARGET_CPU)
         {
             // Transfer data to CPU if it's require.
-            backendWrapper.wrap(ld.outputBlobs[pin.oid], preferableBackend,
-                                preferableTarget)->copyToHost();
+            ld.outputBlobsWrappers[pin.oid]->copyToHost();
         }
         else
         {
@@ -1635,6 +1606,7 @@ void Net::setInput(const Mat &blob_, const String& name)
 
     LayerData &ld = impl->layers[pin.lid];
     ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
+    ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
     MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
     bool oldShape = prevShape == shape(blob_);
     if (oldShape)
@@ -1642,6 +1614,10 @@ void Net::setInput(const Mat &blob_, const String& name)
     else
         ld.outputBlobs[pin.oid] = blob_.clone();
 
+    if (!ld.outputBlobsWrappers[pin.oid].empty())
+    {
+        ld.outputBlobsWrappers[pin.oid]->setHostDirty();
+    }
     impl->netWasAllocated = impl->netWasAllocated && oldShape;
 }
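Note: the op_halide.cpp changes below replace the hard assert on 2-D/4-D shapes with getBufferShape(), which keeps the canonical {w, h, c, n} layout for those ranks and falls back to plain reversal for any other rank. A standalone sketch of the expected mapping (my reimplementation for illustration, not the patch code):

#include <algorithm>
#include <cassert>
#include <vector>

typedef std::vector<int> Shape;

static Shape bufferShape(const Shape& s)  // mirrors the new getBufferShape
{
    if (s.size() == 2 || s.size() == 4)
    {
        // canonical Halide order {w, h, c, n}; 2-D inputs are {n, c} with w = h = 1
        int n = s[0], c = s[1];
        int h = (s.size() == 4) ? s[2] : 1;
        int w = (s.size() == 4) ? s[3] : 1;
        int data[] = {w, h, c, n};
        return Shape(data, data + 4);
    }
    Shape r(s);
    std::reverse(r.begin(), r.end());  // any other rank: innermost dimension first
    return r;
}

int main()
{
    int nchw[] = {4, 3, 5, 6}, nc[] = {4, 3}, odd[] = {2, 3, 4};
    int e1[] = {6, 5, 3, 4}, e2[] = {1, 1, 3, 4}, e3[] = {4, 3, 2};
    assert(bufferShape(Shape(nchw, nchw + 4)) == Shape(e1, e1 + 4));  // NCHW -> {w,h,c,n}
    assert(bufferShape(Shape(nc, nc + 2))     == Shape(e2, e2 + 4));  // 2-D: w = h = 1
    assert(bufferShape(Shape(odd, odd + 3))   == Shape(e3, e3 + 3));  // fallback: reversed
    return 0;
}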
diff --git a/modules/dnn/src/op_halide.cpp b/modules/dnn/src/op_halide.cpp
index 51c7a57ea6..a2d4309194 100644
--- a/modules/dnn/src/op_halide.cpp
+++ b/modules/dnn/src/op_halide.cpp
@@ -18,11 +18,30 @@ namespace dnn
 {
 #ifdef HAVE_HALIDE
+static MatShape getBufferShape(const MatShape& shape)
+{
+    if (shape.size() == 2 || shape.size() == 4)
+    {
+        int w, h, c, n;
+        getCanonicalSize(shape, &w, &h, &c, &n);
+        return {w, h, c, n};
+    }
+    else
+    {
+        MatShape bufferShape(shape);
+        std::reverse(bufferShape.begin(), bufferShape.end());
+        return bufferShape;
+    }
+}
+
+static MatShape getBufferShape(const MatSize& size)
+{
+    return getBufferShape(MatShape(size.p, size.p + size[-1]));
+}
+
 Halide::Buffer<float> wrapToHalideBuffer(const Mat& mat)
 {
-    int n, c, w, h;
-    getCanonicalSize(mat.size, &w, &h, &c, &n);
-    return wrapToHalideBuffer(mat, {w, h, c, n});
+    return wrapToHalideBuffer(mat, getBufferShape(mat.size));
 }
 
 Halide::Buffer<float> wrapToHalideBuffer(const Mat& mat,
@@ -97,11 +116,9 @@ HalideBackendWrapper::HalideBackendWrapper(const Ptr<BackendWrapper>& base,
     : BackendWrapper(DNN_BACKEND_HALIDE, base->targetId)
 {
     managesDevMemory = false;
-    int w, h, c, n;
-    getCanonicalSize(shape, &w, &h, &c, &n);
     Halide::Buffer<float> baseBuffer = halideBuffer(base);
     buffer = Halide::Buffer<float>((float*)baseBuffer.raw_buffer()->host,
-                                   {w, h, c, n});
+                                   getBufferShape(shape));
     if (baseBuffer.has_device_allocation())
     {
         buffer.raw_buffer()->device = baseBuffer.raw_buffer()->device;
@@ -127,32 +144,23 @@ HalideBackendWrapper::~HalideBackendWrapper()
 
 void HalideBackendWrapper::copyToHost()
 {
-    CV_Assert(targetId == DNN_TARGET_CPU || buffer.device_dirty());
     if (buffer.device_dirty())
     {
         buffer.device_sync();
         buffer.copy_to_host();
     }
 }
+
+void HalideBackendWrapper::setHostDirty()
+{
+    buffer.set_device_dirty(false);
+    buffer.set_host_dirty();
+}
 #endif  // HAVE_HALIDE
 
-void getCanonicalSize(const MatSize& size, int* width, int* height,
-                      int* channels, int* batch)
+void getCanonicalSize(const MatSize& size, int* w, int* h, int* c, int* n)
 {
-    const int dims = size.p[-1];
-    CV_Assert(dims == 2 || dims == 4);
-    *batch = size[0];
-    *channels = size[1];
-    if (dims == 4)
-    {
-        *width = size[3];
-        *height = size[2];
-    }
-    else
-    {
-        *width = 1;
-        *height = 1;
-    }
+    getCanonicalSize(MatShape(size.p, size.p + size[-1]), w, h, c, n);
 }
 
 void getCanonicalSize(const MatShape& shape, int* width, int* height,
@@ -174,7 +182,7 @@ void getCanonicalSize(const MatShape& shape, int* width, int* height,
     }
 }
 
-void compileHalide(std::vector<Mat> &outputs, Ptr<BackendNode>& node, int targetId)
+void compileHalide(const std::vector<Mat> &outputs, Ptr<BackendNode>& node, int targetId)
 {
 #ifdef HAVE_HALIDE
     CV_Assert(!node.empty());
diff --git a/modules/dnn/src/op_halide.hpp b/modules/dnn/src/op_halide.hpp
index 1e0358edfb..715293d3de 100644
--- a/modules/dnn/src/op_halide.hpp
+++ b/modules/dnn/src/op_halide.hpp
@@ -61,6 +61,8 @@ namespace dnn
 
         virtual void copyToHost();
 
+        virtual void setHostDirty();
+
         Halide::Buffer<float> buffer;
 
     private:
@@ -80,7 +82,7 @@ namespace dnn
                         const Ptr<BackendNode>& node);
 
     // Compile Halide pipeline to specific target. Use outputs to set bounds of functions.
-    void compileHalide(std::vector<Mat> &outputs, Ptr<BackendNode>& node, int targetId);
+    void compileHalide(const std::vector<Mat> &outputs, Ptr<BackendNode>& node, int targetId);
 
     bool haveHalide();
 } // namespace dnn
diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp
index f3dd2bf2a1..79f767a134 100644
--- a/modules/dnn/test/test_halide_layers.cpp
+++ b/modules/dnn/test/test_halide_layers.cpp
@@ -646,6 +646,48 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Eltwise, Combine(
 /*num convs*/ Values(1, 2, 3),
 /*weighted(for sum only)*/ Bool()
 ));
+
+////////////////////////////////////////////////////////////////////////////
+// Mixed backends
+////////////////////////////////////////////////////////////////////////////
+TEST(MixedBackends_Halide_Default_Halide, Accuracy)
+{
+    // A layer that supports the Halide backend.
+    LayerParams lrn;
+    lrn.type = "LRN";
+    lrn.name = "testLRN";
+
+    // A layer that doesn't support the Halide backend yet.
+    LayerParams mvn;
+    mvn.type = "MVN";
+    mvn.name = "testMVN";
+
+    // A Halide layer again.
+    LayerParams lrn2;
+    lrn2.type = "LRN";
+    lrn2.name = "testLRN2";
+
+    Net net;
+    int lrnId = net.addLayer(lrn.name, lrn.type, lrn);
+    net.connect(0, 0, lrnId, 0);
+    net.addLayerToPrev(mvn.name, mvn.type, mvn);
+    net.addLayerToPrev(lrn2.name, lrn2.type, lrn2);
+
+    Mat input({4, 3, 5, 6}, CV_32F);
+    randu(input, -1.0f, 1.0f);
+    net.setInput(input);
+    Mat outputDefault = net.forward().clone();
+
+    net.setPreferableBackend(DNN_BACKEND_HALIDE);
+    net.setInput(input);
+    Mat outputHalide = net.forward().clone();
+    normAssert(outputDefault, outputHalide);
+
+    net.setPreferableTarget(DNN_TARGET_OPENCL);
+    net.setInput(input);
+    outputHalide = net.forward().clone();
+    normAssert(outputDefault, outputHalide);
+}
 #endif  // HAVE_HALIDE
 
 }  // namespace cvtest
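Note: the nets test below exercises the same wrapper machinery end to end by re-running one Net under different backends and targets and comparing outputs. Condensed to user-level code, the sequence it performs looks roughly like this (a fragment for illustration; accuracy checks and error handling omitted):

#include <opencv2/dnn.hpp>
using namespace cv;
using namespace cv::dnn;

static void runAllBackends(Net& net, const Mat& blob)
{
    net.setInput(blob);
    Mat reference = net.forward().clone();       // default (CPU) backend

    net.setPreferableBackend(DNN_BACKEND_HALIDE);
    net.setInput(blob);                          // marks the input wrapper host-dirty
    Mat halideCpu = net.forward().clone();       // compare against 'reference'

    net.setPreferableTarget(DNN_TARGET_OPENCL);  // same backend, OpenCL target
    net.setInput(blob);
    Mat halideOcl = net.forward().clone();       // requested blob is synced back to host
}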
diff --git a/modules/dnn/test/test_halide_nets.cpp b/modules/dnn/test/test_halide_nets.cpp
index c1ac2ff0c0..ada0986d5e 100644
--- a/modules/dnn/test/test_halide_nets.cpp
+++ b/modules/dnn/test/test_halide_nets.cpp
@@ -79,6 +79,20 @@ static void test(const std::string& weights, const std::string& proto,
 ////////////////////////////////////////////////////////////////////////////////
 // CPU target
 ////////////////////////////////////////////////////////////////////////////////
+TEST(Reproducibility_MobileNetSSD_Halide, Accuracy)
+{
+    test(findDataFile("dnn/MobileNetSSD_deploy.caffemodel", false),
+         findDataFile("dnn/MobileNetSSD_deploy.prototxt", false),
+         "", 300, 300, "detection_out", "caffe", DNN_TARGET_CPU);
+};
+
+TEST(Reproducibility_SSD_Halide, Accuracy)
+{
+    test(findDataFile("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel", false),
+         findDataFile("dnn/ssd_vgg16.prototxt", false),
+         "", 300, 300, "detection_out", "caffe", DNN_TARGET_CPU);
+};
+
 TEST(Reproducibility_GoogLeNet_Halide, Accuracy)
 {
     test(findDataFile("dnn/bvlc_googlenet.caffemodel", false),
@@ -126,6 +140,20 @@ TEST(Reproducibility_ENet_Halide, Accuracy)
 ////////////////////////////////////////////////////////////////////////////////
 // OpenCL target
 ////////////////////////////////////////////////////////////////////////////////
+TEST(Reproducibility_MobileNetSSD_Halide_opencl, Accuracy)
+{
+    test(findDataFile("dnn/MobileNetSSD_deploy.caffemodel", false),
+         findDataFile("dnn/MobileNetSSD_deploy.prototxt", false),
+         "", 300, 300, "detection_out", "caffe", DNN_TARGET_OPENCL);
+};
+
+TEST(Reproducibility_SSD_Halide_opencl, Accuracy)
+{
+    test(findDataFile("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel", false),
+         findDataFile("dnn/ssd_vgg16.prototxt", false),
+         "", 300, 300, "detection_out", "caffe", DNN_TARGET_OPENCL);
+};
+
 TEST(Reproducibility_GoogLeNet_Halide_opencl, Accuracy)
 {
     test(findDataFile("dnn/bvlc_googlenet.caffemodel", false),
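Note: for reference, an application-side sketch matching the new MobileNet-SSD cases. The file and image paths are placeholders for locally available files from opencv_extra, and the scale/mean constants are the ones commonly used with this Caffe model, not values taken from the patch:

#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
using namespace cv;
using namespace cv::dnn;

int main()
{
    Net net = readNetFromCaffe("MobileNetSSD_deploy.prototxt",
                               "MobileNetSSD_deploy.caffemodel");
    net.setPreferableBackend(DNN_BACKEND_HALIDE);   // or DNN_BACKEND_DEFAULT
    net.setPreferableTarget(DNN_TARGET_CPU);        // or DNN_TARGET_OPENCL

    Mat img = imread("example.jpg");
    // Commonly used MobileNet-SSD preprocessing: scale 1/127.5, mean 127.5.
    net.setInput(blobFromImage(img, 1.0 / 127.5, Size(300, 300),
                               Scalar(127.5, 127.5, 127.5), false));
    Mat detections = net.forward("detection_out");  // requested blob is synced to host
    return 0;
}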