diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index dfcfa4ed20..fa26388124 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -410,13 +410,13 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
          *  @param outputName name for layer which output is needed to get
          *  @details If @p outputName is empty, runs forward pass for the whole network.
          */
-        CV_WRAP void forward(std::vector<Mat>& outputBlobs, const String& outputName = String());
+        CV_WRAP void forward(OutputArrayOfArrays outputBlobs, const String& outputName = String());
 
         /** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames.
          *  @param outputBlobs contains blobs for first outputs of specified layers.
          *  @param outBlobNames names for layers which outputs are needed to get
          */
-        CV_WRAP void forward(std::vector<Mat>& outputBlobs,
+        CV_WRAP void forward(OutputArrayOfArrays outputBlobs,
                              const std::vector<String>& outBlobNames);
 
         /** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames.
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index 0e362cfc26..2b3be0e5ae 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -1638,7 +1638,7 @@ struct Net::Impl
             CV_Error(Error::StsOutOfRange, "Layer \"" + ld.name + "\" produce only " + toString(ld.outputBlobs.size()) +
                                            " outputs, the #" + toString(pin.oid) + " was requsted");
         }
-        if (preferableBackend != DNN_TARGET_CPU)
+        if (preferableBackend != DNN_BACKEND_DEFAULT)
        {
             // Transfer data to CPU if it's require.
             ld.outputBlobsWrappers[pin.oid]->copyToHost();
@@ -1654,10 +1654,35 @@ struct Net::Impl
         return ld.outputBlobs[pin.oid];
     }
 
+    void getBlob(UMat& umat, const LayerPin& pin)
+    {
+        CV_TRACE_FUNCTION();
+
+        if (!pin.valid())
+            CV_Error(Error::StsObjectNotFound, "Requested blob not found");
+
+        LayerData &ld = layers[pin.lid];
+        if ((size_t)pin.oid >= ld.outputBlobs.size())
+        {
+            CV_Error(Error::StsOutOfRange, "Layer \"" + ld.name + "\" produce only " + toString(ld.outputBlobs.size()) +
+                                           " outputs, the #" + toString(pin.oid) + " was requested");
+        }
+
+        if (ld.umat_outputBlobs.size() > 0 && !ld.umat_outputBlobs[pin.oid].empty())
+            umat = ld.umat_outputBlobs[pin.oid];
+        else
+            umat = UMat();
+    }
+
     Mat getBlob(String outputName)
     {
         return getBlob(getPinByAlias(outputName));
     }
+
+    void getBlob(UMat& umat, String outputName)
+    {
+        getBlob(umat, getPinByAlias(outputName));
+    }
 };
 
 Net::Net() : impl(new Net::Impl)
@@ -1735,7 +1760,7 @@ Mat Net::forward(const String& outputName)
     return impl->getBlob(layerName);
 }
 
-void Net::forward(std::vector<Mat>& outputBlobs, const String& outputName)
+void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
 {
     CV_TRACE_FUNCTION();
 
@@ -1751,16 +1776,40 @@ void Net::forward(std::vector<Mat>& outputBlobs, const String& outputName)
     LayerPin pin = impl->getPinByAlias(layerName);
     LayerData &ld = impl->layers[pin.lid];
-    if (ld.umat_outputBlobs.size() > 0)
+    if (outputBlobs.isUMat())
     {
-        for (int i = 0; i < ld.umat_outputBlobs.size(); i++)
-            ld.umat_outputBlobs[i].copyTo(ld.outputBlobs[i]);
+        if (ld.umat_outputBlobs.size() > 0)
+        {
+            UMat umat;
+            impl->getBlob(umat, layerName);
+            outputBlobs.assign(umat);
+        }
+    }
+    else if (outputBlobs.isMat())
+    {
+        outputBlobs.assign(impl->getBlob(layerName));
+    }
+    else if (outputBlobs.isMatVector())
+    {
+        if (ld.umat_outputBlobs.size() > 0)
+        {
+            for (int i = 0; i < ld.umat_outputBlobs.size(); i++)
+                ld.umat_outputBlobs[i].copyTo(ld.outputBlobs[i]);
+        }
+        std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
+        outputvec = ld.outputBlobs;
+    }
+    else if (outputBlobs.isUMatVector())
+    {
+        if (ld.umat_outputBlobs.size() > 0)
+        {
+            std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();
+            outputvec = ld.umat_outputBlobs;
+        }
     }
-
-    outputBlobs = ld.outputBlobs;
 }
 
-void Net::forward(std::vector<Mat>& outputBlobs,
+void Net::forward(OutputArrayOfArrays outputBlobs,
                   const std::vector<String>& outBlobNames)
 {
     CV_TRACE_FUNCTION();
 
@@ -1768,7 +1817,7 @@ void Net::forward(std::vector<Mat>& outputBlobs,
     std::vector<LayerPin> pins;
     for (int i = 0; i < outBlobNames.size(); i++)
     {
-        pins.push_back(impl->getPinByAlias(outBlobNames[i]));
+        pins.push_back(impl->getPinByAlias(outBlobNames[i]));
     }
 
     impl->setUpNet(pins);
@@ -1777,11 +1826,14 @@ void Net::forward(std::vector<Mat>& outputBlobs,
 
     impl->forwardToLayer(impl->getLayerData(out.lid));
 
-    outputBlobs.clear();
+    std::vector<Mat> matvec;
     for (int i = 0; i < pins.size(); i++)
     {
-        outputBlobs.push_back(impl->getBlob(pins[i]));
+        matvec.push_back(impl->getBlob(pins[i]));
     }
+
+    std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
+    outputvec = matvec;
 }
 
 void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp
index 184e2b824f..59d8ed935b 100644
--- a/modules/dnn/src/layers/fully_connected_layer.cpp
+++ b/modules/dnn/src/layers/fully_connected_layer.cpp
@@ -286,8 +286,13 @@ public:
         UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
         for (size_t i = 0; i < inputs.size(); i++)
         {
-            UMat& srcMat = inputs[i];
-            UMat& dstMat = outputs[i];
+            MatShape inshape, outshape;
+            inshape = shape(outerSize, innerSize);
+            outshape = shape(outerSize, numOutput);
+
+            UMat srcMat, dstMat;
+            srcMat = inputs[i].reshape(1, inshape.size(), &inshape[0]);
+            dstMat = outputs[i].reshape(1, outshape.size(), &outshape[0]);
             dstMat.setTo(0.0f);
 
             if (!innerProductOp->Forward(srcMat, umat_blobs[0], (bias) ? umat_blobs[1] : UMat(), dstMat))
diff --git a/modules/dnn/src/ocl4dnn/src/math_functions.cpp b/modules/dnn/src/ocl4dnn/src/math_functions.cpp
index 42b35572aa..5fe52ac1ba 100644
--- a/modules/dnn/src/ocl4dnn/src/math_functions.cpp
+++ b/modules/dnn/src/ocl4dnn/src/math_functions.cpp
@@ -65,8 +65,6 @@ ocl::Image2D ocl4dnnGEMMCopyBufferToImage(UMat buffer, int offset,
                                           int padded_width, int height,
                                           int width, int ld)
 {
-    ocl::Context ctx = ocl::Context::getDefault();
-    ocl::Queue queue = ocl::Queue::getDefault();
     ocl::Image2D image;
 
     if (!is_matrix_a && transpose)
@@ -192,9 +190,6 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
     // just padding one line is enough as the sub group block read
     // will clamp to edge according to the spec.
 
-    ocl::Context ctx = ocl::Context::getDefault();
-    ocl::Queue queue = ocl::Queue::getDefault();
-
     ocl::Image2D ImA;
     ocl::Image2D ImB;
@@ -446,7 +441,6 @@ bool ocl4dnnGEMV<float>(const CBLAS_TRANSPOSE TransA,
                         const int32_t offx, const float beta,
                         UMat y, const int32_t offy)
 {
-    ocl::Queue queue = ocl::Queue::getDefault();
     bool ret = false;
 
     if (TransA == CblasNoTrans)
@@ -507,8 +501,6 @@ bool ocl4dnnAXPY(const int32_t N, const Dtype alpha,
                  const UMat X, const int32_t offX, UMat Y,
                  const int32_t offY)
 {
-    ocl::Context ctx = ocl::Context::getDefault();
-
     ocl::Kernel oclk_axpy(CL_KERNEL_SELECT("axpy"), cv::ocl::dnn::math_oclsrc);
     if (oclk_axpy.empty())
         return false;
diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
index e072ac3a0c..6a305558eb 100644
--- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
+++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
@@ -198,8 +198,6 @@ void OCL4DNNConvSpatial<Dtype>::collectCommonInformation()
         addDef("as_Dtype2", "as_float2");
         addDef("as_Dtype4", "as_float4");
         addDef("as_Dtype8", "as_float8");
-        addDef("Dtype_ID", (int)CV_32F);
-        addDef("Dtype_SIZE", (int)sizeof(Dtype));
 }
 
 typedef enum {
diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_lrn.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_lrn.cpp
index 6cc65b7189..476d05287f 100644
--- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_lrn.cpp
+++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_lrn.cpp
@@ -92,7 +92,6 @@ bool OCL4DNNLRN<Dtype>::Forward(const UMat& bottom, UMat& top)
 template<typename Dtype>
 bool OCL4DNNLRN<Dtype>::crossChannelForward(const UMat& bottom, UMat& top)
 {
-    ocl::Queue queue = ocl::Queue::getDefault();
     CHECK_EQ(phase_test_, true) << "Only support forward inference.";
 
     cl_uint argIdx = 0;
diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp
index e0bdf71e67..fe8b84b394 100644
--- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp
+++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp
@@ -97,7 +97,6 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
                                  UMat& top_mask)
 {
     bool ret = true;
-    ocl::Queue queue = ocl::Queue::getDefault();
     size_t global[] = { 128 * 128 };
     size_t local[] = { 128 };
     cl_uint argIdx = 0;
diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_softmax.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_softmax.cpp
index 9ac5ddc8cc..f2452ff654 100644
--- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_softmax.cpp
+++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_softmax.cpp
@@ -83,7 +83,6 @@ template<typename Dtype>
 bool OCL4DNNSoftmax<Dtype>::Forward(const UMat& bottom, UMat& top)
 {
     bool ret = false;
-    ocl::Queue queue = ocl::Queue::getDefault();
     bool intel_subgroup = ocl::Device::getDefault().intelSubgroupsSupport();
     if (intel_subgroup && inner_num_ < 128)
     {
diff --git a/modules/dnn/src/opencl/conv_layer_spatial.cl b/modules/dnn/src/opencl/conv_layer_spatial.cl
index c7d38117d8..91066bdbfd 100644
--- a/modules/dnn/src/opencl/conv_layer_spatial.cl
+++ b/modules/dnn/src/opencl/conv_layer_spatial.cl
@@ -91,7 +91,6 @@
 #define LOOP(N, VAR, STMT) CAT(LOOP, N)((VAR), (STMT))
 
 #if defined(convolve_simd) || defined(Conv_Interleaved)
-#if Dtype_SIZE == 4
 #define INT_TYPE uint
 #define INT_TYPE2 uint2
 #define INT_TYPE4 uint4
@@ -100,9 +99,6 @@
 #define SUB_GROUP_BLOCK_READ2 intel_sub_group_block_read2
 #define SUB_GROUP_BLOCK_READ4 intel_sub_group_block_read4
 #define SUB_GROUP_BLOCK_READ8 intel_sub_group_block_read8
 #define SUB_GROUP_BLOCK_READ intel_sub_group_block_read
-#else
-#error "Unsupported type"
-#endif
 #endif
 
 #ifdef KERNEL_BASIC
@@ -186,11 +182,7 @@ __kernel void ConvolveBasic(
 
 #elif defined KERNEL_IDLF
 
-#if TYPE == TYPE_HALF
-#define VLOAD4(_v, _p) do { (_v).s0 = *(_p); (_v).s1 = *(_p + 1); (_v).s2 = *(_p + 2); (_v).s3 = *(_p + 3); } while(0)
-#else
 #define VLOAD4(_v, _p) do { _v = vload4(0, _p); } while(0)
-#endif
 
 // Each work-item computes a OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT region of one output map.
 // Each work-group (which will be mapped to 1 SIMD16/SIMD8 EU thread) will compute 16/8 different feature maps, but each feature map is for the same region of the imput image.
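
Note (not part of the patch): a minimal usage sketch of the reworked Net::forward() overloads, assuming an OpenCV build with this change applied. The model files and the "prob" layer name below are hypothetical placeholders; only the forward() calls illustrate the new OutputArrayOfArrays behaviour (Mat, UMat, std::vector<Mat> and std::vector<UMat> outputs).

// Illustrative sketch only -- model paths and layer names are placeholders.
#include <opencv2/dnn.hpp>
#include <opencv2/core.hpp>
#include <vector>

int main()
{
    cv::dnn::Net net = cv::dnn::readNetFromCaffe("model.prototxt", "model.caffemodel");

    // Build a 4D input blob from a dummy image and feed it to the network.
    cv::Mat img(224, 224, CV_8UC3, cv::Scalar::all(0));
    net.setInput(cv::dnn::blobFromImage(img));

    // Outputs can be collected into a std::vector<Mat> ...
    std::vector<cv::Mat> outs;
    net.forward(outs);                 // whole network, outputs of the last layer

    // ... or kept as UMat, which avoids an extra device-to-host copy
    // when the OpenCL (ocl4dnn) path produced umat_outputBlobs.
    std::vector<cv::UMat> uouts;
    net.forward(uouts, "prob");        // "prob" is a hypothetical layer name

    return 0;
}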