|
|
|
@@ -233,6 +233,9 @@ struct LayerData
    std::vector<Mat> outputBlobs;
    std::vector<Mat*> inputBlobs;
    std::vector<Mat> internals;
    // UMat counterparts of the blobs above, used when the network runs on the OpenCL target.
    std::vector<UMat> umat_outputBlobs;
    std::vector<UMat> umat_inputBlobs;
    std::vector<UMat> umat_internals;
    // Computation nodes of implemented backends (except DEFAULT).
    std::map<int, Ptr<BackendNode> > backendNodes;
    // Flags to skip layer computation for specific backends.
@@ -263,6 +266,7 @@ struct DataLayer : public Layer
{
    void finalize(const std::vector<Mat*>&, std::vector<Mat>&) {}
    void forward(std::vector<Mat*>&, std::vector<Mat>&, std::vector<Mat> &) {}
    void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) {}

    int outputNameToIndex(String tgtName)
    {
@@ -398,22 +402,77 @@ public:
        }
    }
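    // UMat overload of reuseOrCreate(): scans umat_memHosts for an unreferenced blob
    // large enough to hold `shape`; if one is found the pin is recorded as a reuse,
    // otherwise a fresh UMat is allocated and registered via addHost().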
|
|
|
|
|
|
|
|
|
    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst, bool force)
    {
        UMat bestBlob;
        LayerPin bestBlobPin;

        if( !force )
        {
            std::map<LayerPin, UMat>::iterator hostIt;
            std::map<LayerPin, int>::iterator refIt;

            const int targetTotal = total(shape);
            int bestBlobTotal = INT_MAX;

            for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
            {
                refIt = refCounter.find(hostIt->first);
                // Use only blobs that already had references, because otherwise
                // they might still be in use as outputs.
                if (refIt != refCounter.end() && refIt->second == 0)
                {
                    UMat& unusedBlob = hostIt->second;
                    if (unusedBlob.total() >= targetTotal &&
                        unusedBlob.total() < bestBlobTotal)
                    {
                        bestBlobPin = hostIt->first;
                        bestBlob = unusedBlob;
                        bestBlobTotal = unusedBlob.total();
                    }
                }
            }
        }
        if (!bestBlob.empty())
        {
            reuse(bestBlobPin, lp);
            umat_dst.create(shape, CV_32F);
        }
        else
        {
            // If dst has already been allocated with total(shape) elements,
            // it won't be re-created and the dst.data pointer stays the same.
            umat_dst.create(shape, CV_32F);
            addHost(lp, umat_dst);
        }
    }
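    // allocateBlobsForLayer() below resizes the Mat output/internal blobs as before and,
    // when the default backend targets OpenCL, sizes the UMat vectors to match so both
    // code paths stay in sync.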
|
|
|
|
|
|
|
|
|
    void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
                               std::vector<LayerPin>& pinsForInternalBlobs,
                               bool maximizeReuse)
    {
        CV_TRACE_FUNCTION();
        bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
                         preferableTarget == DNN_TARGET_OPENCL);

        pinsForInternalBlobs.clear();

        std::vector<Mat>& outputBlobs = ld.outputBlobs,
                &internalBlobs = ld.internals;

        std::vector<UMat>& umat_outputBlobs = ld.umat_outputBlobs,
                &umat_internalBlobs = ld.umat_internals;

        const ShapesVec& outShapes = layerShapes.out,
                internalShapes = layerShapes.internal;

        outputBlobs.resize(std::max((size_t)1, outShapes.size())); // a layer produces at least one output blob
        internalBlobs.resize(internalShapes.size());
        if (use_umat)
        {
            umat_outputBlobs.resize(std::max((size_t)1, outShapes.size()));
            umat_internalBlobs.resize(internalShapes.size());
        }

        CV_Assert(ld.requiredOutputs.size() <= outShapes.size());
@@ -433,14 +492,19 @@ public:
        ShapesVec shapes(outShapes);
        shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());
        std::vector<Mat*> blobs;
        std::vector<UMat*> umat_blobs;
        for(int i = 0; i < outputBlobs.size(); i++)
        {
            blobs.push_back(&outputBlobs[i]);
            if (use_umat)
                umat_blobs.push_back(&umat_outputBlobs[i]);
        }

        for(int i = 0; i < internalBlobs.size(); i++)
        {
            blobs.push_back(&internalBlobs[i]);
            if (use_umat)
                umat_blobs.push_back(&umat_internalBlobs[i]);
            if (total(internalShapes[i]))
            {
                pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i));
@@ -466,13 +530,26 @@ public:
        {
            LayerPin blobPin(ld.id, index);
            if (index < outShapes.size() && inPlace && !force)
            {
                if (use_umat)
                {
                    CV_Assert(ld.umat_inputBlobs[0].total() == total(shapes[index]));
                    ld.umat_outputBlobs[index] =
                        ld.umat_inputBlobs[0].reshape(1, shapes[index].size(),
                                                      &shapes[index][0]);
                }
                else
                {
                    CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
                    ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
                }
                reuse(ld.inputBlobsId[0], blobPin);
            }
            else
            {
                if (use_umat)
                    reuseOrCreate(shapes[index], blobPin, *umat_blobs[index], force);
                else
                    reuseOrCreate(shapes[index], blobPin, *blobs[index], force);
            }
        }
@@ -488,6 +565,19 @@ public:
        refCounter.clear();
        reuseMap.clear();
        memHosts.clear();
        umat_memHosts.clear();
        preferableTarget = DNN_TARGET_CPU;
        preferableBackend = DNN_BACKEND_DEFAULT;
    }

    void setPreferableTarget(int targetId)
    {
        preferableTarget = targetId;
    }

    void setPreferableBackend(int backendId)
    {
        preferableBackend = backendId;
    }

private:
@@ -499,11 +589,21 @@ private:
        memHosts[lp] = mat;
    }
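    // UMat overload of addHost(): registers `umat` as the origin blob for pin `lp`.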
|
|
|
|
|
|
|
|
|
    void addHost(const LayerPin& lp, const UMat& umat)
    {
        CV_Assert(umat_memHosts.find(lp) == umat_memHosts.end());
        reuseMap[lp] = lp;
        umat_memHosts[lp] = umat;
    }

    std::map<LayerPin, int> refCounter;
    // Maps a pin to its origin blob (the one for which memory was allocated first).
    // For origin blobs, key == value.
    std::map<LayerPin, LayerPin> reuseMap;
    std::map<LayerPin, Mat> memHosts;
    std::map<LayerPin, UMat> umat_memHosts;
    int preferableTarget;
    int preferableBackend;
};

static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, const cv::Mat& m)
@@ -654,6 +754,9 @@ struct Net::Impl
            it->second.inputBlobs.clear();
            it->second.outputBlobs.clear();
            it->second.internals.clear();
            it->second.umat_inputBlobs.clear();
            it->second.umat_outputBlobs.clear();
            it->second.umat_internals.clear();
        }
        it->second.skipFlags.clear();
        //it->second.consumers.clear();
@@ -974,7 +1077,11 @@ struct Net::Impl
            allocateLayer(*i, layersShapes);

        //bind inputs
        bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
                         preferableTarget == DNN_TARGET_OPENCL);
        ld.inputBlobs.resize(ninputs);
        if (use_umat)
            ld.umat_inputBlobs.resize(ninputs);
        ld.inputBlobsWrappers.resize(ninputs);
        for (size_t i = 0; i < ninputs; i++)
        {
@@ -982,6 +1089,8 @@ struct Net::Impl
            CV_Assert(from.valid());
            CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
            ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
            if (use_umat)
                ld.umat_inputBlobs[i] = layers[from.lid].umat_outputBlobs[from.oid];
            ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
        }
@@ -999,8 +1108,27 @@ struct Net::Impl
        }

        Ptr<Layer> layerPtr = ld.getLayerInstance();
        {
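            // Layer::finalize() works on Mat, so on the OpenCL path the UMat blobs are
            // first mapped to Mat views before being handed to the layer.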
|
|
|
|
            if (use_umat)
            {
                // Keep each mapped input Mat alive so the pointers passed to finalize() stay valid.
                std::vector<Mat> inputMats(ld.umat_inputBlobs.size());
                std::vector<Mat*> inputs(ld.umat_inputBlobs.size());
                std::vector<Mat> outputs(ld.umat_outputBlobs.size());
                for (int i = 0; i < inputs.size(); i++)
                {
                    inputMats[i] = ld.umat_inputBlobs[i].getMat(ACCESS_READ);
                    inputs[i] = &inputMats[i];
                }
                for (int i = 0; i < outputs.size(); i++)
                {
                    outputs[i] = ld.umat_outputBlobs[i].getMat(ACCESS_READ);
                }
                layerPtr->finalize(inputs, outputs);
            }
            else
            {
                layerPtr->finalize(ld.inputBlobs, ld.outputBlobs);
            }
            layerPtr->preferableTarget = preferableTarget;
#if 0
            std::cout << "\toutputs:";
@@ -1234,6 +1362,8 @@ struct Net::Impl
        getLayersShapes(inputShapes, layersShapes);

        blobManager.reset();
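        // Tell the blob manager the preferred backend/target before allocation,
        // so it can decide whether UMat blobs are needed for the OpenCL path.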
|
|
|
|
        blobManager.setPreferableTarget(preferableTarget);
        blobManager.setPreferableBackend(preferableBackend);
        backendWrappers.clear();
        blobManager.addReference(LayerPin(0, 0));
        for (it = layers.begin(); it != layers.end(); ++it)
@@ -1276,6 +1406,9 @@ struct Net::Impl
            if (!ld.inputBlobsWrappers[i].empty())
                ld.inputBlobsWrappers[i]->copyToHost();
        }
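        // On the OpenCL path (default backend, OpenCL target), call the UMat-based
        // forward(); otherwise call the Mat-based overload.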
|
|
|
|
        if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL)
            layer->forward(ld.umat_inputBlobs, ld.umat_outputBlobs, ld.umat_internals);
        else
            layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals);
        for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
        {
@@ -1421,6 +1554,10 @@ struct Net::Impl
        {
            CV_Assert(preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL);
        }
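        // If the layer ran on the OpenCL path, download the UMat result into the
        // corresponding Mat blob before returning it.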
|
|
|
|
|
|
|
|
|
        if (ld.umat_outputBlobs.size() > 0 && !ld.umat_outputBlobs[pin.oid].empty())
            ld.umat_outputBlobs[pin.oid].copyTo(ld.outputBlobs[pin.oid]);

        return ld.outputBlobs[pin.oid];
    }
@@ -1520,6 +1657,13 @@ void Net::forward(std::vector<Mat>& outputBlobs, const String& outputName)

    LayerPin pin = impl->getPinByAlias(layerName);
    LayerData &ld = impl->layers[pin.lid];
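    // Download any OpenCL (UMat) outputs into the Mat blobs that are returned to the caller.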
|
|
|
|
|
|
|
|
|
    if (ld.umat_outputBlobs.size() > 0)
    {
        for (int i = 0; i < ld.umat_outputBlobs.size(); i++)
            ld.umat_outputBlobs[i].copyTo(ld.outputBlobs[i]);
    }

    outputBlobs = ld.outputBlobs;
}
@@ -1584,6 +1728,7 @@ void Net::setPreferableBackend(int backendId)
    if( impl->preferableBackend != backendId )
    {
        impl->preferableBackend = backendId;
        impl->blobManager.setPreferableBackend(backendId);
        impl->netWasAllocated = false;
        impl->clear();
    }
@@ -1597,6 +1742,7 @@ void Net::setPreferableTarget(int targetId)
    if( impl->preferableTarget != targetId )
    {
        impl->preferableTarget = targetId;
        impl->blobManager.setPreferableTarget(targetId);
        impl->netWasAllocated = false;
        impl->clear();
    }
@@ -1623,13 +1769,25 @@ void Net::setInput(const Mat &blob_, const String& name)

    LayerData &ld = impl->layers[pin.lid];
    ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
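    // Mirror the input into a UMat blob as well when the network will run on the OpenCL path.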
|
|
|
|
    bool use_umat = (impl->preferableBackend == DNN_BACKEND_DEFAULT &&
                     impl->preferableTarget == DNN_TARGET_OPENCL);
    if (use_umat)
        ld.umat_outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
    ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
    MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
    bool oldShape = prevShape == shape(blob_);
    if (oldShape)
    {
        blob_.copyTo(ld.outputBlobs[pin.oid]);
        if (use_umat)
            blob_.copyTo(ld.umat_outputBlobs[pin.oid]);
    }
    else
    {
        ld.outputBlobs[pin.oid] = blob_.clone();
        if (use_umat)
            blob_.copyTo(ld.umat_outputBlobs[pin.oid]);
    }

    if (!ld.outputBlobsWrappers[pin.oid].empty())
    {
@@ -2132,13 +2290,24 @@ std::vector<Mat> Layer::finalize(const std::vector<Mat> &inputs)
    return outputs;
}

void Layer::forward(const std::vector<Mat> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    std::vector<Mat*> inputsp;
    vecToPVec(inputs, inputsp);
    this->forward(inputsp, outputs, internals);
    std::vector<Mat> inpvec;
    std::vector<Mat> outputs;
    std::vector<Mat> internals;

    inputs_arr.getMatVector(inpvec);
    outputs_arr.getMatVector(outputs);
    internals_arr.getMatVector(internals);
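    // Build raw-pointer views of the input Mats before delegating to the pointer-based forward() overload.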
|
|
|
|
|
|
|
|
|
    std::vector<Mat*> inputs(inpvec.size());
    for (int i = 0; i < inpvec.size(); i++)
        inputs[i] = &inpvec[i];

    this->forward(inputs, outputs, internals);
}

void Layer::run(const std::vector<Mat> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
|
|
|