@@ -180,6 +180,96 @@ Mat blobFromImages(const std::vector<Mat>& images_, double scalefactor, Size siz
     return blob;
 }
 
+class OpenCLBackendWrapper : public BackendWrapper
+{
+public:
+    OpenCLBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL)
+    {
+        m.copyTo(umat);
+        host = &m;
+        hostDirty = false;
+    }
+
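+    // Attaches a host Mat to an already allocated device buffer: the UMat below
+    // is built as a reshaped view into the base wrapper's UMat, so both wrappers
+    // share a single device allocation instead of copying the data.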
+    OpenCLBackendWrapper(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
+        : BackendWrapper(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL)
+    {
+        Ptr<OpenCLBackendWrapper> base = baseBuffer.dynamicCast<OpenCLBackendWrapper>();
+        CV_Assert(!base.empty());
+
+        host = &m;
+
+        int shape[] = {1, (int)base->umat.total()};
+        umat = base->umat.reshape(1, 2, &shape[0])
+                         .colRange(0, host->total())
+                         .reshape(1, host->dims, &host->size[0]);
+        hostDirty = false;
+    }
+
+    static Ptr<BackendWrapper> create(Mat& m)
+    {
+        return Ptr<BackendWrapper>(new OpenCLBackendWrapper(m));
+    }
+
+    static Ptr<BackendWrapper> create(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
+    {
+        return Ptr<BackendWrapper>(new OpenCLBackendWrapper(baseBuffer, m));
+    }
+
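+    // Collects the device UMats of all wrappers; copyToDevice() flushes any
+    // pending host-side changes before the UMats are handed out.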
+    static std::vector<UMat> getUMatVector(const std::vector<Ptr<BackendWrapper> >& wrappers)
+    {
+        const int numWrappers = wrappers.size();
+        std::vector<UMat> mats(wrappers.size());
+        for (int i = 0; i < numWrappers; ++i)
+        {
+            Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
+            CV_Assert(!umatWrapper.empty());
+            umatWrapper->copyToDevice();
+            mats[i] = umatWrapper->umat;
+        }
+        return mats;
+    }
+
+    // Replaces the UMat of each wrapper with the corresponding one from umats.
+    static void update(const std::vector<Ptr<BackendWrapper> >& wrappers,
+                       const std::vector<UMat>& umats)
+    {
+        CV_Assert(wrappers.size() == umats.size());
+        for (int i = 0, n = umats.size(); i < n; ++i)
+        {
+            Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
+            CV_Assert(!umatWrapper.empty());
+            umatWrapper->umat = umats[i];
+        }
+    }
+
+    ~OpenCLBackendWrapper() {}
+
+    // Copies data from device to host memory.
+    virtual void copyToHost()
+    {
+        umat.copyTo(*host);
+    }
+
+    virtual void setHostDirty()
+    {
+        hostDirty = true;
+    }
+
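+    // Lazy host-to-device synchronization: setHostDirty() only marks the host
+    // data as modified; the actual upload happens here on the next device access.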
+    void copyToDevice()
+    {
+        if (hostDirty)
+        {
+            host->copyTo(umat);
+            hostDirty = false;
+        }
+    }
+
+private:
+    UMat umat;
+    Mat* host;
+    bool hostDirty;
+};
+
 struct LayerPin
 {
     int lid;
@@ -233,14 +323,12 @@ struct LayerData
     std::vector<LayerPin> consumers;
     std::vector<Ptr<BackendWrapper> > outputBlobsWrappers;
     std::vector<Ptr<BackendWrapper> > inputBlobsWrappers;
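+    // Internal (scratch) buffers get wrappers too, so layers can run on the
+    // device without routing scratch data through the host Mats.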
+    std::vector<Ptr<BackendWrapper> > internalBlobsWrappers;
 
     Ptr<Layer> layerInstance;
     std::vector<Mat> outputBlobs;
     std::vector<Mat*> inputBlobs;
     std::vector<Mat> internals;
-    std::vector<UMat> umat_outputBlobs;
-    std::vector<UMat> umat_inputBlobs;
-    std::vector<UMat> umat_internals;
     // Computation nodes of implemented backends (except DEFAULT).
     std::map<int, Ptr<BackendNode> > backendNodes;
     // Flag for skip layer computation for specific backend.
@@ -418,77 +506,21 @@ public:
         }
     }
 
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst)
-    {
-        if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS)
-        {
-            UMat bestBlob;
-            LayerPin bestBlobPin;
-
-            std::map<LayerPin, UMat>::iterator hostIt;
-            std::map<LayerPin, int>::iterator refIt;
-
-            const int targetTotal = total(shape);
-            int bestBlobTotal = INT_MAX;
-
-            for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
-            {
-                refIt = refCounter.find(hostIt->first);
-                // Use only blobs that had references before because if not,
-                // it might be used as output.
-                if (refIt != refCounter.end() && refIt->second == 0)
-                {
-                    UMat& unusedBlob = hostIt->second;
-                    if (unusedBlob.total() >= targetTotal &&
-                        unusedBlob.total() < bestBlobTotal)
-                    {
-                        bestBlobPin = hostIt->first;
-                        bestBlob = unusedBlob;
-                        bestBlobTotal = unusedBlob.total();
-                    }
-                }
-            }
-            if (!bestBlob.empty())
-            {
-                reuse(bestBlobPin, lp);
-                umat_dst.create(shape, CV_32F);
-                return;
-            }
-        }
-
-        {
-            // if dst already has been allocated with total(shape) elements,
-            // it won't be recrreated and pointer of dst.data remains the same.
-            umat_dst.create(shape, CV_32F);
-            addHost(lp, umat_dst);
-        }
-    }
-
     void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
                                std::vector<LayerPin>& pinsForInternalBlobs)
     {
         CV_TRACE_FUNCTION();
-        bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
-                         preferableTarget == DNN_TARGET_OPENCL);
 
         pinsForInternalBlobs.clear();
 
         std::vector<Mat>& outputBlobs = ld.outputBlobs,
                 &internalBlobs = ld.internals;
 
-        std::vector<UMat>& umat_outputBlobs = ld.umat_outputBlobs,
-                &umat_internalBlobs = ld.umat_internals;
-
         const ShapesVec& outShapes = layerShapes.out,
                 internalShapes = layerShapes.internal;
 
         outputBlobs.resize(std::max((size_t)1, outShapes.size())); //layer produce at least one output blob
         internalBlobs.resize(internalShapes.size());
-        if (use_umat)
-        {
-            umat_outputBlobs.resize(std::max((size_t)1, outShapes.size()));
-            umat_internalBlobs.resize(internalShapes.size());
-        }
 
         CV_Assert(ld.requiredOutputs.size() <= outShapes.size());
@@ -508,19 +540,14 @@ public:
         ShapesVec shapes(outShapes);
         shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());
         std::vector<Mat*> blobs;
-        std::vector<UMat*> umat_blobs;
         for(int i = 0; i < outputBlobs.size(); i++)
         {
             blobs.push_back(&outputBlobs[i]);
-            if (use_umat)
-                umat_blobs.push_back(&umat_outputBlobs[i]);
         }
 
         for(int i = 0; i < internalBlobs.size(); i++)
         {
             blobs.push_back(&internalBlobs[i]);
-            if (use_umat)
-                umat_blobs.push_back(&umat_internalBlobs[i]);
             if (total(internalShapes[i]))
             {
                 pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i));
@@ -546,27 +573,12 @@ public:
             LayerPin blobPin(ld.id, index);
             if (index < outShapes.size() && inPlace)
             {
-                if (use_umat)
-                {
-                    CV_Assert(ld.umat_inputBlobs[0].total() == total(shapes[index]));
-                    ld.umat_outputBlobs[index] =
-                        ld.umat_inputBlobs[0].reshape(1, shapes[index].size(),
-                                                      &shapes[index][0]);
-                }
-                else
-                {
-                    CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
-                    ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
-                }
+                CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
+                ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
                 reuse(ld.inputBlobsId[0], blobPin);
             }
             else
-            {
-                if (use_umat)
-                    reuseOrCreate(shapes[index], blobPin, *umat_blobs[index]);
-                else
-                    reuseOrCreate(shapes[index], blobPin, *blobs[index]);
-            }
+                reuseOrCreate(shapes[index], blobPin, *blobs[index]);
         }
     }
@@ -580,19 +592,6 @@ public:
         refCounter.clear();
         reuseMap.clear();
         memHosts.clear();
-        umat_memHosts.clear();
-        preferableTarget = DNN_TARGET_CPU;
-        preferableBackend = DNN_BACKEND_DEFAULT;
     }
 
-    void setPreferableTarget(int targetId)
-    {
-        preferableTarget = targetId;
-    }
-
-    void setPreferableBackend(int backendId)
-    {
-        preferableBackend = backendId;
-    }
-
 private:
@@ -604,28 +603,23 @@ private:
         memHosts[lp] = mat;
     }
 
-    void addHost(const LayerPin& lp, const UMat& umat)
-    {
-        CV_Assert(umat_memHosts.find(lp) == umat_memHosts.end());
-        reuseMap[lp] = lp;
-        umat_memHosts[lp] = umat;
-    }
-
    std::map<LayerPin, int> refCounter;
     // Maps pin to origin blob (for whom memory was allocated firstly).
     // For origin blobs key == value.
     std::map<LayerPin, LayerPin> reuseMap;
     std::map<LayerPin, Mat> memHosts;
-    std::map<LayerPin, UMat> umat_memHosts;
-    int preferableTarget;
-    int preferableBackend;
 };
 
-static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, const cv::Mat& m)
+static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
 {
     if (backendId == DNN_BACKEND_DEFAULT)
     {
-        return Ptr<BackendWrapper>();
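+        // The default backend now dispatches on the target: plain CPU blobs need
+        // no wrapper, while OpenCL blobs are wrapped for on-device execution.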
+        if (targetId == DNN_TARGET_CPU)
+            return Ptr<BackendWrapper>();
+        else if (targetId == DNN_TARGET_OPENCL)
+            return OpenCLBackendWrapper::create(m);
+        else
+            CV_Error(Error::StsNotImplemented, "Unknown target identifier");
     }
     else if (backendId == DNN_BACKEND_HALIDE)
     {
@@ -660,8 +654,6 @@ struct Net::Impl
         fusion = true;
         preferableBackend = DNN_BACKEND_DEFAULT;
         preferableTarget = DNN_TARGET_CPU;
-        blobManager.setPreferableBackend(DNN_BACKEND_DEFAULT);
-        blobManager.setPreferableTarget(DNN_TARGET_CPU);
     }
 
     Ptr<DataLayer> netInputLayer;
@@ -682,9 +674,9 @@ struct Net::Impl
     bool fusion;
     std::vector<int64> layersTimings;
 
-    Ptr<BackendWrapper> wrap(const Mat& host)
+    Ptr<BackendWrapper> wrap(Mat& host)
     {
-        if (preferableBackend == DNN_BACKEND_DEFAULT)
+        if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_CPU)
             return Ptr<BackendWrapper>();
 
         MatShape shape(host.dims);
@@ -695,7 +687,12 @@ struct Net::Impl
         if (backendWrappers.find(data) != backendWrappers.end())
         {
             Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
-            if (preferableBackend == DNN_BACKEND_HALIDE)
+            if (preferableBackend == DNN_BACKEND_DEFAULT)
+            {
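+                // The same host Mat can back several reused blobs; the new wrapper
+                // is created as a view over the already allocated device buffer.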
+                CV_Assert(preferableTarget == DNN_TARGET_OPENCL);
+                return OpenCLBackendWrapper::create(baseBuffer, host);
+            }
+            else if (preferableBackend == DNN_BACKEND_HALIDE)
             {
                 CV_Assert(haveHalide());
 #ifdef HAVE_HALIDE
@@ -771,9 +768,6 @@ struct Net::Impl
             it->second.inputBlobs.clear();
             it->second.outputBlobs.clear();
             it->second.internals.clear();
-            it->second.umat_inputBlobs.clear();
-            it->second.umat_outputBlobs.clear();
-            it->second.umat_internals.clear();
         }
         it->second.skipFlags.clear();
         //it->second.consumers.clear();
@@ -1094,11 +1088,7 @@ struct Net::Impl
             allocateLayer(*i, layersShapes);
 
         //bind inputs
-        bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
-                         preferableTarget == DNN_TARGET_OPENCL);
         ld.inputBlobs.resize(ninputs);
-        if (use_umat)
-            ld.umat_inputBlobs.resize(ninputs);
         ld.inputBlobsWrappers.resize(ninputs);
         for (size_t i = 0; i < ninputs; i++)
         {
@@ -1106,8 +1096,6 @@ struct Net::Impl
             CV_Assert(from.valid());
             CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
             ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
-            if (use_umat)
-                ld.umat_inputBlobs[i] = layers[from.lid].umat_outputBlobs[from.oid];
             ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
         }
@@ -1122,29 +1110,15 @@ struct Net::Impl
         {
             ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);
         }
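+        // Wrap the internal (scratch) blobs as well, mirroring inputs/outputs.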
+        ld.internalBlobsWrappers.resize(ld.internals.size());
+        for (int i = 0; i < ld.internals.size(); ++i)
+        {
+            ld.internalBlobsWrappers[i] = wrap(ld.internals[i]);
+        }
 
         Ptr<Layer> layerPtr = ld.getLayerInstance();
         {
-            if (use_umat)
-            {
-                std::vector<Mat> input_mats(ld.umat_inputBlobs.size());;
-                std::vector<Mat*> inputs(ld.umat_inputBlobs.size());;
-                std::vector<Mat> outputs(ld.umat_outputBlobs.size());
-                for (int i = 0; i < inputs.size(); i++)
-                {
-                    input_mats[i] = ld.umat_inputBlobs[i].getMat(ACCESS_READ);
-                    inputs[i] = &input_mats[i];
-                }
-                for (int i = 0; i < outputs.size(); i++)
-                {
-                    outputs[i] = ld.umat_outputBlobs[i].getMat(ACCESS_READ);
-                }
-                layerPtr->finalize(inputs, outputs);
-            }
-            else
-            {
-                layerPtr->finalize(ld.inputBlobs, ld.outputBlobs);
-            }
+            layerPtr->finalize(ld.inputBlobs, ld.outputBlobs);
             layerPtr->preferableTarget = preferableTarget;
 #if 0
             std::cout << "\toutputs:";
@@ -1221,10 +1195,8 @@ struct Net::Impl
                 {
                     printf_(("\tfused with %s\n", nextBNormLayer->name.c_str()));
                     bnormData->skipFlags[DNN_BACKEND_DEFAULT] = true;
-                    if ( preferableTarget == DNN_TARGET_OPENCL )
-                        ld.umat_outputBlobs = layers[lpNext.lid].umat_outputBlobs;
-                    else
-                        ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                    ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                    ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
                     if( bnormData->consumers.size() == 1 )
                     {
                         nextData = &layers[bnormData->consumers[0].lid];
@@ -1244,10 +1216,8 @@ struct Net::Impl
                 {
                     printf_(("\tfused with %s\n", nextScaleLayer->name.c_str()));
                     scaleData->skipFlags[DNN_BACKEND_DEFAULT] = true;
-                    if ( preferableTarget == DNN_TARGET_OPENCL )
-                        ld.umat_outputBlobs = layers[lpNext.lid].umat_outputBlobs;
-                    else
-                        ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                    ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                    ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
                     if( scaleData->consumers.size() == 1 )
                     {
                         nextData = &layers[scaleData->consumers[0].lid];
@@ -1276,10 +1246,8 @@ struct Net::Impl
                 LayerData *activData = nextData;
                 printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
                 activData->skipFlags[DNN_BACKEND_DEFAULT] = true;
-                if ( preferableTarget == DNN_TARGET_OPENCL )
-                    ld.umat_outputBlobs = layers[lpNext.lid].umat_outputBlobs;
-                else
-                    ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
 
                 if ( preferableTarget == DNN_TARGET_OPENCL )
                 {
@@ -1329,6 +1297,7 @@ struct Net::Impl
             // fuse eltwise + activation layer
+            LayerData *firstConvLayerData = downLayerData;
             {
                 CV_Assert(eltwiseData->consumers.size() == 1);
                 nextData = &layers[eltwiseData->consumers[0].lid];
                 lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
                 Ptr<ActivationLayer> nextActivLayer;
@@ -1341,13 +1310,50 @@ struct Net::Impl
                 !nextData->type.compare("Power")) &&
                 currLayer->setActivation(nextActivLayer) )
             {
-                CV_Assert(firstConvLayerData->umat_outputBlobs.size() == 1 && ld.umat_inputBlobs.size() == 1);
-                ld.umat_inputBlobs.push_back(firstConvLayerData->umat_outputBlobs[0]);
+                CV_Assert(firstConvLayerData->outputBlobsWrappers.size() == 1 && ld.inputBlobsWrappers.size() == 1);
+                ld.inputBlobsWrappers.push_back(firstConvLayerData->outputBlobsWrappers[0]);
                 printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
                 printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
                 eltwiseData->skipFlags[DNN_BACKEND_DEFAULT] = true;
                 nextData->skipFlags[DNN_BACKEND_DEFAULT] = true;
-                ld.umat_outputBlobs = layers[lpNext.lid].umat_outputBlobs;
+                // This optimization is for cases like
+                // some_layer   conv
+                //    |            |
+                //    +-- eltwise --+
+                //           |
+                //         activ
+                // This way all the element-wise computations
+                // (i.e. some_layer+conv or some_layer*conv)
+                // would be done at [conv] layer. So we need to
+                // replace [conv]'s output blob with [eltwise]'s one
+                // considering that [activ] is an in-place layer.
+                // Also we need to move all the consumers' references.
+                // To prevent memory collisions (i.e. when input of
+                // [conv] and output of [eltwise] is the same blob)
+                // we allocate a new blob.
+                CV_Assert(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
+                ld.outputBlobs[0] = ld.outputBlobs[0].clone();
+                ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);
+
+                eltwiseData->outputBlobs = ld.outputBlobs;
+                nextData->outputBlobs = ld.outputBlobs;
+                eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;
+                nextData->outputBlobsWrappers = ld.outputBlobsWrappers;
+
+                // Move references of [activ] layer consumers to the newly allocated blob.
+                for (int i = 0; i < nextData->consumers.size(); ++i)
+                {
+                    LayerData& consumer = layers[nextData->consumers[i].lid];
+                    for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
+                    {
+                        if (consumer.inputBlobsId[j].lid == lpNext.lid)
+                        {
+                            consumer.inputBlobs[j] = &ld.outputBlobs[0];
+                            consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
+                            break;
+                        }
+                    }
+                }
             }
         }
     }
@@ -1469,8 +1475,6 @@ struct Net::Impl
         getLayersShapes(inputShapes, layersShapes);
 
         blobManager.reset();
-        blobManager.setPreferableTarget(preferableTarget);
-        blobManager.setPreferableBackend(preferableBackend);
         backendWrappers.clear();
         // Fake references to input blobs.
         for (int i = 0; i < layers[0].outputBlobs.size(); ++i)
@@ -1510,19 +1514,29 @@ struct Net::Impl
         {
             if( !ld.skipFlags[DNN_BACKEND_DEFAULT] )
             {
-                for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
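+                // OpenCL path: pull the UMats out of the wrappers (uploading any
+                // dirty host data), run the layer on the device, then publish the
+                // possibly reallocated output UMats back into the wrappers.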
+                if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL)
                 {
-                    if (!ld.inputBlobsWrappers[i].empty())
-                        ld.inputBlobsWrappers[i]->copyToHost();
+                    std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+                    layer->forward(OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers),
+                                   umat_outputBlobs,
+                                   OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers));
+                    OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
                 }
-                if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL)
-                    layer->forward(ld.umat_inputBlobs, ld.umat_outputBlobs, ld.umat_internals);
                 else
-                    layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals);
-                for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
                 {
-                    if (!ld.outputBlobsWrappers[i].empty())
-                        ld.outputBlobsWrappers[i]->setHostDirty();
+                    for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
+                    {
+                        if (!ld.inputBlobsWrappers[i].empty())
+                            ld.inputBlobsWrappers[i]->copyToHost();
+                    }
+
+                    layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals);
+
+                    for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
+                    {
+                        if (!ld.outputBlobsWrappers[i].empty())
+                            ld.outputBlobsWrappers[i]->setHostDirty();
+                    }
+                }
             }
         }
         else
@@ -1654,51 +1668,19 @@ struct Net::Impl
             CV_Error(Error::StsOutOfRange, "Layer \"" + ld.name + "\" produce only " + toString(ld.outputBlobs.size()) +
                      " outputs, the #" + toString(pin.oid) + " was requsted");
         }
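+        // For a non-CPU target the reference data lives in the backend wrapper;
+        // sync it back into the host Mat before returning it.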
-        if (preferableBackend != DNN_BACKEND_DEFAULT)
+        if (preferableTarget != DNN_TARGET_CPU)
         {
             CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
             // Transfer data to CPU if it's require.
             ld.outputBlobsWrappers[pin.oid]->copyToHost();
         }
-        else
-        {
-            CV_Assert(preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL);
-        }
-
-        if (ld.umat_outputBlobs.size() > 0 && !ld.umat_outputBlobs[pin.oid].empty())
-            ld.umat_outputBlobs[pin.oid].copyTo(ld.outputBlobs[pin.oid]);
-
         return ld.outputBlobs[pin.oid];
     }
 
-    void getBlob(UMat& umat, const LayerPin& pin)
-    {
-        CV_TRACE_FUNCTION();
-
-        if (!pin.valid())
-            CV_Error(Error::StsObjectNotFound, "Requested blob not found");
-
-        LayerData &ld = layers[pin.lid];
-        if ((size_t)pin.oid >= ld.outputBlobs.size())
-        {
-            CV_Error(Error::StsOutOfRange, "Layer \"" + ld.name + "\" produce only " + toString(ld.outputBlobs.size()) +
-                     " outputs, the #" + toString(pin.oid) + " was requsted");
-        }
-
-        if (ld.umat_outputBlobs.size() > 0 && !ld.umat_outputBlobs[pin.oid].empty())
-            umat = ld.umat_outputBlobs[pin.oid];
-        else
-            umat = UMat();
-    }
-
     Mat getBlob(String outputName)
     {
         return getBlob(getPinByAlias(outputName));
     }
-
-    void getBlob(UMat& umat, String outputName)
-    {
-        getBlob(umat, getPinByAlias(outputName));
-    }
 };
 
 Net::Net() : impl(new Net::Impl)
@@ -1794,12 +1776,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
 
     if (outputBlobs.isUMat())
     {
-        if (ld.umat_outputBlobs.size() > 0)
-        {
-            UMat umat;
-            impl->getBlob(umat, layerName);
-            outputBlobs.assign(umat);
-        }
+        outputBlobs.assign(ld.outputBlobs[pin.oid].getUMat(ACCESS_RW));
     }
     else if (outputBlobs.isMat())
     {
@@ -1807,20 +1784,31 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
     }
     else if (outputBlobs.isMatVector())
     {
-        if (ld.umat_outputBlobs.size() > 0)
+        if (impl->preferableTarget != DNN_TARGET_CPU)
         {
-            for (int i = 0; i < ld.umat_outputBlobs.size(); i++)
-                ld.umat_outputBlobs[i].copyTo(ld.outputBlobs[i]);
+            for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
+            {
+                CV_Assert(!ld.outputBlobsWrappers[i].empty());
+                ld.outputBlobsWrappers[i]->copyToHost();
+            }
         }
         std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
         outputvec = ld.outputBlobs;
     }
     else if (outputBlobs.isUMatVector())
     {
-        if (ld.umat_outputBlobs.size() > 0)
+        std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();
+
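+        // On the OpenCL target the wrappers hand back the device UMats directly,
+        // avoiding a host round trip; otherwise host Mats are exposed as UMats.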
+        if (impl->preferableBackend == DNN_BACKEND_DEFAULT &&
+            impl->preferableTarget == DNN_TARGET_OPENCL)
+        {
+            outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+        }
+        else
         {
-            std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();
-            outputvec = ld.umat_outputBlobs;
+            outputvec.resize(ld.outputBlobs.size());
+            for (int i = 0; i < outputvec.size(); ++i)
+                outputvec[i] = ld.outputBlobs[i].getUMat(ACCESS_RW);
         }
     }
 }
@@ -1889,7 +1877,6 @@ void Net::setPreferableBackend(int backendId)
     if( impl->preferableBackend != backendId )
     {
         impl->preferableBackend = backendId;
-        impl->blobManager.setPreferableBackend(backendId);
         impl->netWasAllocated = false;
         impl->clear();
     }
@@ -1903,7 +1890,6 @@ void Net::setPreferableTarget(int targetId)
     if( impl->preferableTarget != targetId )
     {
         impl->preferableTarget = targetId;
-        impl->blobManager.setPreferableTarget(targetId);
         impl->netWasAllocated = false;
         impl->clear();
     }
@@ -1930,10 +1916,6 @@ void Net::setInput(InputArray blob, const String& name)
 
     LayerData &ld = impl->layers[pin.lid];
     ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
-    bool use_umat = (impl->preferableBackend == DNN_BACKEND_DEFAULT &&
-                     impl->preferableTarget == DNN_TARGET_OPENCL);
-    if (use_umat)
-        ld.umat_outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
     ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
     MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
     Mat blob_ = blob.getMat();
@@ -1941,14 +1923,10 @@ void Net::setInput(InputArray blob, const String& name)
     if (oldShape)
     {
         blob_.copyTo(ld.outputBlobs[pin.oid]);
-        if (use_umat)
-            blob_.copyTo(ld.umat_outputBlobs[pin.oid]);
     }
     else
     {
         ld.outputBlobs[pin.oid] = blob_.clone();
-        if (use_umat)
-            blob_.copyTo(ld.umat_outputBlobs[pin.oid]);
     }
 
     if (!ld.outputBlobsWrappers[pin.oid].empty())