@ -148,11 +148,89 @@ public: |
deltasPermute->finalize(layerInputs, layerOutputs); |
} |
#ifdef HAVE_OPENCL |
// OpenCL (UMat) implementation of the forward pass.
//
// Expects exactly three inputs (scores, bounding-box deltas, image info) and
// three internal buffers (prior boxes, permuted scores, permuted deltas).
// Runs the nested prior-box, permute and detection-output layers, then writes
// the result into outputs[0]: column 0 is zeroed (batch ids), columns 1..4
// receive columns 3..6 of the detection output, and all rows from the number
// of detections up to keepTopAfterNMS are filled with zeros.
//
// Precondition violations are reported through CV_Assert. Returns true once
// the output has been written.
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
    std::vector<UMat> inputs;
    std::vector<UMat> outputs;
    std::vector<UMat> internals;
    inputs_.getUMatVector(inputs);
    outputs_.getUMatVector(outputs);
    internals_.getUMatVector(internals);

    CV_Assert(inputs.size() == 3);
    CV_Assert(internals.size() == 3);
    const UMat& scores = inputs[0];
    const UMat& bboxDeltas = inputs[1];
    const UMat& imInfo = inputs[2];
    UMat& priorBoxes = internals[0];
    UMat& permuttedScores = internals[1];
    UMat& permuttedDeltas = internals[2];

    // The first two values of imInfo are read on the host as rows and cols.
    // NOTE(review): this reads them as float; confirm imInfo is always CV_32F.
    CV_Assert(imInfo.total() >= 2);
    Mat szMat;
    imInfo.copyTo(szMat);
    const int rows = static_cast<int>(szMat.at<float>(0));
    const int cols = static_cast<int>(szMat.at<float>(1));

    // We've chosen the smallest data type because we need just a shape from it.
    umat_fakeImageBlob.create(shape(1, 1, rows, cols), CV_8UC1);
    umat_fakeImageBlob.setTo(0);

    // Generate prior boxes.
    std::vector<UMat> layerInputs(2), layerOutputs(1, priorBoxes);
    layerInputs[0] = scores;
    layerInputs[1] = umat_fakeImageBlob;
    priorBoxLayer->forward(layerInputs, layerOutputs, internals);

    // Permute scores.
    layerInputs.assign(1, getObjectScores(scores));
    layerOutputs.assign(1, permuttedScores);
    scoresPermute->forward(layerInputs, layerOutputs, internals);

    // Permute deltas.
    layerInputs.assign(1, bboxDeltas);
    layerOutputs.assign(1, permuttedDeltas);
    deltasPermute->forward(layerInputs, layerOutputs, internals);

    // Sort predictions by scores and apply NMS. DetectionOutputLayer allocates
    // output internally because of different number of objects after NMS.
    layerInputs.resize(4);
    layerInputs[0] = permuttedDeltas;
    layerInputs[1] = permuttedScores;
    layerInputs[2] = priorBoxes;
    layerInputs[3] = umat_fakeImageBlob;
    layerOutputs[0] = UMat();
    detectionOutputLayer->forward(layerInputs, layerOutputs, internals);

    // DetectionOutputLayer produces 1x1xNx7 output where N might be less or
    // equal to keepTopAfterNMS. We fill the rest by zeros.
    const int numDets = static_cast<int>(layerOutputs[0].total() / 7);
    CV_Assert(numDets <= keepTopAfterNMS);
    const int maxDets = static_cast<int>(keepTopAfterNMS);

    // With no detections there is nothing to copy, and reshaping to a
    // zero-row matrix is not a valid request, so skip straight to zero-fill.
    if (numDets > 0)
    {
        MatShape s = shape(numDets, 7);
        UMat src = layerOutputs[0].reshape(1, static_cast<int>(s.size()), &s[0]).colRange(3, 7);
        UMat dst = outputs[0].rowRange(0, numDets);
        src.copyTo(dst.colRange(1, 5));
        dst.col(0).setTo(0);  // First column are batch ids. Keep it zeros too.
    }

    if (numDets < maxDets)
        outputs[0].rowRange(numDets, maxDets).setTo(0);
    return true;
}
#endif |
// Layer forward entry point.
// CV_OCL_RUN is handed forward_ocl() together with the condition that the
// preferable target is DNN_TARGET_OPENCL and that OCL_PERFORMANCE_CHECK passes
// for an Intel default device; the statement after it calls
// Layer::forward_fallback() with the same arguments.
// NOTE(review): presumably CV_OCL_RUN returns early when forward_ocl()
// succeeds, so the fallback only runs otherwise — confirm against the macro.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) |
{ |
// Record the layer name as a trace argument.
CV_TRACE_ARG_VALUE(name, "name", name.c_str()); |
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && |
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), |
forward_ocl(inputs_arr, outputs_arr, internals_arr)) |
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); |
} |
@ -226,6 +304,20 @@ private: |
return slice(m, Range::all(), Range(channels / 2, channels)); |
} |
#ifdef HAVE_OPENCL |
// UMat overload: selects the second half of the channel axis of a 4-D blob.
// Asserts that the blob has four dimensions, a first dimension of size 1,
// and an even number of channels; returns m restricted to channels
// [channels / 2, channels) with every other axis taken in full.
static UMat getObjectScores(const UMat& m)
{
    CV_Assert(m.dims == 4);
    CV_Assert(m.size[0] == 1);
    const int channels = m.size[1];
    CV_Assert((channels & 1) == 0);

    // Start from "everything" on each axis, then narrow the channel axis.
    Range roi[4];
    for (int axis = 0; axis < 4; ++axis)
        roi[axis] = Range::all();
    roi[1] = Range(channels / 2, channels);

    return m(roi);
}
#endif |
Ptr<PriorBoxLayer> priorBoxLayer; |
Ptr<DetectionOutputLayer> detectionOutputLayer; |
@ -233,6 +325,9 @@ private: |
Ptr<PermuteLayer> scoresPermute; |
uint32_t keepTopAfterNMS; |
Mat fakeImageBlob; |
#ifdef HAVE_OPENCL |
UMat umat_fakeImageBlob; |
#endif |
}; |