diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp index e51e1f7824..e49f22db2c 100644 --- a/modules/dnn/src/layers/concat_layer.cpp +++ b/modules/dnn/src/layers/concat_layer.cpp @@ -185,12 +185,13 @@ public: outs.getUMatVector(outputs); int cAxis = clamp(axis, inputs[0].dims); - if (!(cAxis == 1 && outputs[0].dims == 4 && !padding)) + if (padding) return false; int bottom_concat_axis; - int concat_size = inputs[0].size[2] * inputs[0].size[3]; - int top_concat_axis = outputs[0].size[1]; + int concat_size = total(shape(inputs[0]), cAxis + 1); + int top_concat_axis = outputs[0].size[cAxis]; + int num_concats = total(shape(inputs[0]), 0, cAxis); int offset_concat_axis = 0; UMat& outMat = outputs[0]; String buildopt = String("-DDtype=") + ocl::typeToStr(inputs[0].type()) + String(" "); @@ -202,12 +203,12 @@ public: return false; UMat& inpMat = inputs[i]; - bottom_concat_axis = inputs[i].size[1]; + bottom_concat_axis = inputs[i].size[cAxis]; size_t nthreads = inputs[i].total(); kernel.set(0, (int)nthreads); kernel.set(1, ocl::KernelArg::PtrReadOnly(inpMat)); - kernel.set(2, (int)inputs[i].size[0]); + kernel.set(2, (int)num_concats); kernel.set(3, (int)concat_size); kernel.set(4, (int)top_concat_axis); kernel.set(5, (int)bottom_concat_axis); diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index 065c0c2566..5f75effeb6 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -45,6 +45,7 @@ #include #include #include "../nms.inl.hpp" +#include "opencl_kernels_dnn.hpp" namespace cv { @@ -211,11 +212,160 @@ public: return false; } +#ifdef HAVE_OPENCL + // Decode all bboxes in a batch + bool ocl_DecodeBBoxesAll(UMat& loc_mat, UMat& prior_mat, + const int num, const int numPriors, const bool share_location, + const int num_loc_classes, const int background_label_id, + const cv::String& code_type, const bool variance_encoded_in_target, + const bool clip, std::vector& all_decode_bboxes) + { + UMat outmat = UMat(loc_mat.dims, loc_mat.size, CV_32F); + size_t nthreads = loc_mat.total(); + String kernel_name; + + if (code_type == "CORNER") + kernel_name = "DecodeBBoxesCORNER"; + else if (code_type == "CENTER_SIZE") + kernel_name = "DecodeBBoxesCENTER_SIZE"; + else + return false; + + for (int i = 0; i < num; ++i) + { + ocl::Kernel kernel(kernel_name.c_str(), ocl::dnn::detection_output_oclsrc); + kernel.set(0, (int)nthreads); + kernel.set(1, ocl::KernelArg::PtrReadOnly(loc_mat)); + kernel.set(2, ocl::KernelArg::PtrReadOnly(prior_mat)); + kernel.set(3, (int)variance_encoded_in_target); + kernel.set(4, (int)numPriors); + kernel.set(5, (int)share_location); + kernel.set(6, (int)num_loc_classes); + kernel.set(7, (int)background_label_id); + kernel.set(8, (int)clip); + kernel.set(9, ocl::KernelArg::PtrWriteOnly(outmat)); + + if (!kernel.run(1, &nthreads, NULL, false)) + return false; + } + + all_decode_bboxes.clear(); + all_decode_bboxes.resize(num); + { + Mat mat = outmat.getMat(ACCESS_READ); + const float* decode_data = mat.ptr(); + for (int i = 0; i < num; ++i) + { + LabelBBox& decode_bboxes = all_decode_bboxes[i]; + for (int c = 0; c < num_loc_classes; ++c) + { + int label = share_location ? -1 : c; + decode_bboxes[label].resize(numPriors); + for (int p = 0; p < numPriors; ++p) + { + int startIdx = p * num_loc_classes * 4; + util::NormalizedBBox& bbox = decode_bboxes[label][p]; + bbox.xmin = decode_data[startIdx + c * 4]; + bbox.ymin = decode_data[startIdx + c * 4 + 1]; + bbox.xmax = decode_data[startIdx + c * 4 + 2]; + bbox.ymax = decode_data[startIdx + c * 4 + 3]; + } + } + } + } + return true; + } + + void ocl_GetConfidenceScores(const UMat& inp1, const int num, + const int numPredsPerClass, const int numClasses, + std::vector& confPreds) + { + int shape[] = { numClasses, numPredsPerClass }; + for (int i = 0; i < num; i++) + confPreds.push_back(Mat(2, shape, CV_32F)); + + UMat umat = inp1.reshape(1, num * numPredsPerClass); + for (int i = 0; i < num; ++i) + { + Range ranges[] = { Range(i * numPredsPerClass, (i + 1) * numPredsPerClass), Range::all() }; + transpose(umat(ranges), confPreds[i]); + } + } + + bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) + { + std::vector inputs; + std::vector outputs; + + inps.getUMatVector(inputs); + outs.getUMatVector(outputs); + + std::vector allDecodedBBoxes; + std::vector allConfidenceScores; + + int num = inputs[0].size[0]; + + // extract predictions from input layers + { + int numPriors = inputs[2].size[2] / 4; + + // Retrieve all confidences + ocl_GetConfidenceScores(inputs[1], num, numPriors, _numClasses, allConfidenceScores); + + // Decode all loc predictions to bboxes + bool ret = ocl_DecodeBBoxesAll(inputs[0], inputs[2], num, numPriors, + _shareLocation, _numLocClasses, _backgroundLabelId, + _codeType, _varianceEncodedInTarget, false, + allDecodedBBoxes); + if (!ret) + return false; + } + + size_t numKept = 0; + std::vector > > allIndices; + for (int i = 0; i < num; ++i) + { + numKept += processDetections_(allDecodedBBoxes[i], allConfidenceScores[i], allIndices); + } + + if (numKept == 0) + { + // Set confidences to zeros. + Range ranges[] = {Range::all(), Range::all(), Range::all(), Range(2, 3)}; + outputs[0](ranges).setTo(0); + return true; + } + int outputShape[] = {1, 1, (int)numKept, 7}; + UMat umat = UMat(4, outputShape, CV_32F); + { + Mat mat = umat.getMat(ACCESS_WRITE); + float* outputsData = mat.ptr(); + + size_t count = 0; + for (int i = 0; i < num; ++i) + { + count += outputDetections_(i, &outputsData[count * 7], + allDecodedBBoxes[i], allConfidenceScores[i], + allIndices[i]); + } + CV_Assert(count == numKept); + } + outputs.clear(); + outputs.push_back(umat); + outs.assign(outputs); + return true; + } +#endif + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && + OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + forward_ocl(inputs_arr, outputs_arr, internals_arr)) + Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); } @@ -225,7 +375,7 @@ public: CV_TRACE_ARG_VALUE(name, "name", name.c_str()); std::vector allDecodedBBoxes; - std::vector > > allConfidenceScores; + std::vector allConfidenceScores; int num = inputs[0]->size[0]; @@ -286,7 +436,7 @@ public: size_t outputDetections_( const int i, float* outputsData, - const LabelBBox& decodeBBoxes, const std::vector >& confidenceScores, + const LabelBBox& decodeBBoxes, Mat& confidenceScores, const std::map >& indicesMap ) { @@ -294,9 +444,9 @@ public: for (std::map >::const_iterator it = indicesMap.begin(); it != indicesMap.end(); ++it) { int label = it->first; - if (confidenceScores.size() <= label) + if (confidenceScores.rows <= label) CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find confidence predictions for label %d", label)); - const std::vector& scores = confidenceScores[label]; + const std::vector& scores = confidenceScores.row(label); int locLabel = _shareLocation ? -1 : label; LabelBBox::const_iterator label_bboxes = decodeBBoxes.find(locLabel); if (label_bboxes == decodeBBoxes.end()) @@ -320,7 +470,7 @@ public: } size_t processDetections_( - const LabelBBox& decodeBBoxes, const std::vector >& confidenceScores, + const LabelBBox& decodeBBoxes, Mat& confidenceScores, std::vector > >& allIndices ) { @@ -330,10 +480,10 @@ public: { if (c == _backgroundLabelId) continue; // Ignore background class. - if (c >= confidenceScores.size()) + if (c >= confidenceScores.rows) CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find confidence predictions for label %d", c)); - const std::vector& scores = confidenceScores[c]; + const std::vector scores = confidenceScores.row(c); int label = _shareLocation ? -1 : c; LabelBBox::const_iterator label_bboxes = decodeBBoxes.find(label); @@ -351,9 +501,9 @@ public: { int label = it->first; const std::vector& labelIndices = it->second; - if (label >= confidenceScores.size()) + if (label >= confidenceScores.rows) CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find location predictions for label %d", label)); - const std::vector& scores = confidenceScores[label]; + const std::vector& scores = confidenceScores.row(label); for (size_t j = 0; j < labelIndices.size(); ++j) { size_t idx = labelIndices[j]; @@ -630,20 +780,20 @@ public: // confidence prediction for an image. static void GetConfidenceScores(const float* confData, const int num, const int numPredsPerClass, const int numClasses, - std::vector > >& confPreds) + std::vector& confPreds) { - confPreds.clear(); confPreds.resize(num); + int shape[] = { numClasses, numPredsPerClass }; + for (int i = 0; i < num; i++) + confPreds.push_back(Mat(2, shape, CV_32F)); + for (int i = 0; i < num; ++i, confData += numPredsPerClass * numClasses) { - std::vector >& labelScores = confPreds[i]; - labelScores.resize(numClasses); + Mat labelScores = confPreds[i]; for (int c = 0; c < numClasses; ++c) { - std::vector& classLabelScores = labelScores[c]; - classLabelScores.resize(numPredsPerClass); for (int p = 0; p < numPredsPerClass; ++p) { - classLabelScores[p] = confData[p * numClasses + c]; + labelScores.at(c, p) = confData[p * numClasses + c]; } } } diff --git a/modules/dnn/src/layers/permute_layer.cpp b/modules/dnn/src/layers/permute_layer.cpp index a21c5a6d1c..664c24e475 100644 --- a/modules/dnn/src/layers/permute_layer.cpp +++ b/modules/dnn/src/layers/permute_layer.cpp @@ -44,6 +44,7 @@ #include "layers_common.hpp" #include #include +#include "opencl_kernels_dnn.hpp" namespace cv { @@ -173,6 +174,24 @@ public: CV_Assert((int)_numAxes == inp0.dims); computeStrides(shape(*inputs[0]), shape(outputs[0])); + +#ifdef HAVE_OPENCL + if (uorder.empty()) + { + std::vector orderVec(_order.begin(), _order.end());; + Mat morder(1, orderVec.size(), CV_32SC1, &orderVec[0]); + + std::vector oldStrideVec(_oldStride.begin(), _oldStride.end()); + Mat mold_stride(1, _oldStride.size(), CV_32SC1, &oldStrideVec[0]); + + std::vector newStrideVec(_newStride.begin(), _newStride.end()); + Mat mnew_stride(1, newStrideVec.size(), CV_32SC1, &newStrideVec[0]); + + morder.copyTo(uorder); + mold_stride.copyTo(uold_stride); + mnew_stride.copyTo(unew_stride); + } +#endif } class PermuteInvoker : public ParallelLoopBody @@ -247,11 +266,47 @@ public: } }; +#ifdef HAVE_OPENCL + bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) + { + std::vector inputs; + std::vector outputs; + + inps.getUMatVector(inputs); + outs.getUMatVector(outputs); + + if (!_needsPermute) + return false; + + for (size_t i = 0; i < inputs.size(); i++) + { + ocl::Kernel kernel("permute", ocl::dnn::permute_oclsrc); + + kernel.set(0, (int)_count); + kernel.set(1, ocl::KernelArg::PtrReadOnly(inputs[i])); + kernel.set(2, ocl::KernelArg::PtrReadOnly(uorder)); + kernel.set(3, ocl::KernelArg::PtrReadOnly(uold_stride)); + kernel.set(4, ocl::KernelArg::PtrReadOnly(unew_stride)); + kernel.set(5, (int)_numAxes); + kernel.set(6, ocl::KernelArg::PtrWriteOnly(outputs[i])); + + if (!kernel.run(1, &_count, NULL, false)) + return false; + } + + return true; + } +#endif + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && + OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + forward_ocl(inputs_arr, outputs_arr, internals_arr)) + Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); } @@ -325,6 +380,10 @@ public: std::vector _newStride; bool _needsPermute; +#ifdef HAVE_OPENCL + UMat uorder, uold_stride, unew_stride; +#endif + size_t _numAxes; }; diff --git a/modules/dnn/src/layers/region_layer.cpp b/modules/dnn/src/layers/region_layer.cpp index bc12e8b1be..94993fa58f 100644 --- a/modules/dnn/src/layers/region_layer.cpp +++ b/modules/dnn/src/layers/region_layer.cpp @@ -44,6 +44,7 @@ #include #include #include +#include "opencl_kernels_dnn.hpp" namespace cv { @@ -114,11 +115,83 @@ public: } } +#ifdef HAVE_OPENCL + bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) + { + std::vector inputs; + std::vector outputs; + + inps.getUMatVector(inputs); + outs.getUMatVector(outputs); + + if (useSoftmaxTree) { // Yolo 9000 + CV_Error(cv::Error::StsNotImplemented, "Yolo9000 is not implemented"); + return false; + } + + CV_Assert(inputs.size() >= 1); + int const cell_size = classes + coords + 1; + UMat blob_umat = blobs[0].getUMat(ACCESS_READ); + + for (size_t ii = 0; ii < outputs.size(); ii++) + { + UMat& inpBlob = inputs[ii]; + UMat& outBlob = outputs[ii]; + + int rows = inpBlob.size[1]; + int cols = inpBlob.size[2]; + + ocl::Kernel logistic_kernel("logistic_activ", ocl::dnn::region_oclsrc); + size_t global = rows*cols*anchors; + logistic_kernel.set(0, (int)global); + logistic_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob)); + logistic_kernel.set(2, (int)cell_size); + logistic_kernel.set(3, ocl::KernelArg::PtrWriteOnly(outBlob)); + logistic_kernel.run(1, &global, NULL, false); + + if (useSoftmax) + { + // Yolo v2 + // softmax activation for Probability, for each grid cell (X x Y x Anchor-index) + ocl::Kernel softmax_kernel("softmax_activ", ocl::dnn::region_oclsrc); + size_t nthreads = rows*cols*anchors; + softmax_kernel.set(0, (int)nthreads); + softmax_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob)); + softmax_kernel.set(2, ocl::KernelArg::PtrReadOnly(blob_umat)); + softmax_kernel.set(3, (int)cell_size); + softmax_kernel.set(4, (int)classes); + softmax_kernel.set(5, (int)classfix); + softmax_kernel.set(6, (int)rows); + softmax_kernel.set(7, (int)cols); + softmax_kernel.set(8, (int)anchors); + softmax_kernel.set(9, (float)thresh); + softmax_kernel.set(10, ocl::KernelArg::PtrWriteOnly(outBlob)); + if (!softmax_kernel.run(1, &nthreads, NULL, false)) + return false; + } + + if (nmsThreshold > 0) { + Mat mat = outBlob.getMat(ACCESS_WRITE); + float *dstData = mat.ptr(); + do_nms_sort(dstData, rows*cols*anchors, nmsThreshold); + //do_nms(dstData, rows*cols*anchors, nmsThreshold); + } + + } + + return true; + } +#endif + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && + OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + forward_ocl(inputs_arr, outputs_arr, internals_arr)) + Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); } diff --git a/modules/dnn/src/layers/reorg_layer.cpp b/modules/dnn/src/layers/reorg_layer.cpp index 78c806fc40..51da9fd12e 100644 --- a/modules/dnn/src/layers/reorg_layer.cpp +++ b/modules/dnn/src/layers/reorg_layer.cpp @@ -44,6 +44,7 @@ #include #include #include +#include "opencl_kernels_dnn.hpp" namespace cv { @@ -86,11 +87,54 @@ public: return backendId == DNN_BACKEND_DEFAULT; } +#ifdef HAVE_OPENCL + bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) + { + std::vector inputs; + std::vector outputs; + + inps.getUMatVector(inputs); + outs.getUMatVector(outputs); + String buildopt = String("-DDtype=") + ocl::typeToStr(inputs[0].type()) + String(" "); + + for (size_t i = 0; i < inputs.size(); i++) + { + ocl::Kernel kernel("reorg", ocl::dnn::reorg_oclsrc, buildopt); + if (kernel.empty()) + return false; + + UMat& srcBlob = inputs[i]; + UMat& dstBlob = outputs[0]; + int channels = srcBlob.size[1]; + int height = srcBlob.size[2]; + int width = srcBlob.size[3]; + size_t nthreads = channels * height * width; + + kernel.set(0, (int)nthreads); + kernel.set(1, ocl::KernelArg::PtrReadOnly(srcBlob)); + kernel.set(2, (int)channels); + kernel.set(3, (int)height); + kernel.set(4, (int)width); + kernel.set(5, (int)reorgStride); + kernel.set(6, ocl::KernelArg::PtrWriteOnly(dstBlob)); + + if (!kernel.run(1, &nthreads, NULL, false)) + return false; + } + + return true; + } +#endif + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && + OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + forward_ocl(inputs_arr, outputs_arr, internals_arr)) + Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); } diff --git a/modules/dnn/src/layers/reshape_layer.cpp b/modules/dnn/src/layers/reshape_layer.cpp index d4a2875356..3183a3f898 100644 --- a/modules/dnn/src/layers/reshape_layer.cpp +++ b/modules/dnn/src/layers/reshape_layer.cpp @@ -182,11 +182,40 @@ public: return true; } + bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) + { + std::vector inputs; + std::vector outputs; + + inps.getUMatVector(inputs); + outs.getUMatVector(outputs); + + for (size_t i = 0; i < inputs.size(); i++) + { + UMat srcBlob = inputs[i]; + void *src_handle = inputs[i].handle(ACCESS_READ); + void *dst_handle = outputs[i].handle(ACCESS_WRITE); + if (src_handle != dst_handle) + { + MatShape outShape = shape(outputs[i]); + UMat umat = srcBlob.reshape(1, (int)outShape.size(), &outShape[0]); + umat.copyTo(outputs[i]); + } + } + outs.assign(outputs); + + return true; + } + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && + OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + forward_ocl(inputs_arr, outputs_arr, internals_arr)) + Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); } diff --git a/modules/dnn/src/opencl/detection_output.cl b/modules/dnn/src/opencl/detection_output.cl new file mode 100644 index 0000000000..f5932cc82a --- /dev/null +++ b/modules/dnn/src/opencl/detection_output.cl @@ -0,0 +1,181 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#define Dtype float +#define Dtype4 float4 + +__kernel void DecodeBBoxesCORNER(const int nthreads, + __global const Dtype* loc_data, + __global const Dtype* prior_data, + const int variance_encoded_in_target, + const int num_priors, + const int share_location, + const int num_loc_classes, + const int background_label_id, + const int clip_bbox, + __global Dtype* bbox_data) +{ + for (int index = get_global_id(0); index < nthreads; index += get_global_size(0)) + { + Dtype bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax; + const int i = index % 4; + const int p = ((index / 4 / num_loc_classes) % num_priors) * 4; + + const int c = (index / 4) % num_loc_classes; + int label = share_location ? -1 : c; + if (label == background_label_id) + return; // Ignore background class. + + Dtype4 loc_vec = vload4(0, loc_data + index - i); + Dtype4 bbox_vec, prior_variance; + if (variance_encoded_in_target) + { + bbox_vec = loc_vec; + } else { + const int start_index = num_priors * 4 + p; + prior_variance = vload4(0, prior_data + start_index); + bbox_vec = loc_vec * prior_variance; + } + + bbox_xmin = bbox_vec.x; + bbox_ymin = bbox_vec.y; + bbox_xmax = bbox_vec.z; + bbox_ymax = bbox_vec.w; + + Dtype4 prior_vec = vload4(0, prior_data + p); + Dtype val; + switch (i) + { + case 0: + val = prior_vec.x + bbox_xmin; + break; + case 1: + val = prior_vec.y + bbox_ymin; + break; + case 2: + val = prior_vec.z + bbox_xmax; + break; + case 3: + val = prior_vec.w + bbox_ymax; + break; + } + + if (clip_bbox) + val = max(min(val, (Dtype)1.), (Dtype)0.); + + bbox_data[index] = val; + } +} + +__kernel void DecodeBBoxesCENTER_SIZE(const int nthreads, + __global const Dtype* loc_data, + __global const Dtype* prior_data, + const int variance_encoded_in_target, + const int num_priors, + const int share_location, + const int num_loc_classes, + const int background_label_id, + const int clip_bbox, + __global Dtype* bbox_data) +{ + for (int index = get_global_id(0); index < nthreads; index += get_global_size(0)) + { + Dtype bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax; + const int i = index % 4; + const int p = ((index / 4 / num_loc_classes) % num_priors) * 4; + + const int c = (index / 4) % num_loc_classes; + int label = share_location ? -1 : c; + if (label == background_label_id) + return; // Ignore background class. + + Dtype4 loc_vec = vload4(0, loc_data + index - i); + Dtype4 bbox_vec, prior_variance; + if (variance_encoded_in_target) + { + bbox_vec = loc_vec; + } else { + const int start_index = num_priors * 4 + p; + prior_variance = vload4(0, prior_data + start_index); + bbox_vec = loc_vec * prior_variance; + } + + bbox_xmin = bbox_vec.x; + bbox_ymin = bbox_vec.y; + bbox_xmax = bbox_vec.z; + bbox_ymax = bbox_vec.w; + + Dtype4 prior_vec = vload4(0, prior_data + p); + Dtype prior_width = prior_vec.z - prior_vec.x; + Dtype prior_height = prior_vec.w - prior_vec.y; + Dtype prior_center_x = (prior_vec.x + prior_vec.z) * .5; + Dtype prior_center_y = (prior_vec.y + prior_vec.w) * .5; + + Dtype decode_bbox_center_x, decode_bbox_center_y; + Dtype decode_bbox_width, decode_bbox_height; + decode_bbox_center_x = bbox_xmin * prior_width + prior_center_x; + decode_bbox_center_y = bbox_ymin * prior_height + prior_center_y; + decode_bbox_width = exp(bbox_xmax) * prior_width; + decode_bbox_height = exp(bbox_ymax) * prior_height; + + Dtype val; + switch (i) + { + case 0: + val = decode_bbox_center_x - decode_bbox_width * .5; + break; + case 1: + val = decode_bbox_center_y - decode_bbox_height * .5; + break; + case 2: + val = decode_bbox_center_x + decode_bbox_width * .5; + break; + case 3: + val = decode_bbox_center_y + decode_bbox_height * .5; + break; + } + + if (clip_bbox) + val = max(min(val, (Dtype)1.), (Dtype)0.); + + bbox_data[index] = val; + } +} diff --git a/modules/dnn/src/opencl/permute.cl b/modules/dnn/src/opencl/permute.cl new file mode 100644 index 0000000000..38aa7990c1 --- /dev/null +++ b/modules/dnn/src/opencl/permute.cl @@ -0,0 +1,67 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2017, Intel Corporation, all rights reserved. +// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#define Dtype float + +__kernel void permute(const int nthreads, + __global Dtype* bottom_data, + global int* permute_order, + global int* oldStride, + global int* newStride, + const int num_axes, + __global Dtype* top_data) +{ + for (int i = get_global_id(0); i < nthreads; i += get_global_size(0)) + { + int oldPosition = 0; + int newPosition = i; + + for (int j = 0; j < num_axes; ++j) + { + int order = permute_order[j]; + oldPosition += (newPosition / newStride[j]) * oldStride[order]; + newPosition %= newStride[j]; + } + + top_data[i] = bottom_data[oldPosition]; + } +} diff --git a/modules/dnn/src/opencl/region.cl b/modules/dnn/src/opencl/region.cl new file mode 100644 index 0000000000..d33ac782c4 --- /dev/null +++ b/modules/dnn/src/opencl/region.cl @@ -0,0 +1,109 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#define Dtype float + +__kernel void logistic_activ(const int count, + __global const Dtype* src, + const int cell_size, + __global Dtype* dst) +{ + for (int i = get_global_id(0); i < count; i += get_global_size(0)) + { + int index = cell_size * i; + Dtype x = src[index + 4]; + dst[index + 4] = 1.f / (1.f + exp(-x)); + } +} + +__kernel void softmax_activ(const int count, + __global const Dtype* src, + __global const Dtype* biasData, + const int cell_size, + const int classes, + const int classfix, + const int rows, + const int cols, + const int anchors, + const float thresh, + __global Dtype* dst) +{ + for (int index = get_global_id(0); index < count; index += get_global_size(0)) + { + int box_index = index * cell_size; + float largest = -FLT_MAX; + __global const Dtype *input = src + box_index + 5; + __global Dtype *output = dst + box_index + 5; + + for (int i = 0; i < classes; ++i) + largest = fmax(largest, input[i]); + + float sum = 0; + for (int i = 0; i < classes; ++i) + { + float e = exp((input[i] - largest)); + sum += e; + output[i] = e; + } + + int y = index / anchors / cols; + int x = index / anchors % cols; + int a = index - anchors * (x + y * cols); + float scale = dst[box_index + 4]; + if (classfix == -1 && scale < .5) scale = 0; + + float v1 = src[box_index + 0]; + float v2 = src[box_index + 1]; + float l1 = 1.f / (1.f + exp(-v1)); + float l2 = 1.f / (1.f + exp(-v2)); + + dst[box_index + 0] = (x + l1) / cols; + dst[box_index + 1] = (y + l2) / rows; + dst[box_index + 2] = exp(src[box_index + 2]) * biasData[2 * a] / cols; + dst[box_index + 3] = exp(src[box_index + 3]) * biasData[2 * a + 1] / rows; + + for (int i = 0; i < classes; ++i) + { + float prob = scale * output[i] / sum; + output[i] = (prob > thresh) ? prob : 0; + } + } +} diff --git a/modules/dnn/src/opencl/reorg.cl b/modules/dnn/src/opencl/reorg.cl new file mode 100644 index 0000000000..a4b9caea84 --- /dev/null +++ b/modules/dnn/src/opencl/reorg.cl @@ -0,0 +1,63 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +__kernel void reorg(const int count, + __global const Dtype* src, + const int channels, + const int height, + const int width, + const int reorgStride, + __global Dtype* dst) +{ + for (int index = get_global_id(0); index < count; index += get_global_size(0)) + { + int k = index / (height * width); + int j = (index - (k * height * width)) / width; + int i = (index - (k * height * width)) % width; + int out_c = channels / (reorgStride*reorgStride); + int c2 = k % out_c; + int offset = k / out_c; + int w2 = i*reorgStride + offset % reorgStride; + int h2 = j*reorgStride + offset / reorgStride; + int in_index = w2 + width*reorgStride*(h2 + height*reorgStride*c2); + dst[index] = src[in_index]; + } +}