diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp index 04d7c7fcc0..a38e3baa9e 100644 --- a/modules/dnn/src/layers/concat_layer.cpp +++ b/modules/dnn/src/layers/concat_layer.cpp @@ -52,6 +52,7 @@ #ifdef HAVE_OPENCL #include "opencl_kernels_dnn.hpp" +#include "../ocl4dnn/include/common.hpp" #endif #ifdef HAVE_CUDA @@ -235,8 +236,6 @@ public: { std::vector<UMat> inputs; std::vector<UMat> outputs; - - bool use_half = (inps.depth() == CV_16F); inps.getUMatVector(inputs); outs.getUMatVector(outputs); @@ -250,8 +249,9 @@ public: int num_concats = total(shape(inputs[0]), 0, cAxis); int offset_concat_axis = 0; UMat& outMat = outputs[0]; - String buildopt = format(" -DDtype=%s", (use_half) ? "half" : "float"); - String kname = format("concat_%s", use_half ? "half" : "float"); + String matType = matTypeToOclType(inputs[0].type()); + String buildopt = " -DDtype=" + matType; + String kname = "concat_" + matType; for (size_t i = 0; i < inputs.size(); i++) { @@ -287,8 +287,7 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - (inputs_arr.depth() == CV_32F || inputs_arr.depth() == CV_16F), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) std::vector<Mat> inputs, outputs; diff --git a/modules/dnn/src/layers/permute_layer.cpp b/modules/dnn/src/layers/permute_layer.cpp index 0a3315e4e7..7c6de8c277 100644 --- a/modules/dnn/src/layers/permute_layer.cpp +++ b/modules/dnn/src/layers/permute_layer.cpp @@ -337,11 +337,13 @@ public: mnew_stride.copyTo(unew_stride); } - bool use_half = (inps.depth() == CV_16F); - String opts = format("-DDtype=%s", use_half ? 
"half" : "float"); for (size_t i = 0; i < inputs.size(); i++) { - ocl::Kernel kernel("permute", ocl::dnn::permute_oclsrc, opts); + String matType = matTypeToOclType(inputs[0].type()); + String opts = " -DDtype=" + matType; + String kname = "permute_" + matType; + + ocl::Kernel kernel(kname.c_str(), ocl::dnn::permute_oclsrc, opts); kernel.set(0, (int)_count); kernel.set(1, ocl::KernelArg::PtrReadOnly(inputs[i])); @@ -364,9 +366,7 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - inputs_arr.depth() != CV_8S && inputs_arr.depth() != CV_8U && - inputs_arr.depth() != CV_Bool && inputs_arr.depth() != CV_64S, + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) std::vector inputs, outputs; diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 7e088e9735..45e8a1026b 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -315,25 +315,11 @@ public: CV_Assert_N(inputs.size() == 1, !outputs.empty(), !computeMaxIdx || outputs.size() == 2); UMat& inpMat = inputs[0]; UMat& outMat = outputs[0]; - UMat maskMat; - if (computeMaxIdx) - maskMat.create(shape(outputs[1]), use_half ? CV_16F : CV_32F); + UMat maskMat = computeMaxIdx ? 
outputs[1] : UMat(); CV_Assert(inpMat.offset == 0 && outMat.offset == 0); - bool result = poolOp->Forward(inpMat, outMat, maskMat); - - if (computeMaxIdx) { - if (use_half) { - UMat maskMat32F; - maskMat.convertTo(maskMat32F, CV_32F); - maskMat32F.convertTo(outputs[1], CV_64S); - } - else - maskMat.convertTo(outputs[1], CV_64S); - } - - return result; + return poolOp->Forward(inpMat, outMat, maskMat); } #endif diff --git a/modules/dnn/src/layers/reorg_layer.cpp b/modules/dnn/src/layers/reorg_layer.cpp index 04b0a3b045..fc120427bd 100644 --- a/modules/dnn/src/layers/reorg_layer.cpp +++ b/modules/dnn/src/layers/reorg_layer.cpp @@ -195,7 +195,7 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && inputs_arr.depth() != CV_32S && inputs_arr.depth() != CV_64S, + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) if (inputs_arr.depth() == CV_16F) diff --git a/modules/dnn/src/layers/reshape_layer.cpp b/modules/dnn/src/layers/reshape_layer.cpp index e2fc7a9b30..1c31a36f98 100644 --- a/modules/dnn/src/layers/reshape_layer.cpp +++ b/modules/dnn/src/layers/reshape_layer.cpp @@ -331,7 +331,7 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && inputs_arr.depth() != CV_32S && inputs_arr.depth() != CV_64S, + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) std::vector<Mat> inputs, outputs; diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index b6340a3efc..e34cffb936 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -621,8 +621,7 @@ public: inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); - CV_OCL_RUN((IS_DNN_OPENCL_TARGET(preferableTarget) && - (outputs[0].type() != CV_32S && outputs[0].type() != CV_64S)), 
+ CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) const Mat& inpMat = inputs[0]; diff --git a/modules/dnn/src/ocl4dnn/include/common.hpp b/modules/dnn/src/ocl4dnn/include/common.hpp index 2af6d69803..bfaeadc663 100644 --- a/modules/dnn/src/ocl4dnn/include/common.hpp +++ b/modules/dnn/src/ocl4dnn/include/common.hpp @@ -55,4 +55,6 @@ bool clOptionSupport(cv::String option); +cv::String matTypeToOclType(int cvMatType); + #endif diff --git a/modules/dnn/src/ocl4dnn/src/common.cpp b/modules/dnn/src/ocl4dnn/src/common.cpp index 7ca196b355..1c73e80112 100644 --- a/modules/dnn/src/ocl4dnn/src/common.cpp +++ b/modules/dnn/src/ocl4dnn/src/common.cpp @@ -52,3 +52,21 @@ bool clOptionSupport(cv::String option) ocl::Program program = ocl::Context::getDefault().getProg(ocl::dnn::dummy_oclsrc, option, errmsg); return program.ptr() ? true : false; } + +cv::String matTypeToOclType(int cvMatType) +{ + cv::String oclType; + switch(cvMatType) + { + case CV_16F: oclType = "half"; break; + case CV_32F: oclType = "float"; break; + case CV_Bool: oclType = "bool"; break; + case CV_8U: oclType = "uchar"; break; + case CV_8S: oclType = "char"; break; + case CV_32S: oclType = "int"; break; + case CV_64S: oclType = "long"; break; + default: + CV_Error(Error::StsBadArg, "Unsupported mat type"); + } + return oclType; +} diff --git a/modules/dnn/src/opencl/ocl4dnn_pooling.cl b/modules/dnn/src/opencl/ocl4dnn_pooling.cl index 53c61e4bd2..93fdbb6ce5 100644 --- a/modules/dnn/src/opencl/ocl4dnn_pooling.cl +++ b/modules/dnn/src/opencl/ocl4dnn_pooling.cl @@ -61,7 +61,7 @@ __kernel void const int pooled_height, const int pooled_width, __global Dtype* top_data #ifdef HAVE_MASK - , __global Dtype* mask + , __global long* mask #endif ) { diff --git a/modules/dnn/src/opencl/permute.cl b/modules/dnn/src/opencl/permute.cl index 9e709f201c..0b7be8f2f4 100644 --- a/modules/dnn/src/opencl/permute.cl +++ b/modules/dnn/src/opencl/permute.cl @@ -44,13 +44,16 @@ 
#pragma OPENCL EXTENSION cl_khr_fp16 : enable #endif -__kernel void permute(const int nthreads, - __global Dtype* bottom_data, - global int* permute_order, - global int* oldStride, - global int* newStride, - const int num_axes, - __global Dtype* top_data) +#define CONCAT(A,B) A##_##B +#define TEMPLATE(name,type) CONCAT(name,type) + +__kernel void TEMPLATE(permute, Dtype)(const int nthreads, + __global Dtype* bottom_data, + global int* permute_order, + global int* oldStride, + global int* newStride, + const int num_axes, + __global Dtype* top_data) { for (int i = get_global_id(0); i < nthreads; i += get_global_size(0)) {