diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index 9fde7ad4c4..c70ce2b50a 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -600,6 +600,42 @@ CV__DNN_INLINE_NS_BEGIN
         static Ptr<ExpLayer> create(const LayerParams &params);
     };
 
+    class CV_EXPORTS CeilLayer : public ActivationLayer
+    {
+    public:
+        static Ptr<CeilLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS FloorLayer : public ActivationLayer
+    {
+    public:
+        static Ptr<FloorLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS LogLayer : public ActivationLayer
+    {
+    public:
+        static Ptr<LogLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS RoundLayer : public ActivationLayer
+    {
+    public:
+        static Ptr<RoundLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS SqrtLayer : public ActivationLayer
+    {
+    public:
+        static Ptr<SqrtLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS NotLayer : public ActivationLayer
+    {
+    public:
+        static Ptr<NotLayer> create(const LayerParams &params);
+    };
+
     class CV_EXPORTS ActivationLayerInt8 : public ActivationLayer
     {
     public:
@@ -665,6 +701,7 @@ CV__DNN_INLINE_NS_BEGIN
     public:
         bool hasBias;
         int axis;
+        String mode;
 
         static Ptr<ScaleLayer> create(const LayerParams& params);
     };
@@ -689,6 +726,12 @@ CV__DNN_INLINE_NS_BEGIN
         static Ptr<Layer> create(const LayerParams& params);
     };
 
+    class CV_EXPORTS CompareLayer : public Layer
+    {
+    public:
+        static Ptr<Layer> create(const LayerParams& params);
+    };
+
     class CV_EXPORTS DataAugmentationLayer : public Layer
     {
     public:
diff --git a/modules/dnn/src/cuda/activations.cu b/modules/dnn/src/cuda/activations.cu
index 599d58852e..c38fa0346f 100644
--- a/modules/dnn/src/cuda/activations.cu
+++ b/modules/dnn/src/cuda/activations.cu
@@ -128,6 +128,36 @@ void bnll(const Stream& stream, Span<T> output, View<T> input) {
     generic_op<T, BNLLFunctor<T>>(stream, output, input);
 }
 
+template <class T>
+void ceil(const Stream& stream, Span<T> output, View<T> input) {
+    generic_op<T, CeilFunctor<T>>(stream, output, input);
+}
+
+template <class T>
+void floor(const Stream& stream, Span<T> output, View<T> input) {
+    generic_op<T, FloorFunctor<T>>(stream, output, input);
+}
+
+template <class T>
+void log(const Stream& stream, Span<T> output, View<T> input) {
+    generic_op<T, LogFunctor<T>>(stream, output, input);
+}
+
+template <class T>
+void rint(const Stream& stream, Span<T> output, View<T> input) {
+    generic_op<T, RintFunctor<T>>(stream, output, input);
+}
+
+template <class T>
+void sqrt(const Stream& stream, Span<T> output, View<T> input) {
+    generic_op<T, SqrtFunctor<T>>(stream, output, input);
+}
+
+template <class T>
+void not_k(const Stream& stream, Span<T> output, View<T> input) {
+    generic_op<T, NotFunctor<T>>(stream, output, input);
+}
+
 template <class T>
 void abs(const Stream& stream, Span<T> output, View<T> input) {
     generic_op<T, AbsFunctor<T>>(stream, output, input);
@@ -160,6 +190,12 @@ template void sigmoid<__half>(const Stream&, Span<__half>, View<__half>);
 template void elu<__half>(const Stream&, Span<__half>, View<__half>);
 template void abs<__half>(const Stream& stream, Span<__half> output, View<__half> input);
 template void bnll<__half>(const Stream&, Span<__half>, View<__half>);
+template void ceil<__half>(const Stream&, Span<__half>, View<__half>);
+template void floor<__half>(const Stream&, Span<__half>, View<__half>);
+template void log<__half>(const Stream&, Span<__half>, View<__half>);
+template void rint<__half>(const Stream&, Span<__half>, View<__half>);
+template void sqrt<__half>(const Stream&, Span<__half>, View<__half>);
+template void not_k<__half>(const Stream&, Span<__half>, View<__half>);
 template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half);
 template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
 #endif
@@ -174,6 +210,12 @@ template void sigmoid<float>(const Stream&, Span<float>, View<float>);
 template void elu<float>(const Stream&, Span<float>, View<float>);
 template void abs<float>(const Stream& stream, Span<float> output, View<float> input);
 template void bnll<float>(const Stream&, Span<float>, View<float>);
+template void ceil<float>(const Stream&, Span<float>, View<float>);
+template void floor<float>(const Stream&, Span<float>, View<float>);
+template void log<float>(const Stream&, Span<float>, View<float>);
+template void rint<float>(const Stream&, Span<float>, View<float>);
+template void sqrt<float>(const Stream&, Span<float>, View<float>);
+template void not_k<float>(const Stream&, Span<float>, View<float>);
 template void power<float>(const Stream&, Span<float>, View<float>, float, float, float);
 template void exp<float>(const Stream&, Span<float>, View<float>, float, float);
diff --git a/modules/dnn/src/cuda/functors.hpp b/modules/dnn/src/cuda/functors.hpp
index f01a07c77e..04b545acaf 100644
--- a/modules/dnn/src/cuda/functors.hpp
+++ b/modules/dnn/src/cuda/functors.hpp
@@ -209,6 +209,96 @@ struct BNLLFunctor {
     }
 };
 
+template <class T>
+struct CeilFunctor {
+    struct Params {
+        CUDA4DNN_HOST_DEVICE Params() { }
+    };
+
+    CUDA4DNN_DEVICE CeilFunctor() { }
+    CUDA4DNN_DEVICE CeilFunctor(const Params& params) { }
+
+    CUDA4DNN_DEVICE T operator()(T value) {
+        using csl::device::ceil;
+        return ceil(value);
+    }
+};
+
+template <class T>
+struct FloorFunctor {
+    struct Params {
+        CUDA4DNN_HOST_DEVICE Params() { }
+    };
+
+    CUDA4DNN_DEVICE FloorFunctor() { }
+    CUDA4DNN_DEVICE FloorFunctor(const Params& params) { }
+
+    CUDA4DNN_DEVICE T operator()(T value) {
+        using csl::device::floor;
+        return floor(value);
+    }
+};
+
+template <class T>
+struct LogFunctor {
+    struct Params {
+        CUDA4DNN_HOST_DEVICE Params() { }
+    };
+
+    CUDA4DNN_DEVICE LogFunctor() { }
+    CUDA4DNN_DEVICE LogFunctor(const Params& params) { }
+
+    CUDA4DNN_DEVICE T operator()(T value) {
+        using csl::device::log;
+        return log(value);
+    }
+};
+
+template <class T>
+struct RintFunctor {
+    struct Params {
+        CUDA4DNN_HOST_DEVICE Params() { }
+    };
+
+    CUDA4DNN_DEVICE RintFunctor() { }
+    CUDA4DNN_DEVICE RintFunctor(const Params& params) { }
+
+    CUDA4DNN_DEVICE T operator()(T value) {
+        using csl::device::rint;
+        return rint(value);
+    }
+};
+
+template <class T>
+struct SqrtFunctor {
+    struct Params {
+        CUDA4DNN_HOST_DEVICE Params() { }
+    };
+
+    CUDA4DNN_DEVICE SqrtFunctor() { }
+    CUDA4DNN_DEVICE SqrtFunctor(const Params& params) { }
+
+    CUDA4DNN_DEVICE T operator()(T value) {
+        using csl::device::sqrt;
+        return sqrt(value);
+    }
+};
+
+template <class T>
+struct NotFunctor {
+    struct Params {
+        CUDA4DNN_HOST_DEVICE Params() { }
+    };
+
+    CUDA4DNN_DEVICE NotFunctor() { }
+    CUDA4DNN_DEVICE NotFunctor(const Params& params) { }
+
+    CUDA4DNN_DEVICE T operator()(T value) {
+        using csl::device::floor;
+        return floor(static_cast<T>(1.) - value);
+    }
+};
+
 template <class T>
 struct PowerFunctor {
     struct Params {
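The six new device functors follow one stateless pattern: an empty Params struct plus a unary operator(). NotFunctor is worth a second look: it implements logical NOT on 0/1 inputs as floor(1 - x). A minimal host-side sketch of the same contract (HostNotFunctor and apply_elementwise are illustrative names, not part of the patch):

    #include <cmath>
    #include <cstddef>

    // Host-side analogue of the stateless CUDA functors above.
    template <class T>
    struct HostNotFunctor {
        struct Params { };                  // empty, like the device functors
        HostNotFunctor() { }
        explicit HostNotFunctor(const Params&) { }
        T operator()(T value) const {
            // floor(1 - 0) == 1 and floor(1 - 1) == 0: logical NOT on 0/1 inputs
            return std::floor(static_cast<T>(1) - value);
        }
    };

    // Serial equivalent of generic_op: apply the functor element by element.
    template <class T, class Functor>
    void apply_elementwise(T* output, const T* input, std::size_t n, Functor f = Functor())
    {
        for (std::size_t i = 0; i < n; i++)
            output[i] = f(input[i]);
    }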
diff --git a/modules/dnn/src/cuda/math.hpp b/modules/dnn/src/cuda/math.hpp
index 273f3fe98e..0da584197d 100644
--- a/modules/dnn/src/cuda/math.hpp
+++ b/modules/dnn/src/cuda/math.hpp
@@ -119,6 +119,27 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace device {
     template <> inline __device__ __half round(__half value) { return hrint(value); }
 #endif
 
+    template <class T> __device__ T floor(T value);
+    template <> inline __device__ double floor(double value) { return ::floor(value); }
+    template <> inline __device__ float floor(float value) { return floorf(value); }
+#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
+    template <> inline __device__ __half floor(__half value) { return hfloor(value); }
+#endif
+
+    template <class T> __device__ T log(T value);
+    template <> inline __device__ double log(double value) { return ::log(value); }
+    template <> inline __device__ float log(float value) { return logf(value); }
+#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
+    template <> inline __device__ __half log(__half value) { return hlog(value); }
+#endif
+
+    template <class T> __device__ T rint(T value);
+    template <> inline __device__ double rint(double value) { return ::rint(value); }
+    template <> inline __device__ float rint(float value) { return rintf(value); }
+#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
+    template <> inline __device__ __half rint(__half value) { return hrint(value); }
+#endif
+
     template <class T> __device__ T ceil(T value);
     template <> inline __device__ double ceil(double value) { return ::ceil(value); }
     template <> inline __device__ float ceil(float value) { return ceilf(value); }
diff --git a/modules/dnn/src/cuda4dnn/kernels/activations.hpp b/modules/dnn/src/cuda4dnn/kernels/activations.hpp
index 0a7c9878fb..0fcf7dab8a 100644
--- a/modules/dnn/src/cuda4dnn/kernels/activations.hpp
+++ b/modules/dnn/src/cuda4dnn/kernels/activations.hpp
@@ -42,6 +42,24 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
     template <class T>
     void bnll(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
 
+    template <class T>
+    void ceil(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
+
+    template <class T>
+    void floor(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
+
+    template <class T>
+    void log(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
+
+    template <class T>
+    void rint(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
+
+    template <class T>
+    void sqrt(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
+
+    template <class T>
+    void not_k(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
+
     template <class T>
     void power(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T exp, T scale, T shift);
diff --git a/modules/dnn/src/cuda4dnn/primitives/activation.hpp b/modules/dnn/src/cuda4dnn/primitives/activation.hpp
index 84b95927a3..a179db2da5 100644
--- a/modules/dnn/src/cuda4dnn/primitives/activation.hpp
+++ b/modules/dnn/src/cuda4dnn/primitives/activation.hpp
@@ -18,14 +18,12 @@
 namespace cv { namespace dnn { namespace cuda4dnn {
 
-    template <class T>
-    class ReLUOp final : public CUDABackendNode {
-    public:
+    template <template <class> class Op, class T>
+    struct BaseOp : public CUDABackendNode
+    {
+    protected:
         using wrapper_type = GetCUDABackendWrapperType<T>;
 
-        ReLUOp(csl::Stream stream_, T slope_)
-            : stream(std::move(stream_)), slope{ slope_ } { }
-
         void forward(
             const std::vector<cv::Ptr<BackendWrapper>>& inputs,
             const std::vector<cv::Ptr<BackendWrapper>>& outputs,
@@ -39,9 +37,21 @@ namespace cv { namespace dnn { namespace cuda4dnn {
                 auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
                 auto output = output_wrapper->getSpan();
 
-                kernels::relu<T>(stream, output, input, slope);
+                static_cast<const Op<T>*>(this)->calculate(output, input);
             }
         }
+    };
+
+    template <class T>
+    class ReLUOp final : public BaseOp<ReLUOp, T> {
+    public:
+        ReLUOp(csl::Stream stream_, T slope_)
+            : stream(std::move(stream_)), slope{ slope_ } { }
+
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
+        {
+            kernels::relu<T>(stream, output, input, slope);
+        }
 
     private:
         csl::Stream stream;
@@ -49,28 +59,14 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class ClippedReLUOp final : public CUDABackendNode {
+    class ClippedReLUOp final : public BaseOp<ClippedReLUOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
-
         ClippedReLUOp(csl::Stream stream_, T min_, T max_)
             : stream(std::move(stream_)), min{ min_ }, max{ max_ } { }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
-
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
-
-                kernels::clipped_relu<T>(stream, output, input, min, max);
-            }
+            kernels::clipped_relu<T>(stream, output, input, min, max);
         }
 
     private:
@@ -79,35 +75,21 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class ChannelwiseReLUOp final : public CUDABackendNode {
+    class ChannelwiseReLUOp final : public BaseOp<ChannelwiseReLUOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
-
         ChannelwiseReLUOp(csl::Stream stream_, const Mat& slope)
-            : stream(std::move(stream_))
+            : stream(std::move(stream_))
         {
             CV_Assert(!slope.empty());
             slopeTensor = csl::makeTensorHeader<T>(slope);
             csl::copyMatToTensor<T>(slope, slopeTensor, stream);
         }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
-
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
-
-                CV_Assert(input.get_axis_size(1) == slopeTensor.size());
-                std::size_t inner_size = input.size_range(2, input.rank());
-                kernels::axiswise_relu<T>(stream, output, input, inner_size, slopeTensor);
-            }
+            CV_Assert(input.get_axis_size(1) == slopeTensor.size());
+            std::size_t inner_size = input.size_range(2, input.rank());
+            kernels::axiswise_relu<T>(stream, output, input, inner_size, slopeTensor);
         }
 
     private:
@@ -116,27 +98,13 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class TanHOp final : public CUDABackendNode {
+    class TanHOp final : public BaseOp<TanHOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
-
         TanHOp(csl::Stream stream_) : stream(std::move(stream_)) { }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
-
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
-
-                kernels::tanh<T>(stream, output, input);
-            }
+            kernels::tanh<T>(stream, output, input);
         }
 
     private:
@@ -144,27 +112,13 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class SwishOp final : public CUDABackendNode {
+    class SwishOp final : public BaseOp<SwishOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
-
         SwishOp(csl::Stream stream_) : stream(std::move(stream_)) { }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
-
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
-
-                kernels::swish<T>(stream, output, input);
-            }
+            kernels::swish<T>(stream, output, input);
         }
 
     private:
@@ -172,27 +126,13 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class MishOp final : public CUDABackendNode {
+    class MishOp final : public BaseOp<MishOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
-
         MishOp(csl::Stream stream_) : stream(std::move(stream_)) { }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
-
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
-
-                kernels::mish<T>(stream, output, input);
-            }
+            kernels::mish<T>(stream, output, input);
         }
 
     private:
@@ -200,27 +140,13 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class SigmoidOp final : public CUDABackendNode {
+    class SigmoidOp final : public BaseOp<SigmoidOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
-
         SigmoidOp(csl::Stream stream_) : stream(std::move(stream_)) { }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
-
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
-
-                kernels::sigmoid<T>(stream, output, input);
-            }
+            kernels::sigmoid<T>(stream, output, input);
         }
 
     private:
@@ -228,27 +154,27 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class ELUOp final : public CUDABackendNode {
+    class ELUOp final : public BaseOp<ELUOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
-
         ELUOp(csl::Stream stream_) : stream(std::move(stream_)) { }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
+            kernels::elu<T>(stream, output, input);
+        }
 
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
+    private:
+        csl::Stream stream;
+    };
 
-                kernels::elu<T>(stream, output, input);
-            }
+    template <class T>
+    class AbsValOp final : public BaseOp<AbsValOp, T> {
+    public:
+        AbsValOp(csl::Stream stream_) : stream(std::move(stream_)) { }
+
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
+        {
+            kernels::abs<T>(stream, output, input);
         }
 
     private:
@@ -256,27 +182,41 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class AbsValOp final : public CUDABackendNode {
+    class BNLLOp final : public BaseOp<BNLLOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
+        BNLLOp(csl::Stream stream_) : stream(std::move(stream_)) { }
 
-        AbsValOp(csl::Stream stream_) : stream(std::move(stream_)) { }
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
+        {
+            kernels::bnll<T>(stream, output, input);
+        }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+    private:
+        csl::Stream stream;
+    };
+
+    template <class T>
+    class CeilOp final : public BaseOp<CeilOp, T> {
+    public:
+        CeilOp(csl::Stream stream_) : stream(std::move(stream_)) { }
+
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
+            kernels::ceil<T>(stream, output, input);
+        }
 
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
+    private:
+        csl::Stream stream;
+    };
 
-                kernels::abs<T>(stream, output, input);
-            }
+    template <class T>
+    class FloorOp final : public BaseOp<FloorOp, T> {
+    public:
+        FloorOp(csl::Stream stream_) : stream(std::move(stream_)) { }
+
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
+        {
+            kernels::floor<T>(stream, output, input);
         }
 
     private:
@@ -284,27 +224,41 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class BNLLOp final : public CUDABackendNode {
+    class LogOp final : public BaseOp<LogOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
+        LogOp(csl::Stream stream_) : stream(std::move(stream_)) { }
 
-        BNLLOp(csl::Stream stream_) : stream(std::move(stream_)) { }
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
+        {
+            kernels::log<T>(stream, output, input);
+        }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+    private:
+        csl::Stream stream;
+    };
+
+    template <class T>
+    class RoundOp final : public BaseOp<RoundOp, T> {
+    public:
+        RoundOp(csl::Stream stream_) : stream(std::move(stream_)) { }
+
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
+            kernels::rint<T>(stream, output, input);
+        }
 
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
+    private:
+        csl::Stream stream;
+    };
 
-                kernels::bnll<T>(stream, output, input);
-            }
+    template <class T>
+    class SqrtOp final : public BaseOp<SqrtOp, T> {
+    public:
+        SqrtOp(csl::Stream stream_) : stream(std::move(stream_)) { }
+
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
+        {
+            kernels::sqrt<T>(stream, output, input);
         }
 
     private:
@@ -312,28 +266,28 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class PowerOp final : public CUDABackendNode {
+    class NotOp final : public BaseOp<NotOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
+        NotOp(csl::Stream stream_) : stream(std::move(stream_)) { }
+
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
+        {
+            kernels::not_k<T>(stream, output, input);
+        }
 
+    private:
+        csl::Stream stream;
+    };
+
+    template <class T>
+    class PowerOp final : public BaseOp<PowerOp, T> {
+    public:
         PowerOp(csl::Stream stream_, T exp_, T scale_, T shift_)
             : stream(std::move(stream_)), exp{ exp_ }, scale{ scale_ }, shift{ shift_ } { }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
-
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
-
-                kernels::power<T>(stream, output, input, exp, scale, shift);
-            }
+            kernels::power<T>(stream, output, input, exp, scale, shift);
         }
 
     private:
@@ -342,28 +296,14 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class ExpOp final : public CUDABackendNode {
+    class ExpOp final : public BaseOp<ExpOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
-
         ExpOp(csl::Stream stream_, T nScale_, T nShift_)
             : stream(std::move(stream_)), normScale{ nScale_ }, normShift{ nShift_ } { }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
        {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
-
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
-
-                kernels::exp<T>(stream, output, input, normScale, normShift);
-            }
+            kernels::exp<T>(stream, output, input, normScale, normShift);
         }
 
     private:
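The BaseOp rewrite above is the curiously recurring template pattern (CRTP): the base class owns the wrapper-unpacking loop in forward(), and each concrete op only supplies calculate(), reached through a static_cast rather than a virtual call per tensor. A self-contained sketch of that dispatch (simplified types; Base and Negate are illustrative names, not OpenCV API):

    #include <cstdio>

    // CRTP base: the shared driver invokes the derived class's calculate()
    // via static_cast, so no virtual dispatch is involved.
    template <template <class> class Op, class T>
    struct Base {
        void forward(const T* in, T* out, int n) {
            for (int i = 0; i < n; i++)
                static_cast<const Op<T>*>(this)->calculate(in[i], out[i]);
        }
    };

    template <class T>
    struct Negate final : Base<Negate, T> {
        void calculate(T in, T& out) const { out = -in; }
    };

    int main() {
        float in[3] = {1.f, -2.f, 3.f}, out[3];
        Negate<float>{}.forward(in, out, 3);
        std::printf("%g %g %g\n", out[0], out[1], out[2]);  // -1 2 -3
    }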
diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp
index 123cb170b7..affaa1a7e1 100644
--- a/modules/dnn/src/init.cpp
+++ b/modules/dnn/src/init.cpp
@@ -111,6 +111,12 @@ void initializeLayerFactory()
     CV_DNN_REGISTER_LAYER_CLASS(AbsVal,         AbsLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Power,          PowerLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Exp,            ExpLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(Ceil,           CeilLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(Floor,          FloorLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(Log,            LogLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(Round,          RoundLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(Sqrt,           SqrtLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(Not,            NotLayer);
     CV_DNN_REGISTER_LAYER_CLASS(BatchNorm,      BatchNormLayer);
     CV_DNN_REGISTER_LAYER_CLASS(MaxUnpool,      MaxUnpoolLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Dropout,        BlankLayer);
@@ -133,6 +139,7 @@ void initializeLayerFactory()
     CV_DNN_REGISTER_LAYER_CLASS(Padding,        PaddingLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Proposal,       ProposalLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Scale,          ScaleLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(Compare,        CompareLayer);
     CV_DNN_REGISTER_LAYER_CLASS(DataAugmentation, DataAugmentationLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Correlation,    CorrelationLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Accum,          AccumLayer);
diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp
index 1b6483a5a0..c95dbbc933 100644
--- a/modules/dnn/src/layers/elementwise_layers.cpp
+++ b/modules/dnn/src/layers/elementwise_layers.cpp
@@ -49,7 +49,7 @@
 #include "../op_vkcom.hpp"
 
 #include <opencv2/dnn/shape_utils.hpp>
-#include <iostream>
+#include <cfenv>
 
 #ifdef HAVE_OPENCL
 #include "opencl_kernels_dnn.hpp"
 #endif
@@ -69,6 +69,11 @@ using std::abs;
 using std::exp;
 using std::tanh;
 using std::pow;
+using std::ceil;
+using std::floor;
+using std::log;
+using std::sqrt;
+using std::round;
 
 template<typename Func>
 class ElementWiseLayer : public Func::Layer
@@ -599,18 +604,9 @@ struct ReLU6Functor : public BaseFunctor
     int64 getFLOPSPerElement() const { return 2; }
 };
 
-struct TanHFunctor : public BaseFunctor
+template <class T>
+struct BaseDefaultFunctor : public BaseFunctor
 {
-    typedef TanHLayer Layer;
-
-    bool supportBackend(int backendId, int)
-    {
-        return backendId == DNN_BACKEND_OPENCV ||
-               backendId == DNN_BACKEND_CUDA ||
-               backendId == DNN_BACKEND_HALIDE ||
-               backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
-    }
-
     void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
     {
         for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
@@ -618,7 +614,7 @@ struct TanHFunctor : public BaseFunctor
             for( int i = 0; i < len; i++ )
             {
                 float x = srcptr[i];
-                dstptr[i] = tanh(x);
+                dstptr[i] = static_cast<const T*>(this)->calculate(x);
             }
         }
     }
@@ -638,45 +634,53 @@ struct TanHFunctor : public BaseFunctor
             UMat& src = inputs[i];
             UMat& dst = outputs[i];
 
-            ocl::Kernel kernel("TanHForward", ocl::dnn::activations_oclsrc, buildopt);
-            kernel.set(0, (int)src.total());
+            ocl::Kernel kernel(ocl_kernel_name, ocl::dnn::activations_oclsrc, buildopt);
+            kernel.set(0, static_cast<int>(src.total()));
             kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
             kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
+            static_cast<const T*>(this)->setKernelParams(kernel);
 
             size_t gSize = src.total();
-            CV_Assert(kernel.run(1, &gSize, NULL, false));
+            CV_Assert(kernel.run(1, &gSize, nullptr, false));
         }
 
         return true;
     }
 #endif
 
-#ifdef HAVE_CUDA
-    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
-    {
-        return make_cuda_node<TanHOp>(target, stream);
-    }
-#endif
+    inline void setKernelParams(ocl::Kernel& kernel) const {}
 
-#ifdef HAVE_HALIDE
-    void attachHalide(const Halide::Expr& input, Halide::Func& top)
+    bool tryQuantize(const std::vector<std::vector<float> > &scales,
+                     const std::vector<std::vector<int> > &zeropoints, LayerParams& params)
     {
-        Halide::Var x("x"), y("y"), c("c"), n("n");
-        top(x, y, c, n) = tanh(input);
+        float inpScale = scales[0][0], outScale = scales[1][0];
+        int inpZp = zeropoints[0][0], outZp = zeropoints[1][0];
+
+        Mat lookUpTable(1, 256, CV_8S);
+        int8_t* table = lookUpTable.ptr<int8_t>();
+        for (int i = -128; i < 128; i++)
+        {
+            float x = inpScale * static_cast<float>(i - inpZp);
+            float y = static_cast<const T*>(this)->calculate(x);
+            int quantized = outZp + static_cast<int>(std::round(y/outScale));
+            table[i+128] = saturate_cast<int8_t>(quantized);
+        }
+        params.blobs.clear();
+        params.blobs.push_back(lookUpTable);
+        return true;
     }
-#endif  // HAVE_HALIDE
 
 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
     InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
     {
-        return InferenceEngine::Builder::TanHLayer("");
+        CV_Error(Error::StsNotImplemented, "");
     }
 #endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
     {
-        return std::make_shared<ngraph::op::Tanh>(node);
+        CV_Error(Error::StsNotImplemented, "");
     }
 #endif  // HAVE_DNN_NGRAPH
 
@@ -688,84 +692,31 @@ struct TanHFunctor : public BaseFunctor
     }
 #endif  // HAVE_VULKAN
 
-    bool tryQuantize(const std::vector<std::vector<float> > &scales,
-                     const std::vector<std::vector<int> > &zeropoints, LayerParams& params)
-    {
-        float inpScale = scales[0][0], outScale = scales[1][0];
-        int inpZp = zeropoints[0][0], outZp = zeropoints[1][0];
-
-        Mat lookUpTable(1, 256, CV_8S);
-        int8_t* table = lookUpTable.ptr<int8_t>();
-        for (int i = -128; i < 128; i++)
-        {
-            float x = inpScale*(i - inpZp);
-            float y = tanh(x);
-            int quantized = outZp + (int)std::round(y/outScale);
-            table[i+128] = saturate_cast<int8_t>(quantized);
-        }
-        params.blobs.clear();
-        params.blobs.push_back(lookUpTable);
-        return true;
-    }
-
-    int64 getFLOPSPerElement() const { return 1; }
+private:
+    static const char* const ocl_kernel_name;
 };
 
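Besides sharing the apply()/applyOCL() plumbing, BaseDefaultFunctor centralizes int8 quantization: tryQuantize() tabulates calculate() over all 256 input codes. A standalone sketch of that table construction (buildLut is a hypothetical free-function rendering of the member above):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <vector>

    // Dequantize each of the 256 int8 codes, run the float activation,
    // requantize -- the same loop as tryQuantize() above.
    std::vector<int8_t> buildLut(float (*activation)(float),
                                 float inpScale, int inpZp,
                                 float outScale, int outZp)
    {
        std::vector<int8_t> table(256);
        for (int i = -128; i < 128; i++)
        {
            float x = inpScale * static_cast<float>(i - inpZp);
            float y = activation(x);
            int q = outZp + static_cast<int>(std::round(y / outScale));
            // stand-in for cv::saturate_cast<int8_t>
            table[i + 128] = static_cast<int8_t>(std::max(-128, std::min(127, q)));
        }
        return table;
    }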
-struct SwishFunctor : public BaseFunctor
+struct TanHFunctor : public BaseDefaultFunctor<TanHFunctor>
 {
-    typedef SwishLayer Layer;
+    typedef TanHLayer Layer;
 
     bool supportBackend(int backendId, int)
     {
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
-               backendId == DNN_BACKEND_HALIDE || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;;
+               backendId == DNN_BACKEND_HALIDE ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
-    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
+    inline float calculate(float x) const
     {
-        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
-        {
-            for( int i = 0; i < len; i++ )
-            {
-                float x = srcptr[i];
-                dstptr[i] = x / (1.0f + exp(-x));
-            }
-        }
+        return tanh(x);
     }
 
-#ifdef HAVE_OPENCL
-    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
-    {
-        std::vector<UMat> inputs;
-        std::vector<UMat> outputs;
-
-        inps.getUMatVector(inputs);
-        outs.getUMatVector(outputs);
-        String buildopt = oclGetTMacro(inputs[0]);
-
-        for (size_t i = 0; i < inputs.size(); i++)
-        {
-            UMat& src = inputs[i];
-            UMat& dst = outputs[i];
-
-            ocl::Kernel kernel("SwishForward", ocl::dnn::activations_oclsrc, buildopt);
-            kernel.set(0, (int)src.total());
-            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
-            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
-
-            size_t gSize = src.total();
-            CV_Assert(kernel.run(1, &gSize, NULL, false));
-        }
-
-        return true;
-    }
-#endif
-
 #ifdef HAVE_CUDA
     Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
     {
-        return make_cuda_node<SwishOp>(target, stream);
+        return make_cuda_node<TanHOp>(target, stream);
     }
 #endif
 
@@ -773,57 +724,76 @@ struct SwishFunctor : public BaseFunctor
     void attachHalide(const Halide::Expr& input, Halide::Func& top)
     {
         Halide::Var x("x"), y("y"), c("c"), n("n");
-        top(x, y, c, n) = input / (1.0f + exp(-input));
+        top(x, y, c, n) = tanh(input);
     }
 #endif  // HAVE_HALIDE
 
 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
     InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
     {
-        CV_Error(Error::StsNotImplemented, "");
+        return InferenceEngine::Builder::TanHLayer("");
     }
 #endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
     {
-        auto sigmoid = std::make_shared<ngraph::op::Sigmoid>(node);
-        return std::make_shared<ngraph::op::v1::Multiply>(node, sigmoid);
+        return std::make_shared<ngraph::op::Tanh>(node);
     }
 #endif  // HAVE_DNN_NGRAPH
 
-#ifdef HAVE_VULKAN
-    std::shared_ptr<vkcom::OpBase> initVkCom()
+    int64 getFLOPSPerElement() const { return 1; }
+};
+
+template<>
+const char* const TanHFunctor::BaseDefaultFunctor<TanHFunctor>::ocl_kernel_name = "TanHForward";
+
+struct SwishFunctor : public BaseDefaultFunctor<SwishFunctor>
+{
+    typedef SwishLayer Layer;
+
+    bool supportBackend(int backendId, int)
     {
-        // TODO: add vkcom implementation
-        return std::shared_ptr<vkcom::OpBase>();
+        return backendId == DNN_BACKEND_OPENCV ||
+               backendId == DNN_BACKEND_CUDA ||
+               backendId == DNN_BACKEND_HALIDE || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
-#endif  // HAVE_VULKAN
 
-    bool tryQuantize(const std::vector<std::vector<float> > &scales,
-                     const std::vector<std::vector<int> > &zeropoints, LayerParams& params)
+    inline float calculate(float x) const
     {
-        float inpScale = scales[0][0], outScale = scales[1][0];
-        int inpZp = zeropoints[0][0], outZp = zeropoints[1][0];
+        return x / (1.f + exp(-x));
+    }
 
-        Mat lookUpTable(1, 256, CV_8S);
-        int8_t* table = lookUpTable.ptr<int8_t>();
-        for (int i = -128; i < 128; i++)
-        {
-            float x = inpScale*(i - inpZp);
-            float y = x / (1.0f + exp(-x));
-            int quantized = outZp + (int)std::round(y/outScale);
-            table[i+128] = saturate_cast<int8_t>(quantized);
-        }
-        params.blobs.clear();
-        params.blobs.push_back(lookUpTable);
-        return true;
+#ifdef HAVE_CUDA
+    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
+    {
+        return make_cuda_node<SwishOp>(target, stream);
+    }
+#endif
+
+#ifdef HAVE_HALIDE
+    void attachHalide(const Halide::Expr& input, Halide::Func& top)
+    {
+        Halide::Var x("x"), y("y"), c("c"), n("n");
+        top(x, y, c, n) = input / (1.0f + exp(-input));
+    }
+#endif  // HAVE_HALIDE
+
+#ifdef HAVE_DNN_NGRAPH
+    std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
+    {
+        auto sigmoid = std::make_shared<ngraph::op::Sigmoid>(node);
+        return std::make_shared<ngraph::op::v1::Multiply>(node, sigmoid);
     }
+#endif  // HAVE_DNN_NGRAPH
 
     int64 getFLOPSPerElement() const { return 3; }
 };
 
-struct MishFunctor : public BaseFunctor
+template<>
+const char* const SwishFunctor::BaseDefaultFunctor<SwishFunctor>::ocl_kernel_name = "SwishForward";
+
+struct MishFunctor : public BaseDefaultFunctor<MishFunctor>
 {
     typedef MishLayer Layer;
 
@@ -834,53 +804,18 @@ struct MishFunctor : public BaseFunctor
                backendId == DNN_BACKEND_HALIDE || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
-    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
+    inline float calculate(float x) const
     {
-        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
+        // Use fast approximation introduced in https://github.com/opencv/opencv/pull/17200
+        if (x >= 8.f)
         {
-            for( int i = 0; i < len; i++ )
-            {
-                // Use fast approximation introduced in https://github.com/opencv/opencv/pull/17200
-                float x = srcptr[i];
-                if (x >= 8.f)
-                    dstptr[i] = x;
-                else
-                {
-                    float eX = exp(x);
-                    float n = (eX + 2) * eX;
-                    dstptr[i] = (x * n) / (n + 2);
-                }
-            }
-        }
-    }
-
-#ifdef HAVE_OPENCL
-    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
-    {
-        std::vector<UMat> inputs;
-        std::vector<UMat> outputs;
-
-        inps.getUMatVector(inputs);
-        outs.getUMatVector(outputs);
-        String buildopt = oclGetTMacro(inputs[0]);
-
-        for (size_t i = 0; i < inputs.size(); i++)
-        {
-            UMat& src = inputs[i];
-            UMat& dst = outputs[i];
-
-            ocl::Kernel kernel("MishForward", ocl::dnn::activations_oclsrc, buildopt);
-            kernel.set(0, (int)src.total());
-            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
-            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
-
-            size_t gSize = src.total();
-            CV_Assert(kernel.run(1, &gSize, NULL, false));
+            return x;
         }
 
-        return true;
+        float eX = exp(x);
+        float n = (eX + 2.f) * eX;
+        return (x * n) / (n + 2.f);
     }
-#endif
 
 #ifdef HAVE_CUDA
     Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
@@ -897,13 +832,6 @@ struct MishFunctor : public BaseFunctor
     }
 #endif  // HAVE_HALIDE
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
-    {
-        CV_Error(Error::StsNotImplemented, "");
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
     {
@@ -917,40 +845,13 @@ struct MishFunctor : public BaseFunctor
     }
 #endif  // HAVE_DNN_NGRAPH
 
-#ifdef HAVE_VULKAN
-    std::shared_ptr<vkcom::OpBase> initVkCom()
-    {
-        // TODO: add vkcom implementation
-        return std::shared_ptr<vkcom::OpBase>();
-    }
-#endif  // HAVE_VULKAN
-
-    bool tryQuantize(const std::vector<std::vector<float> > &scales,
-                     const std::vector<std::vector<int> > &zeropoints, LayerParams& params)
-    {
-        float inpScale = scales[0][0], outScale = scales[1][0];
-        int inpZp = zeropoints[0][0], outZp = zeropoints[1][0];
-
-        Mat lookUpTable(1, 256, CV_8S);
-        int8_t* table = lookUpTable.ptr<int8_t>();
-        for (int i = -128; i < 128; i++)
-        {
-            float x = inpScale*(i - inpZp);
-            float eX = exp(x);
-            float n = (eX + 2) * eX;
-            float y = (x * n) / (n + 2);
-            int quantized = outZp + (int)std::round(y/outScale);
-            table[i+128] = saturate_cast<int8_t>(quantized);
-        }
-        params.blobs.clear();
-        params.blobs.push_back(lookUpTable);
-        return true;
-    }
-
     int64 getFLOPSPerElement() const { return 3; }
 };
 
-struct SigmoidFunctor : public BaseFunctor
+template<>
+const char* const MishFunctor::BaseDefaultFunctor<MishFunctor>::ocl_kernel_name = "MishForward";
+
+struct SigmoidFunctor : public BaseDefaultFunctor<SigmoidFunctor>
 {
     typedef SigmoidLayer Layer;
 
@@ -962,45 +863,10 @@ struct SigmoidFunctor : public BaseFunctor
                backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
-    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
-    {
-        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
-        {
-            for( int i = 0; i < len; i++ )
-            {
-                float x = srcptr[i];
-                dstptr[i] = 1.f/(1.f + exp(-x));
-            }
-        }
-    }
-
-#ifdef HAVE_OPENCL
-    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
+    inline float calculate(float x) const
     {
-        std::vector<UMat> inputs;
-        std::vector<UMat> outputs;
-
-        inps.getUMatVector(inputs);
-        outs.getUMatVector(outputs);
-        String buildopt = oclGetTMacro(inputs[0]);
-
-        for (size_t i = 0; i < inputs.size(); i++)
-        {
-            UMat& src = inputs[i];
-            UMat& dst = outputs[i];
-
-            ocl::Kernel kernel("SigmoidForward", ocl::dnn::activations_oclsrc, buildopt);
-            kernel.set(0, (int)src.total());
-            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
-            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
-
-            size_t gSize = src.total();
-            CV_Assert(kernel.run(1, &gSize, NULL, false));
-        }
-
-        return true;
+        return 1.f / (1.f + exp(-x));
     }
-#endif
 
 #ifdef HAVE_CUDA
     Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
@@ -1031,38 +897,13 @@ struct SigmoidFunctor : public BaseFunctor
     }
 #endif  // HAVE_DNN_NGRAPH
 
-#ifdef HAVE_VULKAN
-    std::shared_ptr<vkcom::OpBase> initVkCom()
-    {
-        // TODO: add vkcom implementation
-        return std::shared_ptr<vkcom::OpBase>();
-    }
-#endif  // HAVE_VULKAN
-
-    bool tryQuantize(const std::vector<std::vector<float> > &scales,
-                     const std::vector<std::vector<int> > &zeropoints, LayerParams& params)
-    {
-        float inpScale = scales[0][0], outScale = scales[1][0];
-        int inpZp = zeropoints[0][0], outZp = zeropoints[1][0];
-
-        Mat lookUpTable(1, 256, CV_8S);
-        int8_t* table = lookUpTable.ptr<int8_t>();
-        for (int i = -128; i < 128; i++)
-        {
-            float x = inpScale*(i - inpZp);
-            float y = 1.f/(1.f + exp(-x));
-            int quantized = outZp + (int)std::round(y/outScale);
-            table[i+128] = saturate_cast<int8_t>(quantized);
-        }
-        params.blobs.clear();
-        params.blobs.push_back(lookUpTable);
-        return true;
-    }
-
     int64 getFLOPSPerElement() const { return 3; }
 };
 
-struct ELUFunctor : public BaseFunctor
+template<>
+const char* const SigmoidFunctor::BaseDefaultFunctor<SigmoidFunctor>::ocl_kernel_name = "SigmoidForward";
+
+struct ELUFunctor : public BaseDefaultFunctor<ELUFunctor>
 {
     typedef ELULayer Layer;
 
@@ -1074,50 +915,70 @@ struct ELUFunctor : public BaseFunctor
                backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
-    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
+    inline float calculate(float x) const
     {
-        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
-        {
-            for(int i = 0; i < len; i++ )
-            {
-                float x = srcptr[i];
-                dstptr[i] = x >= 0.f ? x : exp(x) - 1;
-            }
-        }
+        return x >= 0.f ? x : exp(x) - 1.f;
     }
 
-#ifdef HAVE_OPENCL
-    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
+#ifdef HAVE_CUDA
+    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
     {
-        std::vector<UMat> inputs;
-        std::vector<UMat> outputs;
+        return make_cuda_node<ELUOp>(target, stream);
+    }
+#endif
 
-        inps.getUMatVector(inputs);
-        outs.getUMatVector(outputs);
-        String buildopt = oclGetTMacro(inputs[0]);
+#ifdef HAVE_HALIDE
+    void attachHalide(const Halide::Expr& input, Halide::Func& top)
+    {
+        Halide::Var x("x"), y("y"), c("c"), n("n");
+        top(x, y, c, n) = select(input >= 0.0f, input, exp(input) - 1);
+    }
+#endif  // HAVE_HALIDE
 
-        for (size_t i = 0; i < inputs.size(); i++)
-        {
-            UMat& src = inputs[i];
-            UMat& dst = outputs[i];
+#ifdef HAVE_DNN_IE_NN_BUILDER_2019
+    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
+    {
+        return InferenceEngine::Builder::ELULayer("");
+    }
+#endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
-            ocl::Kernel kernel("ELUForward", ocl::dnn::activations_oclsrc, buildopt);
-            kernel.set(0, (int)src.total());
-            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
-            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
+#ifdef HAVE_DNN_NGRAPH
+    std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
+    {
+        return std::make_shared<ngraph::op::Elu>(node, 1.0);
+    }
+#endif  // HAVE_DNN_NGRAPH
 
-            size_t gSize = src.total();
-            CV_Assert(kernel.run(1, &gSize, NULL, false));
-        }
+    int64 getFLOPSPerElement() const { return 2; }
+};
 
-        return true;
-    }
+template<>
+const char* const ELUFunctor::BaseDefaultFunctor<ELUFunctor>::ocl_kernel_name = "ELUForward";
+
+struct AbsValFunctor : public BaseDefaultFunctor<AbsValFunctor>
+{
+    typedef AbsLayer Layer;
+
+    bool supportBackend(int backendId, int)
+    {
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return !INF_ENGINE_VER_MAJOR_EQ(INF_ENGINE_RELEASE_2019R1);
 #endif
+        return backendId == DNN_BACKEND_OPENCV ||
+               backendId == DNN_BACKEND_CUDA ||
+               backendId == DNN_BACKEND_HALIDE;
+    }
+
+    inline float calculate(float x) const
+    {
+        return abs(x);
+    }
 
 #ifdef HAVE_CUDA
     Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
     {
-        return make_cuda_node<ELUOp>(target, stream);
+        return make_cuda_node<AbsValOp>(target, stream);
     }
 #endif
 
@@ -1125,114 +986,125 @@ struct ELUFunctor : public BaseFunctor
     void attachHalide(const Halide::Expr& input, Halide::Func& top)
     {
         Halide::Var x("x"), y("y"), c("c"), n("n");
-        top(x, y, c, n) = select(input >= 0.0f, input, exp(input) - 1);
+        top(x, y, c, n) = abs(input);
     }
 #endif  // HAVE_HALIDE
 
 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
     InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
     {
-        return InferenceEngine::Builder::ELULayer("");
+        return InferenceEngine::Builder::ReLULayer("").setNegativeSlope(-0.999999f);
     }
 #endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
     {
-        return std::make_shared<ngraph::op::Elu>(node, 1.0);
+        float coeff = -0.999999f;
+        // float coeff = preferableTarget == DNN_TARGET_MYRIAD ? -0.999f : -0.999999f;
+        auto slope = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &coeff);
+        return std::make_shared<ngraph::op::PRelu>(node, slope);
     }
 #endif  // HAVE_DNN_NGRAPH
 
-#ifdef HAVE_VULKAN
-    std::shared_ptr<vkcom::OpBase> initVkCom()
+    int64 getFLOPSPerElement() const { return 1; }
+};
+
+template<>
+const char* const AbsValFunctor::BaseDefaultFunctor<AbsValFunctor>::ocl_kernel_name = "AbsValForward";
+
+struct BNLLFunctor : public BaseDefaultFunctor<BNLLFunctor>
+{
+    typedef BNLLLayer Layer;
+
+    bool supportBackend(int backendId, int)
     {
-        // TODO: add vkcom implementation
-        return std::shared_ptr<vkcom::OpBase>();
+        return backendId == DNN_BACKEND_OPENCV ||
+               backendId == DNN_BACKEND_CUDA ||
+               backendId == DNN_BACKEND_HALIDE;
     }
-#endif  // HAVE_VULKAN
 
-    bool tryQuantize(const std::vector<std::vector<float> > &scales,
-                     const std::vector<std::vector<int> > &zeropoints, LayerParams& params)
+    inline float calculate(float x) const
     {
-        float inpScale = scales[0][0], outScale = scales[1][0];
-        int inpZp = zeropoints[0][0], outZp = zeropoints[1][0];
+        // https://github.com/BVLC/caffe/blame/1.0/src/caffe/layers/bnll_layer.cpp#L17
+        return x > 0 ? x + log(1.f + exp(-x)) : log(1.f + exp(x));
+    }
 
-        Mat lookUpTable(1, 256, CV_8S);
-        int8_t* table = lookUpTable.ptr<int8_t>();
-        for (int i = -128; i < 128; i++)
-        {
-            float x = inpScale*(i - inpZp);
-            float y = x >= 0.f ? x : exp(x) - 1;
-            int quantized = outZp + (int)std::round(y/outScale);
-            table[i+128] = saturate_cast<int8_t>(quantized);
-        }
-        params.blobs.clear();
-        params.blobs.push_back(lookUpTable);
-        return true;
+#ifdef HAVE_CUDA
+    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
+    {
+        return make_cuda_node<BNLLOp>(target, stream);
     }
+#endif
 
-    int64 getFLOPSPerElement() const { return 2; }
+#ifdef HAVE_HALIDE
+    void attachHalide(const Halide::Expr& input, Halide::Func& top)
+    {
+        Halide::Var x("x"), y("y"), c("c"), n("n");
+        // https://github.com/BVLC/caffe/blame/1.0/src/caffe/layers/bnll_layer.cpp#L17
+        top(x, y, c, n) = max(input, 0) + log(1.0f + exp(-abs(input)));
+    }
+#endif  // HAVE_HALIDE
+
+    int64 getFLOPSPerElement() const { return 5; }
 };
 
-struct AbsValFunctor : public BaseFunctor
+template<>
+const char* const BNLLFunctor::BaseDefaultFunctor<BNLLFunctor>::ocl_kernel_name = "BNLLForward";
+
+struct CeilFunctor : public BaseDefaultFunctor<CeilFunctor>
 {
-    typedef AbsLayer Layer;
+    typedef CeilLayer Layer;
 
     bool supportBackend(int backendId, int)
     {
-#ifdef HAVE_INF_ENGINE
-        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-            return !INF_ENGINE_VER_MAJOR_EQ(INF_ENGINE_RELEASE_2019R1);
-#endif
-        return backendId == DNN_BACKEND_OPENCV ||
-               backendId == DNN_BACKEND_CUDA ||
-               backendId == DNN_BACKEND_HALIDE;
+        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
     }
 
-    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
+    inline float calculate(float x) const
     {
-        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
-        {
-            for( int i = 0; i < len; i++ )
-            {
-                float x = srcptr[i];
-                dstptr[i] = abs(x);
-            }
-        }
+        return ceil(x);
     }
 
-#ifdef HAVE_OPENCL
-    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
+#ifdef HAVE_CUDA
+    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
     {
-        std::vector<UMat> inputs;
-        std::vector<UMat> outputs;
+        return make_cuda_node<CeilOp>(target, stream);
+    }
+#endif
+
+#ifdef HAVE_HALIDE
+    void attachHalide(const Halide::Expr& input, Halide::Func& top)
+    {
+        Halide::Var x("x"), y("y"), c("c"), n("n");
+        top(x, y, c, n) = ceil(input);
+    }
+#endif  // HAVE_HALIDE
 
-        inps.getUMatVector(inputs);
-        outs.getUMatVector(outputs);
-        String buildopt = oclGetTMacro(inputs[0]);
+    int64 getFLOPSPerElement() const { return 1; }
+};
 
-        for (size_t i = 0; i < inputs.size(); i++)
-        {
-            UMat& src = inputs[i];
-            UMat& dst = outputs[i];
+template<>
+const char* const BaseDefaultFunctor<CeilFunctor>::ocl_kernel_name = "CeilForward";
 
-            ocl::Kernel kernel("AbsValForward", ocl::dnn::activations_oclsrc, buildopt);
-            kernel.set(0, (int)src.total());
-            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
-            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
+struct FloorFunctor : public BaseDefaultFunctor<FloorFunctor>
+{
+    typedef FloorLayer Layer;
 
-            size_t gSize = src.total();
-            CV_Assert(kernel.run(1, &gSize, NULL, false));
-        }
+    bool supportBackend(int backendId, int)
+    {
+        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
+    }
 
-        return true;
+    inline float calculate(float x) const
+    {
+        return floor(x);
     }
-#endif
 
 #ifdef HAVE_CUDA
     Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
     {
-        return make_cuda_node<AbsValOp>(target, stream);
+        return make_cuda_node<FloorOp>(target, stream);
     }
 #endif
 
@@ -1240,114 +1112,109 @@ struct AbsValFunctor : public BaseFunctor
     void attachHalide(const Halide::Expr& input, Halide::Func& top)
     {
         Halide::Var x("x"), y("y"), c("c"), n("n");
-        top(x, y, c, n) = abs(input);
+        top(x, y, c, n) = floor(input);
     }
 #endif  // HAVE_HALIDE
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
+    int64 getFLOPSPerElement() const { return 1; }
+};
+
+template<>
+const char* const BaseDefaultFunctor<FloorFunctor>::ocl_kernel_name = "FloorForward";
+
+struct LogFunctor : public BaseDefaultFunctor<LogFunctor>
+{
+    typedef LogLayer Layer;
+
+    bool supportBackend(int backendId, int)
     {
-        return InferenceEngine::Builder::ReLULayer("").setNegativeSlope(-0.999999f);
+        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
     }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
-#ifdef HAVE_DNN_NGRAPH
-    std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
+    inline float calculate(float x) const
     {
-        float coeff = -0.999999f;
-        // float coeff = preferableTarget == DNN_TARGET_MYRIAD ? -0.999f : -0.999999f;
-        auto slope = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &coeff);
-        return std::make_shared<ngraph::op::PRelu>(node, slope);
+        return log(x);
     }
-#endif  // HAVE_DNN_NGRAPH
 
-#ifdef HAVE_VULKAN
-    std::shared_ptr<vkcom::OpBase> initVkCom()
+#ifdef HAVE_CUDA
+    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
     {
-        // TODO: add vkcom implementation
-        return std::shared_ptr<vkcom::OpBase>();
+        return make_cuda_node<LogOp>(target, stream);
     }
-#endif  // HAVE_VULKAN
+#endif
 
-    bool tryQuantize(const std::vector<std::vector<float> > &scales,
-                     const std::vector<std::vector<int> > &zeropoints, LayerParams& params)
+#ifdef HAVE_HALIDE
+    void attachHalide(const Halide::Expr& input, Halide::Func& top)
     {
-        float inpScale = scales[0][0], outScale = scales[1][0];
-        int inpZp = zeropoints[0][0], outZp = zeropoints[1][0];
-
-        Mat lookUpTable(1, 256, CV_8S);
-        int8_t* table = lookUpTable.ptr<int8_t>();
-        for (int i = -128; i < 128; i++)
-        {
-            float x = inpScale*(i - inpZp);
-            float y = abs(x);
-            int quantized = outZp + (int)std::round(y/outScale);
-            table[i+128] = saturate_cast<int8_t>(quantized);
-        }
-        params.blobs.clear();
-        params.blobs.push_back(lookUpTable);
-        return true;
+        Halide::Var x("x"), y("y"), c("c"), n("n");
+        top(x, y, c, n) = log(input);
     }
+#endif  // HAVE_HALIDE
 
     int64 getFLOPSPerElement() const { return 1; }
 };
 
-struct BNLLFunctor : public BaseFunctor
+template<>
+const char* const BaseDefaultFunctor<LogFunctor>::ocl_kernel_name = "LogForward";
+
+struct RoundFunctor : public BaseDefaultFunctor<RoundFunctor>
 {
-    typedef BNLLLayer Layer;
+    typedef RoundLayer Layer;
 
     bool supportBackend(int backendId, int)
     {
-        return backendId == DNN_BACKEND_OPENCV ||
-               backendId == DNN_BACKEND_CUDA ||
-               backendId == DNN_BACKEND_HALIDE;
+        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
     }
 
-    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
+    inline float calculate(float x) const
     {
-        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
-        {
-            for( int i = 0; i < len; i++ )
-            {
-                float x = srcptr[i];
-                // https://github.com/BVLC/caffe/blame/1.0/src/caffe/layers/bnll_layer.cpp#L17
-                dstptr[i] = x > 0 ? x + log(1. + exp(-x)) : log(1. + exp(x));
-            }
-        }
+        // Rounds to even numbers in halfway cases, so 2.5 -> 2, -2.5 -> -2
+        int old_rounding_direction = std::fegetround();
+        std::fesetround(FE_TONEAREST);
+        float y = std::nearbyint(x);
+        std::fesetround(old_rounding_direction);
+        return y;
     }
 
-#ifdef HAVE_OPENCL
-    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
+#ifdef HAVE_CUDA
+    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
     {
-        std::vector<UMat> inputs;
-        std::vector<UMat> outputs;
+        return make_cuda_node<RoundOp>(target, stream);
+    }
+#endif
 
-        inps.getUMatVector(inputs);
-        outs.getUMatVector(outputs);
-        String buildopt = oclGetTMacro(inputs[0]);
+#ifdef HAVE_HALIDE
+    void attachHalide(const Halide::Expr& input, Halide::Func& top)
+    {
+        Halide::Var x("x"), y("y"), c("c"), n("n");
+        top(x, y, c, n) = round(input);
+    }
+#endif  // HAVE_HALIDE
 
-        for (size_t i = 0; i < inputs.size(); i++)
-        {
-            UMat& src = inputs[i];
-            UMat& dst = outputs[i];
+    int64 getFLOPSPerElement() const { return 2; }
+};
 
-            ocl::Kernel kernel("BNLLForward", ocl::dnn::activations_oclsrc, buildopt);
-            kernel.set(0, (int)src.total());
-            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
-            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
+template<>
+const char* const BaseDefaultFunctor<RoundFunctor>::ocl_kernel_name = "RoundForward";
 
-            size_t gSize = src.total();
-            CV_Assert(kernel.run(1, &gSize, NULL, false));
-        }
+struct SqrtFunctor : public BaseDefaultFunctor<SqrtFunctor>
+{
+    typedef SqrtLayer Layer;
 
-        return true;
+    bool supportBackend(int backendId, int)
+    {
+        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
+    }
+
+    inline float calculate(float x) const
+    {
+        return sqrt(x);
     }
-#endif
 
 #ifdef HAVE_CUDA
     Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
     {
-        return make_cuda_node<BNLLOp>(target, stream);
+        return make_cuda_node<SqrtOp>(target, stream);
     }
 #endif
 
@@ -1355,56 +1222,58 @@ struct BNLLFunctor : public BaseFunctor
     void attachHalide(const Halide::Expr& input, Halide::Func& top)
     {
         Halide::Var x("x"), y("y"), c("c"), n("n");
-        // https://github.com/BVLC/caffe/blame/1.0/src/caffe/layers/bnll_layer.cpp#L17
-        top(x, y, c, n) = max(input, 0) + log(1.0f + exp(-abs(input)));
+        top(x, y, c, n) = sqrt(input);
     }
 #endif  // HAVE_HALIDE
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
-    {
-        CV_Error(Error::StsNotImplemented, "");
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
     {
-        CV_Error(Error::StsNotImplemented, "");
+        return std::make_shared<ngraph::op::v0::Sqrt>(node);
     }
 #endif  // HAVE_DNN_NGRAPH
 
-#ifdef HAVE_VULKAN
-    std::shared_ptr<vkcom::OpBase> initVkCom()
+    int64 getFLOPSPerElement() const { return 1; }
+};
+
+template<>
+const char* const BaseDefaultFunctor<SqrtFunctor>::ocl_kernel_name = "SqrtForward";
+
+struct NotFunctor : public BaseDefaultFunctor<NotFunctor>
+{
+    typedef NotLayer Layer;
+
+    bool supportBackend(int backendId, int)
     {
-        // TODO: add vkcom implementation
-        return std::shared_ptr<vkcom::OpBase>();
+        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
    }
-#endif  // HAVE_VULKAN
 
-    bool tryQuantize(const std::vector<std::vector<float> > &scales,
-                     const std::vector<std::vector<int> > &zeropoints, LayerParams& params)
+    inline float calculate(float x) const
     {
-        float inpScale = scales[0][0], outScale = scales[1][0];
-        int inpZp = zeropoints[0][0], outZp = zeropoints[1][0];
+        return floor(1.f - x);
+    }
 
-        Mat lookUpTable(1, 256, CV_8S);
-        int8_t* table = lookUpTable.ptr<int8_t>();
-        for (int i = -128; i < 128; i++)
-        {
-            float x = inpScale*(i - inpZp);
-            float y = x > 0 ? x + log(1. + exp(-x)) : log(1. + exp(x));
-            int quantized = outZp + (int)std::round(y/outScale);
-            table[i+128] = saturate_cast<int8_t>(quantized);
-        }
-        params.blobs.clear();
-        params.blobs.push_back(lookUpTable);
-        return true;
+#ifdef HAVE_CUDA
+    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
+    {
+        return make_cuda_node<NotOp>(target, stream);
     }
+#endif
 
-    int64 getFLOPSPerElement() const { return 5; }
+#ifdef HAVE_HALIDE
+    void attachHalide(const Halide::Expr& input, Halide::Func& top)
+    {
+        Halide::Var x("x"), y("y"), c("c"), n("n");
+        top(x, y, c, n) = floor(1.0f - input);
+    }
+#endif  // HAVE_HALIDE
+
+    int64 getFLOPSPerElement() const { return 2; }
 };
 
+template<>
+const char* const BaseDefaultFunctor<NotFunctor>::ocl_kernel_name = "NotForward";
+
 struct PowerFunctor : public BaseFunctor
 {
     typedef PowerLayer Layer;
@@ -1583,7 +1452,7 @@ struct PowerFunctor : public BaseFunctor
     int64 getFLOPSPerElement() const { return power == 1 ? 2 : 10; }
 };
 
-struct ExpFunctor : public BaseFunctor
+struct ExpFunctor : public BaseDefaultFunctor<ExpFunctor>
 {
     typedef ExpLayer Layer;
     float base, scale, shift;
@@ -1609,47 +1478,16 @@ struct ExpFunctor : public BaseDefaultFunctor<ExpFunctor>
                backendId == DNN_BACKEND_HALIDE || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
-    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
+    inline float calculate(float x) const
     {
-        float a = normScale, b = normShift;
-        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
-        {
-            for( int i = 0; i < len; i++ )
-            {
-                float x = srcptr[i];
-                dstptr[i] = exp(a*x + b);
-            }
-        }
+        return exp(normScale * x + normShift);
     }
 
-#ifdef HAVE_OPENCL
-    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
+    inline void setKernelParams(ocl::Kernel& kernel) const
     {
-        std::vector<UMat> inputs;
-        std::vector<UMat> outputs;
-
-        inps.getUMatVector(inputs);
-        outs.getUMatVector(outputs);
-        String buildopt = oclGetTMacro(inputs[0]);
-
-        for (size_t i = 0; i < inputs.size(); i++)
-        {
-            UMat& src = inputs[i];
-            UMat& dst = outputs[i];
-
-            ocl::Kernel kernel("ExpForward", ocl::dnn::activations_oclsrc, buildopt);
-            kernel.set(0, (int)src.total());
-            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
-            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
-            kernel.set(3, (float)normScale);
-            kernel.set(4, (float)normShift);
-
-            size_t gSize = src.total();
-            CV_Assert(kernel.run(1, &gSize, NULL, false));
-        }
-        return true;
+        kernel.set(3, normScale);
+        kernel.set(4, normShift);
     }
-#endif
 
 #ifdef HAVE_CUDA
     Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
@@ -1666,13 +1504,6 @@ struct ExpFunctor : public BaseDefaultFunctor<ExpFunctor>
     }
 #endif  // HAVE_HALIDE
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
-    {
-        CV_Error(Error::StsNotImplemented, "");
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
     {
@@ -1686,17 +1517,12 @@ struct ExpFunctor : public BaseDefaultFunctor<ExpFunctor>
     }
 #endif  // HAVE_DNN_NGRAPH
 
-#ifdef HAVE_VULKAN
-    std::shared_ptr<vkcom::OpBase> initVkCom()
-    {
-        // TODO: add vkcom implementation
-        return std::shared_ptr<vkcom::OpBase>();
-    }
-#endif  // HAVE_VULKAN
-
     int64 getFLOPSPerElement() const { return 3; }
 };
 
+template<>
+const char* const ExpFunctor::BaseDefaultFunctor<ExpFunctor>::ocl_kernel_name = "ExpForward";
+
 struct ChannelsPReLUFunctor : public BaseFunctor
 {
     typedef ChannelsPReLULayer Layer;
@@ -1917,6 +1743,55 @@ Ptr<BNLLLayer> BNLLLayer::create(const LayerParams& params)
     return l;
 }
 
+
+Ptr<CeilLayer> CeilLayer::create(const LayerParams& params)
+{
+    Ptr<CeilLayer> l(new ElementWiseLayer<CeilFunctor>());
+    l->setParamsFrom(params);
+
+    return l;
+}
+
+Ptr<FloorLayer> FloorLayer::create(const LayerParams& params)
+{
+    Ptr<FloorLayer> l(new ElementWiseLayer<FloorFunctor>());
+    l->setParamsFrom(params);
+
+    return l;
+}
+
+Ptr<LogLayer> LogLayer::create(const LayerParams& params)
+{
+    Ptr<LogLayer> l(new ElementWiseLayer<LogFunctor>());
+    l->setParamsFrom(params);
+
+    return l;
+}
+
+Ptr<RoundLayer> RoundLayer::create(const LayerParams& params)
+{
+    Ptr<RoundLayer> l(new ElementWiseLayer<RoundFunctor>());
+    l->setParamsFrom(params);
+
+    return l;
+}
+
+Ptr<SqrtLayer> SqrtLayer::create(const LayerParams& params)
+{
+    Ptr<SqrtLayer> l(new ElementWiseLayer<SqrtFunctor>());
+    l->setParamsFrom(params);
+
+    return l;
+}
+
+Ptr<NotLayer> NotLayer::create(const LayerParams& params)
+{
+    Ptr<NotLayer> l(new ElementWiseLayer<NotFunctor>());
+    l->setParamsFrom(params);
+
+    return l;
+}
+
 Ptr<PowerLayer> PowerLayer::create(const LayerParams& params)
 {
     float power = params.get<float>("power", 1.0f);
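Note the rounding convention chosen above: RoundFunctor uses std::nearbyint under FE_TONEAREST, i.e. round-half-to-even as the ONNX Round operator specifies, not std::round's half-away-from-zero. A small standard-library-only demonstration of the difference:

    #include <cfenv>
    #include <cmath>
    #include <cstdio>

    int main()
    {
        int old_rounding_direction = std::fegetround();
        std::fesetround(FE_TONEAREST);
        // std::round: halfway cases go away from zero -> 3 and -3.
        // std::nearbyint: halfway cases go to the even neighbour -> 2 and -2.
        std::printf("round(2.5)=%g  nearbyint(2.5)=%g\n",
                    std::round(2.5f), std::nearbyint(2.5f));
        std::printf("round(-2.5)=%g nearbyint(-2.5)=%g\n",
                    std::round(-2.5f), std::nearbyint(-2.5f));
        std::fesetround(old_rounding_direction);
    }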
l->setParamsFrom(params); + + return l; +} + +Ptr FloorLayer::create(const LayerParams& params) +{ + Ptr l(new ElementWiseLayer()); + l->setParamsFrom(params); + + return l; +} + +Ptr LogLayer::create(const LayerParams& params) +{ + Ptr l(new ElementWiseLayer()); + l->setParamsFrom(params); + + return l; +} + +Ptr RoundLayer::create(const LayerParams& params) +{ + Ptr l(new ElementWiseLayer()); + l->setParamsFrom(params); + + return l; +} + +Ptr SqrtLayer::create(const LayerParams& params) +{ + Ptr l(new ElementWiseLayer()); + l->setParamsFrom(params); + + return l; +} + +Ptr NotLayer::create(const LayerParams& params) +{ + Ptr l(new ElementWiseLayer()); + l->setParamsFrom(params); + + return l; +} + Ptr PowerLayer::create(const LayerParams& params) { float power = params.get("power", 1.0f); diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp index 001db24a2d..003f78dc1d 100644 --- a/modules/dnn/src/layers/scale_layer.cpp +++ b/modules/dnn/src/layers/scale_layer.cpp @@ -38,6 +38,7 @@ public: hasBias = params.get("bias_term", false); axis = params.get("axis", 1); hasWeights = false; + mode = params.get("mode", "scale"); } bool getMemoryShapes(const std::vector &inputs, @@ -59,6 +60,10 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { + if (mode != "scale") + { + return backendId == DNN_BACKEND_OPENCV; + } return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_HALIDE || @@ -66,6 +71,20 @@ public: (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && axis > 0); } + template + void handleCompare(const Mat& a, const T& b, Mat& dst, const int spatialSize) + { + Mat out(1, spatialSize, CV_8U); + if (mode == "equal") + compare(a, b, out, CMP_EQ); + else if (mode == "greater") + compare(a, b, out, CMP_GT); + else + compare(a, b, out, CMP_LT); + + out.convertTo(dst, CV_32F, 1. / 255.); + } + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE { CV_TRACE_FUNCTION(); @@ -123,7 +142,16 @@ public: float b = biasesData ? 
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 4a2153d5a7..067447f071 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -118,6 +118,8 @@ private:
     void parseRelu                 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parseElu                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parseTanh                 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+    void parseAbs                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+    void parseCompare              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parsePRelu                (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parseLRN                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parseInstanceNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
@@ -1410,6 +1412,38 @@ void ONNXImporter::parseTanh(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
     addLayer(layerParams, node_proto);
 }
 
+void ONNXImporter::parseAbs(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
+{
+    layerParams.type = "AbsVal";
+    addLayer(layerParams, node_proto);
+}
+
+void ONNXImporter::parseCompare(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
+{
+    CV_Assert(node_proto.input_size() == 2);
+    const std::string& layer_type = node_proto.op_type();
+
+    bool is_const_0 = layer_id.find(node_proto.input(0)) == layer_id.end();
+    bool is_const_1 = layer_id.find(node_proto.input(1)) == layer_id.end();
+
+    if (is_const_0 || is_const_1)
+    {
+        Mat blob = getBlob(node_proto, static_cast<int>(is_const_1));
+        blob = blob.reshape(1, 1);
+        layerParams.blobs.push_back(blob);
+    }
+
+    layerParams.type = "Compare";
+
+    if (layer_type == "Equal")
+        layerParams.set("mode", "equal");
+    else if (layer_type == "Greater")
+        layerParams.set("mode", "greater");
+    else
+        layerParams.set("mode", "less");
+    addLayer(layerParams, node_proto);
+}
+
 void ONNXImporter::parsePRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
 {
     layerParams.type = "PReLU";
@@ -2939,6 +2973,8 @@ const ONNXImporter::DispatchMap ONNXImporter::buildDispatchMap()
     dispatch["Relu"] = &ONNXImporter::parseRelu;
     dispatch["Elu"] = &ONNXImporter::parseElu;
     dispatch["Tanh"] = &ONNXImporter::parseTanh;
+    dispatch["Abs"] = &ONNXImporter::parseAbs;
+    dispatch["Equal"] = dispatch["Greater"] = dispatch["Less"] = &ONNXImporter::parseCompare;
     dispatch["PRelu"] = &ONNXImporter::parsePRelu;
     dispatch["LRN"] = &ONNXImporter::parseLRN;
     dispatch["InstanceNormalization"] = &ONNXImporter::parseInstanceNormalization;
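
For a concrete picture of what parseCompare() emits: an ONNX Greater node with a constant second operand ends up as a Compare layer configured roughly as below (a hypothetical sketch; the layer name and the 0.5f threshold are invented, and the reshape mirrors blob = blob.reshape(1, 1) above):

    #include <opencv2/dnn.hpp>

    int main()
    {
        cv::dnn::LayerParams lp;
        lp.name = "greater_0";   // invented node name
        lp.type = "Compare";     // routed to CompareLayer::create, i.e. a Scale layer in "greater" mode
        lp.set("mode", "greater");

        cv::Mat threshold = (cv::Mat_<float>(1, 1) << 0.5f);
        lp.blobs.push_back(threshold.reshape(1, 1));

        cv::dnn::Net net;
        net.addLayerToPrev(lp.name, lp.type, lp);
        return 0;
    }
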
diff --git a/modules/dnn/src/opencl/activations.cl b/modules/dnn/src/opencl/activations.cl
index 68f0dd7268..bc2a105aba 100644
--- a/modules/dnn/src/opencl/activations.cl
+++ b/modules/dnn/src/opencl/activations.cl
@@ -151,3 +151,39 @@ __kernel void ExpForward(const int n, __global const T* in, __global T* out,
         out[index] = exp(normShift + normScale * in[index]);
     }
 }
+
+__kernel void CeilForward(const int n, __global T* in, __global T* out) {
+    int index = get_global_id(0);
+    if(index < n)
+        out[index] = ceil(in[index]);
+}
+
+__kernel void FloorForward(const int n, __global T* in, __global T* out) {
+    int index = get_global_id(0);
+    if(index < n)
+        out[index] = floor(in[index]);
+}
+
+__kernel void LogForward(const int n, __global T* in, __global T* out) {
+    int index = get_global_id(0);
+    if(index < n)
+        out[index] = log(in[index]);
+}
+
+__kernel void RoundForward(const int n, __global T* in, __global T* out) {
+    int index = get_global_id(0);
+    if(index < n)
+        out[index] = rint(in[index]);
+}
+
+__kernel void SqrtForward(const int n, __global T* in, __global T* out) {
+    int index = get_global_id(0);
+    if(index < n)
+        out[index] = sqrt(in[index]);
+}
+
+__kernel void NotForward(const int n, __global T* in, __global T* out) {
+    int index = get_global_id(0);
+    if(index < n)
+        out[index] = floor(1.0f - in[index]);
+}
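
Note that RoundForward uses rint rather than round: ONNX Round specifies half-to-even rounding, which rint() performs under the default rounding mode, whereas round() sends halfway cases away from zero. A host-side check of the difference (illustrative C++, not patch code):

    #include <cmath>
    #include <cstdio>

    int main()
    {
        std::printf("%g %g\n", std::rint(0.5), std::round(0.5));  // 0 1
        std::printf("%g %g\n", std::rint(2.5), std::round(2.5));  // 2 3
        return 0;
    }
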
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 5909f2ce6e..56a6ab6ea1 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -353,6 +353,82 @@ TEST_P(Test_ONNX_layers, Exp)
     testONNXModels("exp");
 }
 
+TEST_P(Test_ONNX_layers, Elementwise_Ceil)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    testONNXModels("ceil");
+}
+
+TEST_P(Test_ONNX_layers, Elementwise_Floor)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    testONNXModels("floor");
+}
+
+TEST_P(Test_ONNX_layers, Elementwise_Log)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    testONNXModels("log");
+}
+
+TEST_P(Test_ONNX_layers, Elementwise_Round)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    testONNXModels("round");
+}
+
+TEST_P(Test_ONNX_layers, Elementwise_Sqrt)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    testONNXModels("sqrt");
+}
+
+TEST_P(Test_ONNX_layers, Elementwise_not)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    testONNXModels("not");
+}
+
+TEST_P(Test_ONNX_layers, Compare)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    testONNXModels("equal");
+    testONNXModels("greater");
+    testONNXModels("less");
+}
+
+TEST_P(Test_ONNX_layers, CompareSameDims)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    testONNXModels("equal_same_dims", npy, 0, 0, false, true, 2);
+    testONNXModels("greater_same_dims", npy, 0, 0, false, true, 2);
+    testONNXModels("less_same_dims", npy, 0, 0, false, true, 2);
+}
+
 TEST_P(Test_ONNX_layers, Concatenation)
 {
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)