Merge pull request #21161 from rogday:elu_alpha_4x

3 years ago · 6e50e4b9ee
parent 1599f9f0c0 4995aecd62
commit 6e50e4b9ee
5 changed files with 17 additions and 12 deletions
--- a/modules/dnn/src/cuda/activations.cu
+++ b/modules/dnn/src/cuda/activations.cu
@@ -119,8 +119,8 @@ void sigmoid(const Stream& stream, Span<T> output, View<T> input) {
 }

 template <class T>
-void elu(const Stream& stream, Span<T> output, View<T> input) {
-    generic_op<T, ELUFunctor<T>>(stream, output, input);
+void elu(const Stream& stream, Span<T> output, View<T> input, T alpha) {
+    generic_op<T, ELUFunctor<T>>(stream, output, input, {alpha});
 }

 template <class T>
@@ -187,7 +187,7 @@ template void tanh<__half>(const Stream&, Span<__half>, View<__half>);
 template void swish<__half>(const Stream&, Span<__half>, View<__half>);
 template void mish<__half>(const Stream&, Span<__half>, View<__half>);
 template void sigmoid<__half>(const Stream&, Span<__half>, View<__half>);
-template void elu<__half>(const Stream&, Span<__half>, View<__half>);
+template void elu<__half>(const Stream&, Span<__half>, View<__half>, __half);
 template void abs<__half>(const Stream& stream, Span<__half> output, View<__half> input);
 template void bnll<__half>(const Stream&, Span<__half>, View<__half>);
 template void ceil<__half>(const Stream&, Span<__half>, View<__half>);
@@ -207,7 +207,7 @@ template void tanh<float>(const Stream&, Span<float>, View<float>);
 template void swish<float>(const Stream&, Span<float>, View<float>);
 template void mish<float>(const Stream&, Span<float>, View<float>);
 template void sigmoid<float>(const Stream&, Span<float>, View<float>);
-template void elu<float>(const Stream&, Span<float>, View<float>);
+template void elu<float>(const Stream&, Span<float>, View<float>, float);
 template void abs<float>(const Stream& stream, Span<float> output, View<float> input);
 template void bnll<float>(const Stream&, Span<float>, View<float>);
 template void ceil<float>(const Stream&, Span<float>, View<float>);
--- a/modules/dnn/src/cuda/functors.hpp
+++ b/modules/dnn/src/cuda/functors.hpp
@@ -169,16 +169,20 @@ struct SigmoidFunctor {
 template <class T>
 struct ELUFunctor {
    struct Params {
-        CUDA4DNN_HOST_DEVICE Params() { }
+        CUDA4DNN_HOST_DEVICE Params() : alpha(1) { }
+        CUDA4DNN_HOST_DEVICE Params(T alpha_) : alpha(alpha_) { }
+        T alpha;
    };

-    CUDA4DNN_DEVICE ELUFunctor() { }
-    CUDA4DNN_DEVICE ELUFunctor(const Params& params) { }
+    CUDA4DNN_DEVICE ELUFunctor() : ELUFunctor(Params{}) { }
+    CUDA4DNN_DEVICE ELUFunctor(const Params& params) : alpha{params.alpha} { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::expm1;
-        return value >= T(0) ? value : expm1(value);
+        return value >= T(0) ? value : alpha * expm1(value);
    }
+
+    T alpha;
 };

 template <class T>
--- a/modules/dnn/src/cuda4dnn/kernels/activations.hpp
+++ b/modules/dnn/src/cuda4dnn/kernels/activations.hpp
@@ -34,7 +34,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
    void sigmoid(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

    template <class T>
-    void elu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
+    void elu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha);

    template <class T>
    void abs(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
--- a/modules/dnn/src/cuda4dnn/primitives/activation.hpp
+++ b/modules/dnn/src/cuda4dnn/primitives/activation.hpp
@@ -156,15 +156,16 @@ namespace cv { namespace dnn { namespace cuda4dnn {
    template <class T>
    class ELUOp final : public BaseOp<ELUOp, T> {
    public:
-        ELUOp(csl::Stream stream_) : stream(std::move(stream_)) { }
+        ELUOp(csl::Stream stream_, T alpha_) : stream(std::move(stream_)), alpha(alpha_) { }

        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
        {
-            kernels::elu<T>(stream, output, input);
+            kernels::elu<T>(stream, output, input, alpha);
        }

    private:
        csl::Stream stream;
+        T alpha;
    };

    template <class T>
--- a/modules/dnn/src/layers/elementwise_layers.cpp
+++ b/modules/dnn/src/layers/elementwise_layers.cpp
@@ -1012,7 +1012,7 @@ struct ELUFunctor : public BaseDefaultFunctor<ELUFunctor>
 #ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
-        return make_cuda_node<cuda4dnn::ELUOp>(target, stream);
+        return make_cuda_node<cuda4dnn::ELUOp>(target, stream, alpha);
    }
 #endif