used new device layer in math per element operations

12 years ago · 766d950ff3
parent 7628e57fc6
commit 766d950ff3
2 changed files with 195 additions and 402 deletions
--- a/modules/cudaarithm/src/cuda/math.cu
+++ b/modules/cudaarithm/src/cuda/math.cu
@ -40,196 +40,248 @@
 //
 //M*/

-#if !defined CUDA_DISABLER
+#include "opencv2/opencv_modules.hpp"

-#include "opencv2/core/cuda/common.hpp"
-#include "opencv2/core/cuda/functional.hpp"
-#include "opencv2/core/cuda/transform.hpp"
-#include "opencv2/core/cuda/saturate_cast.hpp"
-#include "opencv2/core/cuda/simd_functions.hpp"
-#include "opencv2/core/cuda/limits.hpp"
-#include "opencv2/core/cuda/type_traits.hpp"
+#ifndef HAVE_OPENCV_CUDEV

-#include "arithm_func_traits.hpp"
+#error "opencv_cudev is required"

-using namespace cv::cuda;
-using namespace cv::cuda::device;
+#else

-//////////////////////////////////////////////////////////////////////////
-// absMat
+#include "opencv2/cudaarithm.hpp"
+#include "opencv2/cudev.hpp"

-namespace cv { namespace cuda { namespace device
+using namespace cv::cudev;
+
+namespace
 {
-    template <typename T> struct TransformFunctorTraits< abs_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
+    template <typename ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
+    {
+    };
+    template <> struct TransformPolicy<double> : DefaultTransformPolicy
    {
+        enum {
+            shift = 1
+        };
    };
-}}}
+}
+
+//////////////////////////////////////////////////////////////////////////////
+/// abs

-namespace arithm
+namespace
 {
    template <typename T>
-    void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
+    void absMat(const GpuMat& src, const GpuMat& dst, Stream& stream)
    {
-        device::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, abs_func<T>(), WithOutMask(), stream);
+        gridTransformUnary_< TransformPolicy<T> >(globPtr<T>(src), globPtr<T>(dst), abs_func<T>(), stream);
    }
+}
+
+void cv::cuda::abs(InputArray _src, OutputArray _dst, Stream& stream)
+{
+    typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, Stream& stream);
+    static const func_t funcs[] =
+    {
+        absMat<uchar>,
+        absMat<schar>,
+        absMat<ushort>,
+        absMat<short>,
+        absMat<int>,
+        absMat<float>,
+        absMat<double>
+    };

-    template void absMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void absMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void absMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void absMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void absMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void absMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void absMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    GpuMat src = _src.getGpuMat();
+
+    const int depth = src.depth();
+
+    CV_DbgAssert( depth <= CV_64F );
+
+    _dst.create(src.size(), src.type());
+    GpuMat dst = _dst.getGpuMat();
+
+    funcs[depth](src.reshape(1), dst.reshape(1), stream);
 }

-//////////////////////////////////////////////////////////////////////////
-// sqrMat
+//////////////////////////////////////////////////////////////////////////////
+/// sqr

-namespace arithm
+namespace
 {
-    template <typename T> struct Sqr : unary_function<T, T>
+    template <typename T> struct SqrOp : unary_function<T, T>
    {
        __device__ __forceinline__ T operator ()(T x) const
        {
            return saturate_cast<T>(x * x);
        }
-
-        __host__ __device__ __forceinline__ Sqr() {}
-        __host__ __device__ __forceinline__ Sqr(const Sqr&) {}
-    };
-}
-
-namespace cv { namespace cuda { namespace device
-{
-    template <typename T> struct TransformFunctorTraits< arithm::Sqr<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
    };
-}}}

-namespace arithm
-{
    template <typename T>
-    void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
+    void sqrMat(const GpuMat& src, const GpuMat& dst, Stream& stream)
    {
-        device::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, Sqr<T>(), WithOutMask(), stream);
+        gridTransformUnary_< TransformPolicy<T> >(globPtr<T>(src), globPtr<T>(dst), SqrOp<T>(), stream);
    }
-
-    template void sqrMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 }

-//////////////////////////////////////////////////////////////////////////
-// sqrtMat
-
-namespace cv { namespace cuda { namespace device
+void cv::cuda::sqr(InputArray _src, OutputArray _dst, Stream& stream)
 {
-    template <typename T> struct TransformFunctorTraits< sqrt_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
+    typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, Stream& stream);
+    static const func_t funcs[] =
    {
+        sqrMat<uchar>,
+        sqrMat<schar>,
+        sqrMat<ushort>,
+        sqrMat<short>,
+        sqrMat<int>,
+        sqrMat<float>,
+        sqrMat<double>
    };
-}}}

-namespace arithm
+    GpuMat src = _src.getGpuMat();
+
+    const int depth = src.depth();
+
+    CV_DbgAssert( depth <= CV_64F );
+
+    _dst.create(src.size(), src.type());
+    GpuMat dst = _dst.getGpuMat();
+
+    funcs[depth](src.reshape(1), dst.reshape(1), stream);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+/// sqrt
+
+namespace
 {
    template <typename T>
-    void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
+    void sqrtMat(const GpuMat& src, const GpuMat& dst, Stream& stream)
    {
-        device::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, sqrt_func<T>(), WithOutMask(), stream);
+        gridTransformUnary_< TransformPolicy<T> >(globPtr<T>(src), globPtr<T>(dst), sqrt_func<T>(), stream);
    }
-
-    template void sqrtMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrtMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrtMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrtMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrtMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrtMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrtMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 }

-//////////////////////////////////////////////////////////////////////////
-// logMat
-
-namespace cv { namespace cuda { namespace device
+void cv::cuda::sqrt(InputArray _src, OutputArray _dst, Stream& stream)
 {
-    template <typename T> struct TransformFunctorTraits< log_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
+    typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, Stream& stream);
+    static const func_t funcs[] =
    {
+        sqrtMat<uchar>,
+        sqrtMat<schar>,
+        sqrtMat<ushort>,
+        sqrtMat<short>,
+        sqrtMat<int>,
+        sqrtMat<float>,
+        sqrtMat<double>
    };
-}}}

-namespace arithm
-{
-    template <typename T>
-    void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
-    {
-        device::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, log_func<T>(), WithOutMask(), stream);
-    }
+    GpuMat src = _src.getGpuMat();
+
+    const int depth = src.depth();
+
+    CV_DbgAssert( depth <= CV_64F );

-    template void logMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void logMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void logMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void logMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void logMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void logMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void logMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    _dst.create(src.size(), src.type());
+    GpuMat dst = _dst.getGpuMat();
+
+    funcs[depth](src.reshape(1), dst.reshape(1), stream);
 }

-//////////////////////////////////////////////////////////////////////////
-// expMat
+////////////////////////////////////////////////////////////////////////
+/// exp

-namespace arithm
+namespace
 {
-    template <typename T> struct Exp : unary_function<T, T>
+    template <typename T> struct ExpOp : unary_function<T, T>
    {
        __device__ __forceinline__ T operator ()(T x) const
        {
            exp_func<T> f;
            return saturate_cast<T>(f(x));
        }
-
-        __host__ __device__ __forceinline__ Exp() {}
-        __host__ __device__ __forceinline__ Exp(const Exp&) {}
    };
+
+    template <typename T>
+    void expMat(const GpuMat& src, const GpuMat& dst, Stream& stream)
+    {
+        gridTransformUnary_< TransformPolicy<T> >(globPtr<T>(src), globPtr<T>(dst), ExpOp<T>(), stream);
+    }
 }

-namespace cv { namespace cuda { namespace device
+void cv::cuda::exp(InputArray _src, OutputArray _dst, Stream& stream)
 {
-    template <typename T> struct TransformFunctorTraits< arithm::Exp<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
+    typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, Stream& stream);
+    static const func_t funcs[] =
    {
+        expMat<uchar>,
+        expMat<schar>,
+        expMat<ushort>,
+        expMat<short>,
+        expMat<int>,
+        expMat<float>,
+        expMat<double>
    };
-}}}

-namespace arithm
+    GpuMat src = _src.getGpuMat();
+
+    const int depth = src.depth();
+
+    CV_DbgAssert( depth <= CV_64F );
+
+    _dst.create(src.size(), src.type());
+    GpuMat dst = _dst.getGpuMat();
+
+    funcs[depth](src.reshape(1), dst.reshape(1), stream);
+}
+
+////////////////////////////////////////////////////////////////////////
+// log
+
+namespace
 {
    template <typename T>
-    void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
+    void logMat(const GpuMat& src, const GpuMat& dst, Stream& stream)
    {
-        device::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, Exp<T>(), WithOutMask(), stream);
+        gridTransformUnary_< TransformPolicy<T> >(globPtr<T>(src), globPtr<T>(dst), log_func<T>(), stream);
    }
+}
+
+void cv::cuda::log(InputArray _src, OutputArray _dst, Stream& stream)
+{
+    typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, Stream& stream);
+    static const func_t funcs[] =
+    {
+        logMat<uchar>,
+        logMat<schar>,
+        logMat<ushort>,
+        logMat<short>,
+        logMat<int>,
+        logMat<float>,
+        logMat<double>
+    };
+
+    GpuMat src = _src.getGpuMat();

-    template void expMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void expMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void expMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void expMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void expMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void expMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void expMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    const int depth = src.depth();
+
+    CV_DbgAssert( depth <= CV_64F );
+
+    _dst.create(src.size(), src.type());
+    GpuMat dst = _dst.getGpuMat();
+
+    funcs[depth](src.reshape(1), dst.reshape(1), stream);
 }

-//////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
 // pow

-namespace arithm
+namespace
 {
    template<typename T, bool Signed = numeric_limits<T>::is_signed> struct PowOp : unary_function<T, T>
    {
        float power;

-        __host__ explicit PowOp(double power_) : power(static_cast<float>(power_)) {}
-
        __device__ __forceinline__ T operator()(T e) const
        {
            return saturate_cast<T>(__powf((float)e, power));
@ -239,8 +291,6 @@ namespace arithm
    {
        float power;

-        __host__ explicit PowOp(double power_) : power(static_cast<float>(power_)) {}
-
        __device__ __forceinline__ T operator()(T e) const
        {
            T res = saturate_cast<T>(__powf((float)e, power));
@ -255,8 +305,6 @@ namespace arithm
    {
        float power;

-        __host__ explicit PowOp(double power_) : power(static_cast<float>(power_)) {}
-
        __device__ __forceinline__ float operator()(float e) const
        {
            return __powf(::fabs(e), power);
@ -266,37 +314,46 @@ namespace arithm
    {
        double power;

-        __host__ explicit PowOp(double power_) : power(power_) {}
-
        __device__ __forceinline__ double operator()(double e) const
        {
            return ::pow(::fabs(e), power);
        }
    };
+
+    template<typename T>
+    void powMat(const GpuMat& src, double power, const GpuMat& dst, Stream& stream)
+    {
+        PowOp<T> op;
+        op.power = static_cast<typename LargerType<T, float>::type>(power);
+
+        gridTransformUnary_< TransformPolicy<T> >(globPtr<T>(src), globPtr<T>(dst), op, stream);
+    }
 }

-namespace cv { namespace cuda { namespace device
+void cv::cuda::pow(InputArray _src, double power, OutputArray _dst, Stream& stream)
 {
-    template <typename T> struct TransformFunctorTraits< arithm::PowOp<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
+    typedef void (*func_t)(const GpuMat& src, double power, const GpuMat& dst, Stream& stream);
+    static const func_t funcs[] =
    {
+        powMat<uchar>,
+        powMat<schar>,
+        powMat<ushort>,
+        powMat<short>,
+        powMat<int>,
+        powMat<float>,
+        powMat<double>
    };
-}}}

-namespace arithm
-{
-    template<typename T>
-    void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream)
-    {
-        device::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, PowOp<T>(power), WithOutMask(), stream);
-    }
+    GpuMat src = _src.getGpuMat();
+
+    const int depth = src.depth();
+
+    CV_DbgAssert(depth <= CV_64F);
+
+    _dst.create(src.size(), src.type());
+    GpuMat dst = _dst.getGpuMat();

-    template void pow<uchar>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
-    template void pow<schar>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
-    template void pow<short>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
-    template void pow<ushort>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
-    template void pow<int>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
-    template void pow<float>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
-    template void pow<double>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
+    funcs[depth](src.reshape(1), power, dst.reshape(1), stream);
 }

-#endif // CUDA_DISABLER
+#endif
--- a/modules/cudaarithm/src/element_operations.cpp
+++ b/modules/cudaarithm/src/element_operations.cpp
@ -451,270 +451,6 @@ void cv::cuda::absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream
    arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, absDiffMat, absDiffScalar);
 }

-//////////////////////////////////////////////////////////////////////////////
-// abs
-
-namespace arithm
-{
-    template <typename T>
-    void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-}
-
-void cv::cuda::abs(InputArray _src, OutputArray _dst, Stream& stream)
-{
-    using namespace arithm;
-
-    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    static const func_t funcs[] =
-    {
-        absMat<unsigned char>,
-        absMat<signed char>,
-        absMat<unsigned short>,
-        absMat<short>,
-        absMat<int>,
-        absMat<float>,
-        absMat<double>
-    };
-
-    GpuMat src = _src.getGpuMat();
-
-    const int depth = src.depth();
-
-    CV_Assert( depth <= CV_64F );
-    CV_Assert( src.channels() == 1 );
-
-    if (depth == CV_64F)
-    {
-        if (!deviceSupports(NATIVE_DOUBLE))
-            CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
-    }
-
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
-
-    funcs[depth](src, dst, StreamAccessor::getStream(stream));
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// sqr
-
-namespace arithm
-{
-    template <typename T>
-    void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-}
-
-void cv::cuda::sqr(InputArray _src, OutputArray _dst, Stream& stream)
-{
-    using namespace arithm;
-
-    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    static const func_t funcs[] =
-    {
-        sqrMat<unsigned char>,
-        sqrMat<signed char>,
-        sqrMat<unsigned short>,
-        sqrMat<short>,
-        sqrMat<int>,
-        sqrMat<float>,
-        sqrMat<double>
-    };
-
-    GpuMat src = _src.getGpuMat();
-
-    const int depth = src.depth();
-
-    CV_Assert( depth <= CV_64F );
-    CV_Assert( src.channels() == 1 );
-
-    if (depth == CV_64F)
-    {
-        if (!deviceSupports(NATIVE_DOUBLE))
-            CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
-    }
-
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
-
-    funcs[depth](src, dst, StreamAccessor::getStream(stream));
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// sqrt
-
-namespace arithm
-{
-    template <typename T>
-    void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-}
-
-void cv::cuda::sqrt(InputArray _src, OutputArray _dst, Stream& stream)
-{
-    using namespace arithm;
-
-    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    static const func_t funcs[] =
-    {
-        sqrtMat<unsigned char>,
-        sqrtMat<signed char>,
-        sqrtMat<unsigned short>,
-        sqrtMat<short>,
-        sqrtMat<int>,
-        sqrtMat<float>,
-        sqrtMat<double>
-    };
-
-    GpuMat src = _src.getGpuMat();
-
-    const int depth = src.depth();
-
-    CV_Assert( depth <= CV_64F );
-    CV_Assert( src.channels() == 1 );
-
-    if (depth == CV_64F)
-    {
-        if (!deviceSupports(NATIVE_DOUBLE))
-            CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
-    }
-
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
-
-    funcs[depth](src, dst, StreamAccessor::getStream(stream));
-}
-
-////////////////////////////////////////////////////////////////////////
-// exp
-
-namespace arithm
-{
-    template <typename T>
-    void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-}
-
-void cv::cuda::exp(InputArray _src, OutputArray _dst, Stream& stream)
-{
-    using namespace arithm;
-
-    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    static const func_t funcs[] =
-    {
-        expMat<unsigned char>,
-        expMat<signed char>,
-        expMat<unsigned short>,
-        expMat<short>,
-        expMat<int>,
-        expMat<float>,
-        expMat<double>
-    };
-
-    GpuMat src = _src.getGpuMat();
-
-    const int depth = src.depth();
-
-    CV_Assert( depth <= CV_64F );
-    CV_Assert( src.channels() == 1 );
-
-    if (depth == CV_64F)
-    {
-        if (!deviceSupports(NATIVE_DOUBLE))
-            CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
-    }
-
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
-
-    funcs[depth](src, dst, StreamAccessor::getStream(stream));
-}
-
-////////////////////////////////////////////////////////////////////////
-// log
-
-namespace arithm
-{
-    template <typename T>
-    void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-}
-
-void cv::cuda::log(InputArray _src, OutputArray _dst, Stream& stream)
-{
-    using namespace arithm;
-
-    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    static const func_t funcs[] =
-    {
-        logMat<unsigned char>,
-        logMat<signed char>,
-        logMat<unsigned short>,
-        logMat<short>,
-        logMat<int>,
-        logMat<float>,
-        logMat<double>
-    };
-
-    GpuMat src = _src.getGpuMat();
-
-    const int depth = src.depth();
-
-    CV_Assert( depth <= CV_64F );
-    CV_Assert( src.channels() == 1 );
-
-    if (depth == CV_64F)
-    {
-        if (!deviceSupports(NATIVE_DOUBLE))
-            CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
-    }
-
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
-
-    funcs[depth](src, dst, StreamAccessor::getStream(stream));
-}
-
-////////////////////////////////////////////////////////////////////////
-// pow
-
-namespace arithm
-{
-    template<typename T> void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
-}
-
-void cv::cuda::pow(InputArray _src, double power, OutputArray _dst, Stream& stream)
-{
-    typedef void (*func_t)(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
-    static const func_t funcs[] =
-    {
-        arithm::pow<unsigned char>,
-        arithm::pow<signed char>,
-        arithm::pow<unsigned short>,
-        arithm::pow<short>,
-        arithm::pow<int>,
-        arithm::pow<float>,
-        arithm::pow<double>
-    };
-
-    GpuMat src = _src.getGpuMat();
-
-    const int depth = src.depth();
-    const int cn = src.channels();
-
-    CV_Assert(depth <= CV_64F);
-
-    if (depth == CV_64F)
-    {
-        if (!deviceSupports(NATIVE_DOUBLE))
-            CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
-    }
-
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
-
-    PtrStepSzb src_(src.rows, src.cols * cn, src.data, src.step);
-    PtrStepSzb dst_(src.rows, src.cols * cn, dst.data, dst.step);
-
-    funcs[depth](src_, power, dst_, StreamAccessor::getStream(stream));
-}
-
 //////////////////////////////////////////////////////////////////////////////
 // compare