switched to new device layer in polar <-> cart

11 years ago · 7c8c836a7b
parent 5522f43b18
commit 7c8c836a7b
3 changed files with 189 additions and 277 deletions
--- a/modules/cudaarithm/src/cuda/polar_cart.cu
+++ b/modules/cudaarithm/src/cuda/polar_cart.cu
@ -40,178 +40,172 @@
 //
 //M*/

-#if !defined CUDA_DISABLER
+#include "opencv2/opencv_modules.hpp"

-#include "opencv2/core/cuda/common.hpp"
+#ifndef HAVE_OPENCV_CUDEV

-namespace cv { namespace cuda { namespace device
+#error "opencv_cudev is required"
+
+#else
+
+#include "opencv2/cudaarithm.hpp"
+#include "opencv2/cudev.hpp"
+
+using namespace cv::cudev;
+
+void cv::cuda::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
 {
-    namespace mathfunc
-    {
-        //////////////////////////////////////////////////////////////////////////////////////
-        // Cart <-> Polar
+    GpuMat x = _x.getGpuMat();
+    GpuMat y = _y.getGpuMat();

-        struct Nothing
-        {
-            static __device__ __forceinline__ void calc(int, int, float, float, float*, size_t, float)
-            {
-            }
-        };
-        struct Magnitude
-        {
-            static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
-            {
-                dst[y * dst_step + x] = ::sqrtf(x_data * x_data + y_data * y_data);
-            }
-        };
-        struct MagnitudeSqr
-        {
-            static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
-            {
-                dst[y * dst_step + x] = x_data * x_data + y_data * y_data;
-            }
-        };
-        struct Atan2
-        {
-            static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float scale)
-            {
-                float angle = ::atan2f(y_data, x_data);
-                angle += (angle < 0) * 2.0f * CV_PI_F;
-                dst[y * dst_step + x] = scale * angle;
-            }
-        };
-        template <typename Mag, typename Angle>
-        __global__ void cartToPolar(const float* xptr, size_t x_step, const float* yptr, size_t y_step,
-                                    float* mag, size_t mag_step, float* angle, size_t angle_step, float scale, int width, int height)
-        {
-            const int x = blockDim.x * blockIdx.x + threadIdx.x;
-            const int y = blockDim.y * blockIdx.y + threadIdx.y;
+    CV_DbgAssert( x.depth() == CV_32F );
+    CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );

-            if (x < width && y < height)
-            {
-                float x_data = xptr[y * x_step + x];
-                float y_data = yptr[y * y_step + x];
+    _dst.create(x.size(), CV_32FC1);
+    GpuMat dst = _dst.getGpuMat();

-                Mag::calc(x, y, x_data, y_data, mag, mag_step, scale);
-                Angle::calc(x, y, x_data, y_data, angle, angle_step, scale);
-            }
-        }
+    GpuMat_<float> xc(x.reshape(1));
+    GpuMat_<float> yc(y.reshape(1));
+    GpuMat_<float> magc(dst.reshape(1));

-        struct NonEmptyMag
-        {
-            static __device__ __forceinline__ float get(const float* mag, size_t mag_step, int x, int y)
-            {
-                return mag[y * mag_step + x];
-            }
-        };
-        struct EmptyMag
-        {
-            static __device__ __forceinline__ float get(const float*, size_t, int, int)
-            {
-                return 1.0f;
-            }
-        };
-        template <typename Mag>
-        __global__ void polarToCart(const float* mag, size_t mag_step, const float* angle, size_t angle_step, float scale,
-            float* xptr, size_t x_step, float* yptr, size_t y_step, int width, int height)
-        {
-            const int x = blockDim.x * blockIdx.x + threadIdx.x;
-            const int y = blockDim.y * blockIdx.y + threadIdx.y;
+    gridTransformBinary(xc, yc, magc, magnitude_func<float>(), stream);
+}

-            if (x < width && y < height)
-            {
-                float mag_data = Mag::get(mag, mag_step, x, y);
-                float angle_data = angle[y * angle_step + x];
-                float sin_a, cos_a;
+void cv::cuda::magnitudeSqr(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
+{
+    GpuMat x = _x.getGpuMat();
+    GpuMat y = _y.getGpuMat();

-                ::sincosf(scale * angle_data, &sin_a, &cos_a);
+    CV_DbgAssert( x.depth() == CV_32F );
+    CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );

-                xptr[y * x_step + x] = mag_data * cos_a;
-                yptr[y * y_step + x] = mag_data * sin_a;
-            }
-        }
+    _dst.create(x.size(), CV_32FC1);
+    GpuMat dst = _dst.getGpuMat();

-        template <typename Mag, typename Angle>
-        void cartToPolar_caller(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream)
-        {
-            dim3 threads(32, 8, 1);
-            dim3 grid(1, 1, 1);
+    GpuMat_<float> xc(x.reshape(1));
+    GpuMat_<float> yc(y.reshape(1));
+    GpuMat_<float> magc(dst.reshape(1));

-            grid.x = divUp(x.cols, threads.x);
-            grid.y = divUp(x.rows, threads.y);
+    gridTransformBinary(xc, yc, magc, magnitude_sqr_func<float>(), stream);
+}

-            const float scale = angleInDegrees ? (180.0f / CV_PI_F) : 1.f;
+void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleInDegrees, Stream& stream)
+{
+    GpuMat x = _x.getGpuMat();
+    GpuMat y = _y.getGpuMat();

-            cartToPolar<Mag, Angle><<<grid, threads, 0, stream>>>(
-                x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(),
-                mag.data, mag.step/mag.elemSize(), angle.data, angle.step/angle.elemSize(), scale, x.cols, x.rows);
-            cudaSafeCall( cudaGetLastError() );
+    CV_DbgAssert( x.depth() == CV_32F );
+    CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );

-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
+    _dst.create(x.size(), CV_32FC1);
+    GpuMat dst = _dst.getGpuMat();

-        void cartToPolar_gpu(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, bool magSqr, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream)
-        {
-            typedef void (*caller_t)(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream);
-            static const caller_t callers[2][2][2] =
-            {
-                {
-                    {
-                        cartToPolar_caller<Magnitude, Atan2>,
-                        cartToPolar_caller<Magnitude, Nothing>
-                    },
+    GpuMat_<float> xc(x.reshape(1));
+    GpuMat_<float> yc(y.reshape(1));
+    GpuMat_<float> anglec(dst.reshape(1));
+
+    if (angleInDegrees)
+        gridTransformBinary(xc, yc, anglec, direction_func<float, true>(), stream);
+    else
+        gridTransformBinary(xc, yc, anglec, direction_func<float, false>(), stream);
+}
+
+void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream)
+{
+    GpuMat x = _x.getGpuMat();
+    GpuMat y = _y.getGpuMat();
+
+    CV_DbgAssert( x.depth() == CV_32F );
+    CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );
+
+    _mag.create(x.size(), CV_32FC1);
+    GpuMat mag = _mag.getGpuMat();
+
+    _angle.create(x.size(), CV_32FC1);
+    GpuMat angle = _angle.getGpuMat();
+
+    GpuMat_<float> xc(x.reshape(1));
+    GpuMat_<float> yc(y.reshape(1));
+    GpuMat_<float> magc(mag.reshape(1));
+    GpuMat_<float> anglec(angle.reshape(1));
+
+    if (angleInDegrees)
    {
-                        cartToPolar_caller<MagnitudeSqr, Atan2>,
-                        cartToPolar_caller<MagnitudeSqr, Nothing>,
+        gridTransformTuple(zipPtr(xc, yc),
+                           tie(magc, anglec),
+                           make_tuple(
+                               binaryTupleAdapter<0, 1>(magnitude_func<float>()),
+                               binaryTupleAdapter<0, 1>(direction_func<float, true>())),
+                           stream);
    }
-                },
-                {
+    else
    {
-                        cartToPolar_caller<Nothing, Atan2>,
-                        cartToPolar_caller<Nothing, Nothing>
-                    },
-                    {
-                        cartToPolar_caller<Nothing, Atan2>,
-                        cartToPolar_caller<Nothing, Nothing>,
-                    }
+        gridTransformTuple(zipPtr(xc, yc),
+                           tie(magc, anglec),
+                           make_tuple(
+                               binaryTupleAdapter<0, 1>(magnitude_func<float>()),
+                               binaryTupleAdapter<0, 1>(direction_func<float, false>())),
+                           stream);
    }
-            };
+}
+
+namespace
+{
+    template <bool useMag>
+    __global__ void polarToCartImpl(const GlobPtr<float> mag, const GlobPtr<float> angle, GlobPtr<float> xmat, GlobPtr<float> ymat, const float scale, const int rows, const int cols)
+    {
+        const int x = blockDim.x * blockIdx.x + threadIdx.x;
+        const int y = blockDim.y * blockIdx.y + threadIdx.y;
+
+        if (x >= cols || y >= rows)
+            return;
+
+        const float mag_val = useMag ? mag(y, x) : 1.0f;
+        const float angle_val = angle(y, x);

-            callers[mag.data == 0][magSqr][angle.data == 0](x, y, mag, angle, angleInDegrees, stream);
+        float sin_a, cos_a;
+        ::sincosf(scale * angle_val, &sin_a, &cos_a);
+
+        xmat(y, x) = mag_val * cos_a;
+        ymat(y, x) = mag_val * sin_a;
    }
+}

-        template <typename Mag>
-        void polarToCart_caller(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream)
-        {
-            dim3 threads(32, 8, 1);
-            dim3 grid(1, 1, 1);
+void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, OutputArray _y, bool angleInDegrees, Stream& _stream)
+{
+    GpuMat mag = _mag.getGpuMat();
+    GpuMat angle = _angle.getGpuMat();
+
+    CV_DbgAssert( angle.depth() == CV_32F );
+    CV_DbgAssert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size()) );

-            grid.x = divUp(mag.cols, threads.x);
-            grid.y = divUp(mag.rows, threads.y);
+    _x.create(angle.size(), CV_32FC1);
+    GpuMat x = _x.getGpuMat();
+
+    _y.create(angle.size(), CV_32FC1);
+    GpuMat y = _y.getGpuMat();
+
+    GpuMat_<float> xc(x.reshape(1));
+    GpuMat_<float> yc(y.reshape(1));
+    GpuMat_<float> magc(mag.reshape(1));
+    GpuMat_<float> anglec(angle.reshape(1));
+
+    const dim3 block(32, 8);
+    const dim3 grid(divUp(anglec.cols, block.x), divUp(anglec.rows, block.y));

    const float scale = angleInDegrees ? (CV_PI_F / 180.0f) : 1.0f;

-            polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.data, mag.step/mag.elemSize(),
-                angle.data, angle.step/angle.elemSize(), scale, x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(), mag.cols, mag.rows);
-            cudaSafeCall( cudaGetLastError() );
+    cudaStream_t stream = StreamAccessor::getStream(_stream);

-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
+    if (magc.empty())
+        polarToCartImpl<false><<<grid, block, 0, stream>>>(shrinkPtr(magc), shrinkPtr(anglec), shrinkPtr(xc), shrinkPtr(yc), scale, anglec.rows, anglec.cols);
+    else
+        polarToCartImpl<true><<<grid, block, 0, stream>>>(shrinkPtr(magc), shrinkPtr(anglec), shrinkPtr(xc), shrinkPtr(yc), scale, anglec.rows, anglec.cols);

-        void polarToCart_gpu(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream)
-        {
-            typedef void (*caller_t)(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream);
-            static const caller_t callers[2] =
-            {
-                polarToCart_caller<NonEmptyMag>,
-                polarToCart_caller<EmptyMag>
-            };
+    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

-            callers[mag.data == 0](mag, angle, x, y, angleInDegrees, stream);
-        }
-    } // namespace mathfunc
-}}} // namespace cv { namespace cuda { namespace cudev
+    if (stream == 0)
+        CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
+}

-#endif /* CUDA_DISABLER */
+#endif
--- a/modules/cudaarithm/src/element_operations.cpp
+++ b/modules/cudaarithm/src/element_operations.cpp
@ -493,110 +493,4 @@ void cv::cuda::magnitudeSqr(InputArray _src, OutputArray _dst, Stream& stream)
    npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream));
 }

-////////////////////////////////////////////////////////////////////////
-// Polar <-> Cart
-
-namespace cv { namespace cuda { namespace device
-{
-    namespace mathfunc
-    {
-        void cartToPolar_gpu(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, bool magSqr, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream);
-        void polarToCart_gpu(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream);
-    }
-}}}
-
-namespace
-{
-    void cartToPolar_caller(const GpuMat& x, const GpuMat& y, GpuMat* mag, bool magSqr, GpuMat* angle, bool angleInDegrees, cudaStream_t stream)
-    {
-        using namespace ::cv::cuda::device::mathfunc;
-
-        CV_Assert(x.size() == y.size() && x.type() == y.type());
-        CV_Assert(x.depth() == CV_32F);
-
-        GpuMat x1cn = x.reshape(1);
-        GpuMat y1cn = y.reshape(1);
-        GpuMat mag1cn = mag ? mag->reshape(1) : GpuMat();
-        GpuMat angle1cn = angle ? angle->reshape(1) : GpuMat();
-
-        cartToPolar_gpu(x1cn, y1cn, mag1cn, magSqr, angle1cn, angleInDegrees, stream);
-    }
-
-    void polarToCart_caller(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t stream)
-    {
-        using namespace ::cv::cuda::device::mathfunc;
-
-        CV_Assert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type());
-        CV_Assert(mag.depth() == CV_32F);
-
-        GpuMat mag1cn = mag.reshape(1);
-        GpuMat angle1cn = angle.reshape(1);
-        GpuMat x1cn = x.reshape(1);
-        GpuMat y1cn = y.reshape(1);
-
-        polarToCart_gpu(mag1cn, angle1cn, x1cn, y1cn, angleInDegrees, stream);
-    }
-}
-
-void cv::cuda::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
-{
-    GpuMat x = _x.getGpuMat();
-    GpuMat y = _y.getGpuMat();
-
-    _dst.create(x.size(), CV_32FC1);
-    GpuMat dst = _dst.getGpuMat();
-
-    cartToPolar_caller(x, y, &dst, false, 0, false, StreamAccessor::getStream(stream));
-}
-
-void cv::cuda::magnitudeSqr(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
-{
-    GpuMat x = _x.getGpuMat();
-    GpuMat y = _y.getGpuMat();
-
-    _dst.create(x.size(), CV_32FC1);
-    GpuMat dst = _dst.getGpuMat();
-
-    cartToPolar_caller(x, y, &dst, true, 0, false, StreamAccessor::getStream(stream));
-}
-
-void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleInDegrees, Stream& stream)
-{
-    GpuMat x = _x.getGpuMat();
-    GpuMat y = _y.getGpuMat();
-
-    _dst.create(x.size(), CV_32FC1);
-    GpuMat dst = _dst.getGpuMat();
-
-    cartToPolar_caller(x, y, 0, false, &dst, angleInDegrees, StreamAccessor::getStream(stream));
-}
-
-void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream)
-{
-    GpuMat x = _x.getGpuMat();
-    GpuMat y = _y.getGpuMat();
-
-    _mag.create(x.size(), CV_32FC1);
-    GpuMat mag = _mag.getGpuMat();
-
-    _angle.create(x.size(), CV_32FC1);
-    GpuMat angle = _angle.getGpuMat();
-
-    cartToPolar_caller(x, y, &mag, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
-}
-
-void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, OutputArray _y, bool angleInDegrees, Stream& stream)
-{
-    GpuMat mag = _mag.getGpuMat();
-    GpuMat angle = _angle.getGpuMat();
-
-    _x.create(mag.size(), CV_32FC1);
-    GpuMat x = _x.getGpuMat();
-
-    _y.create(mag.size(), CV_32FC1);
-    GpuMat y = _y.getGpuMat();
-
-    polarToCart_caller(mag, angle, x, y, angleInDegrees, StreamAccessor::getStream(stream));
-}
-
 #endif
--- a/modules/cudev/include/opencv2/cudev/functional/functional.hpp
+++ b/modules/cudev/include/opencv2/cudev/functional/functional.hpp
@ -616,6 +616,30 @@ template <typename T> struct magnitude_func : binary_function<T, T, typename fun
    }
 };

+template <typename T> struct magnitude_sqr_func : binary_function<T, T, typename functional_detail::FloatType<T>::type>
+{
+    __device__ __forceinline__ typename functional_detail::FloatType<T>::type operator ()(typename TypeTraits<T>::parameter_type a, typename TypeTraits<T>::parameter_type b) const
+    {
+        return a * a + b * b;
+    }
+};
+
+template <typename T, bool angleInDegrees> struct direction_func : binary_function<T, T, T>
+{
+    __device__ T operator ()(T x, T y) const
+    {
+        atan2_func<T> f;
+        typename atan2_func<T>::result_type angle = f(y, x);
+
+        angle += (angle < 0) * (2.0f * CV_PI_F);
+
+        if (angleInDegrees)
+            angle *= (180.0f / CV_PI_F);
+
+        return saturate_cast<T>(angle);
+    }
+};
+
 template <typename T> struct pow_func : binary_function<T, float, float>
 {
    __device__ __forceinline__ float operator ()(T val, float power) const