Merge pull request #1298 from jet47:gpu-eliminate-doubles

11 years ago · a1b633e28e
parent beada19ad0 cd2b7448f4
commit a1b633e28e
9 changed files with 26 additions and 17 deletions
--- a/modules/gpuarithm/src/cuda/reduce.cu
+++ b/modules/gpuarithm/src/cuda/reduce.cu
@@ -72,7 +72,7 @@ namespace reduce
        }
        template <typename T>
-        __device__ __forceinline__ T result(T r, double) const
+        __device__ __forceinline__ T result(T r, int) const
        {
            return r;
        }
@@ -81,6 +81,15 @@ namespace reduce
        __host__ __device__ __forceinline__ Sum(const Sum&) {}
    };
+    template <typename T> struct OutputType
+    {
+        typedef float type;
+    };
+    template <> struct OutputType<double>
+    {
+        typedef double type;
+    };
    struct Avg
    {
        template <typename T>
@@ -96,7 +105,7 @@ namespace reduce
        }
        template <typename T>
-        __device__ __forceinline__ typename TypeVec<double, VecTraits<T>::cn>::vec_type result(T r, double sz) const
+        __device__ __forceinline__ typename TypeVec<typename OutputType<typename VecTraits<T>::elem_type>::type, VecTraits<T>::cn>::vec_type result(T r, float sz) const
        {
            return r / sz;
        }
@@ -121,7 +130,7 @@ namespace reduce
        }
        template <typename T>
-        __device__ __forceinline__ T result(T r, double) const
+        __device__ __forceinline__ T result(T r, int) const
        {
            return r;
        }
@@ -146,7 +155,7 @@ namespace reduce
        }
        template <typename T>
-        __device__ __forceinline__ T result(T r, double) const
+        __device__ __forceinline__ T result(T r, int) const
        {
            return r;
        }
--- a/modules/gpubgsegm/src/cuda/mog2.cu
+++ b/modules/gpubgsegm/src/cuda/mog2.cu
@@ -227,7 +227,7 @@ namespace cv { namespace gpu { namespace cudev
                //check prune
                if (weight < -prune)
                {
-                    weight = 0.0;
+                    weight = 0.0f;
                    nmodes--;
                }
--- a/modules/gpufeatures2d/perf/perf_features2d.cpp
+++ b/modules/gpufeatures2d/perf/perf_features2d.cpp
@@ -123,7 +123,7 @@ PERF_TEST_P(Image_NFeatures, ORB,
        sortKeyPoints(gpu_keypoints, gpu_descriptors);
-        SANITY_CHECK_KEYPOINTS(gpu_keypoints);
+        SANITY_CHECK_KEYPOINTS(gpu_keypoints, 1e-4);
        SANITY_CHECK(gpu_descriptors);
    }
    else
--- a/modules/gpufeatures2d/src/cuda/orb.cu
+++ b/modules/gpufeatures2d/src/cuda/orb.cu
@@ -197,8 +197,8 @@ namespace cv { namespace gpu { namespace cudev
                if (threadIdx.x == 0)
                {
                    float kp_dir = ::atan2f((float)m_01, (float)m_10);
-                    kp_dir += (kp_dir < 0) * (2.0f * CV_PI);
+                    kp_dir += (kp_dir < 0) * (2.0f * CV_PI_F);
-                    kp_dir *= 180.0f / CV_PI;
+                    kp_dir *= 180.0f / CV_PI_F;
                    angle[ptidx] = kp_dir;
                }
@@ -349,7 +349,7 @@ namespace cv { namespace gpu { namespace cudev
            if (ptidx < npoints && descidx < dsize)
            {
                float angle = angle_[ptidx];
-                angle *= (float)(CV_PI / 180.f);
+                angle *= (float)(CV_PI_F / 180.f);
                float sina, cosa;
                ::sincosf(angle, &sina, &cosa);
--- a/modules/gpuimgproc/src/cuda/canny.cu
+++ b/modules/gpuimgproc/src/cuda/canny.cu
@@ -43,7 +43,7 @@
 #if !defined CUDA_DISABLER
 #include <utility>
-#include <algorithm>//std::swap
+#include <algorithm>
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/emulation.hpp"
 #include "opencv2/core/cuda/transform.hpp"
--- a/modules/gpuoptflow/src/cuda/needle_map.cu
+++ b/modules/gpuoptflow/src/cuda/needle_map.cu
@@ -140,7 +140,7 @@ namespace cv { namespace gpu { namespace cudev
                const float u_avg_val = u_avg(y, x);
                const float v_avg_val = v_avg(y, x);
-                const float theta = ::atan2f(v_avg_val, u_avg_val);// + CV_PI;
+                const float theta = ::atan2f(v_avg_val, u_avg_val);
                float r = ::sqrtf(v_avg_val * v_avg_val + u_avg_val * u_avg_val);
                r = fmin(14.0f * (r / max_flow), 14.0f);
--- a/modules/softcascade/src/cuda/icf-sc.cu
+++ b/modules/softcascade/src/cuda/icf-sc.cu
@@ -137,10 +137,10 @@ typedef unsigned char uchar;
    template<bool isDefaultNum>
    __device__ __forceinline__ int fast_angle_bin(const float& dx, const float& dy)
    {
-        const float angle_quantum = CV_PI / 6.f;
+        const float angle_quantum = CV_PI_F / 6.f;
        float angle = atan2(dx, dy) + (angle_quantum / 2.f);
-        if (angle < 0) angle += CV_PI;
+        if (angle < 0) angle += CV_PI_F;
        const float angle_scaling = 1.f / angle_quantum;
        return static_cast<int>(angle * angle_scaling) % 6;
@@ -174,8 +174,8 @@ typedef unsigned char uchar;
        {
            int i = 3;
            float2 bin_vector_i;
-            bin_vector_i.x = ::cos(i * (CV_PI / 6.f));
+            bin_vector_i.x = ::cos(i * (CV_PI_F / 6.f));
-            bin_vector_i.y = ::sin(i * (CV_PI / 6.f));
+            bin_vector_i.y = ::sin(i * (CV_PI_F / 6.f));
            const float dot_product = fabs(dx * bin_vector_i.x + dy * bin_vector_i.y);
            if(dot_product > max_dot)
--- a/modules/superres/perf/perf_superres.cpp
+++ b/modules/superres/perf/perf_superres.cpp
@@ -160,7 +160,7 @@ PERF_TEST_P(Size_MatType, SuperResolution_BTVL1,
        TEST_CYCLE_N(10) superRes->nextFrame(dst);
-        GPU_SANITY_CHECK(dst);
+        GPU_SANITY_CHECK(dst, 2);
    }
    else
    {