Merge pull request #1298 from jet47:gpu-eliminate-doubles

pull/1307/merge
Roman Donchenko 11 years ago committed by OpenCV Buildbot
commit a1b633e28e
  1. 17
      modules/gpuarithm/src/cuda/reduce.cu
  2. 2
      modules/gpubgsegm/src/cuda/mog2.cu
  3. 2
      modules/gpufeatures2d/perf/perf_features2d.cpp
  4. 6
      modules/gpufeatures2d/src/cuda/orb.cu
  5. 2
      modules/gpuimgproc/src/cuda/bilateral_filter.cu
  6. 2
      modules/gpuimgproc/src/cuda/canny.cu
  7. 2
      modules/gpuoptflow/src/cuda/needle_map.cu
  8. 8
      modules/softcascade/src/cuda/icf-sc.cu
  9. 2
      modules/superres/perf/perf_superres.cpp

@ -72,7 +72,7 @@ namespace reduce
} }
template <typename T> template <typename T>
__device__ __forceinline__ T result(T r, double) const __device__ __forceinline__ T result(T r, int) const
{ {
return r; return r;
} }
@ -81,6 +81,15 @@ namespace reduce
__host__ __device__ __forceinline__ Sum(const Sum&) {} __host__ __device__ __forceinline__ Sum(const Sum&) {}
}; };
template <typename T> struct OutputType
{
typedef float type;
};
template <> struct OutputType<double>
{
typedef double type;
};
struct Avg struct Avg
{ {
template <typename T> template <typename T>
@ -96,7 +105,7 @@ namespace reduce
} }
template <typename T> template <typename T>
__device__ __forceinline__ typename TypeVec<double, VecTraits<T>::cn>::vec_type result(T r, double sz) const __device__ __forceinline__ typename TypeVec<typename OutputType<typename VecTraits<T>::elem_type>::type, VecTraits<T>::cn>::vec_type result(T r, float sz) const
{ {
return r / sz; return r / sz;
} }
@ -121,7 +130,7 @@ namespace reduce
} }
template <typename T> template <typename T>
__device__ __forceinline__ T result(T r, double) const __device__ __forceinline__ T result(T r, int) const
{ {
return r; return r;
} }
@ -146,7 +155,7 @@ namespace reduce
} }
template <typename T> template <typename T>
__device__ __forceinline__ T result(T r, double) const __device__ __forceinline__ T result(T r, int) const
{ {
return r; return r;
} }

@ -227,7 +227,7 @@ namespace cv { namespace gpu { namespace cudev
//check prune //check prune
if (weight < -prune) if (weight < -prune)
{ {
weight = 0.0; weight = 0.0f;
nmodes--; nmodes--;
} }

@ -123,7 +123,7 @@ PERF_TEST_P(Image_NFeatures, ORB,
sortKeyPoints(gpu_keypoints, gpu_descriptors); sortKeyPoints(gpu_keypoints, gpu_descriptors);
SANITY_CHECK_KEYPOINTS(gpu_keypoints); SANITY_CHECK_KEYPOINTS(gpu_keypoints, 1e-4);
SANITY_CHECK(gpu_descriptors); SANITY_CHECK(gpu_descriptors);
} }
else else

@ -197,8 +197,8 @@ namespace cv { namespace gpu { namespace cudev
if (threadIdx.x == 0) if (threadIdx.x == 0)
{ {
float kp_dir = ::atan2f((float)m_01, (float)m_10); float kp_dir = ::atan2f((float)m_01, (float)m_10);
kp_dir += (kp_dir < 0) * (2.0f * CV_PI); kp_dir += (kp_dir < 0) * (2.0f * CV_PI_F);
kp_dir *= 180.0f / CV_PI; kp_dir *= 180.0f / CV_PI_F;
angle[ptidx] = kp_dir; angle[ptidx] = kp_dir;
} }
@ -349,7 +349,7 @@ namespace cv { namespace gpu { namespace cudev
if (ptidx < npoints && descidx < dsize) if (ptidx < npoints && descidx < dsize)
{ {
float angle = angle_[ptidx]; float angle = angle_[ptidx];
angle *= (float)(CV_PI / 180.f); angle *= (float)(CV_PI_F / 180.f);
float sina, cosa; float sina, cosa;
::sincosf(angle, &sina, &cosa); ::sincosf(angle, &sina, &cosa);

@ -133,7 +133,7 @@ namespace cv { namespace gpu { namespace cudev
B<T> b(src.rows, src.cols); B<T> b(src.rows, src.cols);
float sigma_spatial2_inv_half = -0.5f/(sigma_spatial * sigma_spatial); float sigma_spatial2_inv_half = -0.5f/(sigma_spatial * sigma_spatial);
float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color); float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color);
cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) ); cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );
bilateral_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half); bilateral_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half);

@ -43,7 +43,7 @@
#if !defined CUDA_DISABLER #if !defined CUDA_DISABLER
#include <utility> #include <utility>
#include <algorithm>//std::swap #include <algorithm>
#include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/emulation.hpp" #include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/transform.hpp" #include "opencv2/core/cuda/transform.hpp"

@ -140,7 +140,7 @@ namespace cv { namespace gpu { namespace cudev
const float u_avg_val = u_avg(y, x); const float u_avg_val = u_avg(y, x);
const float v_avg_val = v_avg(y, x); const float v_avg_val = v_avg(y, x);
const float theta = ::atan2f(v_avg_val, u_avg_val);// + CV_PI; const float theta = ::atan2f(v_avg_val, u_avg_val);
float r = ::sqrtf(v_avg_val * v_avg_val + u_avg_val * u_avg_val); float r = ::sqrtf(v_avg_val * v_avg_val + u_avg_val * u_avg_val);
r = fmin(14.0f * (r / max_flow), 14.0f); r = fmin(14.0f * (r / max_flow), 14.0f);

@ -137,10 +137,10 @@ typedef unsigned char uchar;
template<bool isDefaultNum> template<bool isDefaultNum>
__device__ __forceinline__ int fast_angle_bin(const float& dx, const float& dy) __device__ __forceinline__ int fast_angle_bin(const float& dx, const float& dy)
{ {
const float angle_quantum = CV_PI / 6.f; const float angle_quantum = CV_PI_F / 6.f;
float angle = atan2(dx, dy) + (angle_quantum / 2.f); float angle = atan2(dx, dy) + (angle_quantum / 2.f);
if (angle < 0) angle += CV_PI; if (angle < 0) angle += CV_PI_F;
const float angle_scaling = 1.f / angle_quantum; const float angle_scaling = 1.f / angle_quantum;
return static_cast<int>(angle * angle_scaling) % 6; return static_cast<int>(angle * angle_scaling) % 6;
@ -174,8 +174,8 @@ typedef unsigned char uchar;
{ {
int i = 3; int i = 3;
float2 bin_vector_i; float2 bin_vector_i;
bin_vector_i.x = ::cos(i * (CV_PI / 6.f)); bin_vector_i.x = ::cos(i * (CV_PI_F / 6.f));
bin_vector_i.y = ::sin(i * (CV_PI / 6.f)); bin_vector_i.y = ::sin(i * (CV_PI_F / 6.f));
const float dot_product = fabs(dx * bin_vector_i.x + dy * bin_vector_i.y); const float dot_product = fabs(dx * bin_vector_i.x + dy * bin_vector_i.y);
if(dot_product > max_dot) if(dot_product > max_dot)

@ -160,7 +160,7 @@ PERF_TEST_P(Size_MatType, SuperResolution_BTVL1,
TEST_CYCLE_N(10) superRes->nextFrame(dst); TEST_CYCLE_N(10) superRes->nextFrame(dst);
GPU_SANITY_CHECK(dst); GPU_SANITY_CHECK(dst, 2);
} }
else else
{ {

Loading…
Cancel
Save