Merge pull request #964 from jet47:cuda-5.5-support

pull/856/merge
Roman Donchenko 12 years ago committed by OpenCV Buildbot
commit 173442bb2e
  1. 1064
      modules/core/include/opencv2/core/cuda/vec_math.hpp
  2. 2
      modules/core/include/opencv2/core/gpu_private.hpp
  3. 78
      modules/core/src/gpumat.cpp
  4. 4
      modules/gpu/src/cuda/ccomponetns.cu
  5. 12
      modules/gpuarithm/src/cuda/absdiff_mat.cu
  6. 2
      modules/gpuarithm/src/cuda/absdiff_scalar.cu
  7. 12
      modules/gpuarithm/src/cuda/add_mat.cu
  8. 2
      modules/gpuarithm/src/cuda/add_scalar.cu
  9. 4
      modules/gpuarithm/src/cuda/add_weighted.cu
  10. 16
      modules/gpuarithm/src/cuda/cmp_mat.cu
  11. 1
      modules/gpuarithm/src/cuda/countnonzero.cu
  12. 2
      modules/gpuarithm/src/cuda/div_inv.cu
  13. 14
      modules/gpuarithm/src/cuda/div_mat.cu
  14. 2
      modules/gpuarithm/src/cuda/div_scalar.cu
  15. 16
      modules/gpuarithm/src/cuda/math.cu
  16. 1
      modules/gpuarithm/src/cuda/minmax.cu
  17. 16
      modules/gpuarithm/src/cuda/minmax_mat.cu
  18. 1
      modules/gpuarithm/src/cuda/minmaxloc.cu
  19. 14
      modules/gpuarithm/src/cuda/mul_mat.cu
  20. 2
      modules/gpuarithm/src/cuda/mul_scalar.cu
  21. 17
      modules/gpuarithm/src/cuda/reduce.cu
  22. 12
      modules/gpuarithm/src/cuda/sub_mat.cu
  23. 2
      modules/gpuarithm/src/cuda/sub_scalar.cu
  24. 1
      modules/gpuarithm/src/cuda/sum.cu
  25. 2
      modules/gpufilters/perf/perf_filters.cpp
  26. 5
      modules/gpuimgproc/src/cuda/hough.cu
  27. 8
      modules/gpuoptflow/perf/perf_optflow.cpp
  28. 4
      modules/gpuoptflow/test/test_optflow.cpp

File diff suppressed because it is too large (modules/core/include/opencv2/core/cuda/vec_math.hpp, 1064 changed lines). Load Diff

@ -60,6 +60,8 @@
# include "opencv2/core/stream_accessor.hpp"
# include "opencv2/core/cuda/common.hpp"
# define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD)
# define CUDART_MINIMUM_REQUIRED_VERSION 4020
# if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)

@ -1547,48 +1547,90 @@ namespace
const ErrorEntry npp_errors [] =
{
error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
#if defined (_MSC_VER)
#if defined (_MSC_VER)
error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
#endif
#endif
#if NPP_VERSION < 5500
error_entry( NPP_BAD_ARG_ERROR ),
error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_TEXTURE_BIND_ERROR ),
error_entry( NPP_COEFF_ERROR ),
error_entry( NPP_RECT_ERROR ),
error_entry( NPP_QUAD_ERROR ),
error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
error_entry( NPP_NOT_EVEN_STEP_ERROR ),
error_entry( NPP_INTERPOLATION_ERROR ),
error_entry( NPP_RESIZE_FACTOR_ERROR ),
error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
error_entry( NPP_MEMFREE_ERR ),
error_entry( NPP_MEMSET_ERR ),
error_entry( NPP_MEMCPY_ERROR ),
error_entry( NPP_MEM_ALLOC_ERR ),
error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_MIRROR_FLIP_ERR ),
error_entry( NPP_INVALID_INPUT ),
error_entry( NPP_POINTER_ERROR ),
error_entry( NPP_WARNING ),
error_entry( NPP_ODD_ROI_WARNING ),
#else
error_entry( NPP_INVALID_HOST_POINTER_ERROR ),
error_entry( NPP_INVALID_DEVICE_POINTER_ERROR ),
error_entry( NPP_LUT_PALETTE_BITSIZE_ERROR ),
error_entry( NPP_ZC_MODE_NOT_SUPPORTED_ERROR ),
error_entry( NPP_MEMFREE_ERROR ),
error_entry( NPP_MEMSET_ERROR ),
error_entry( NPP_QUALITY_INDEX_ERROR ),
error_entry( NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_CHANNEL_ORDER_ERROR ),
error_entry( NPP_ZERO_MASK_VALUE_ERROR ),
error_entry( NPP_QUADRANGLE_ERROR ),
error_entry( NPP_RECTANGLE_ERROR ),
error_entry( NPP_COEFFICIENT_ERROR ),
error_entry( NPP_NUMBER_OF_CHANNELS_ERROR ),
error_entry( NPP_COI_ERROR ),
error_entry( NPP_DIVISOR_ERROR ),
error_entry( NPP_CHANNEL_ERROR ),
error_entry( NPP_STRIDE_ERROR ),
error_entry( NPP_ANCHOR_ERROR ),
error_entry( NPP_MASK_SIZE_ERROR ),
error_entry( NPP_MIRROR_FLIP_ERROR ),
error_entry( NPP_MOMENT_00_ZERO_ERROR ),
error_entry( NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR ),
error_entry( NPP_THRESHOLD_ERROR ),
error_entry( NPP_CONTEXT_MATCH_ERROR ),
error_entry( NPP_FFT_FLAG_ERROR ),
error_entry( NPP_FFT_ORDER_ERROR ),
error_entry( NPP_SCALE_RANGE_ERROR ),
error_entry( NPP_DATA_TYPE_ERROR ),
error_entry( NPP_OUT_OFF_RANGE_ERROR ),
error_entry( NPP_DIVIDE_BY_ZERO_ERROR ),
error_entry( NPP_MEMORY_ALLOCATION_ERR ),
error_entry( NPP_RANGE_ERROR ),
error_entry( NPP_BAD_ARGUMENT_ERROR ),
error_entry( NPP_NO_MEMORY_ERROR ),
error_entry( NPP_ERROR_RESERVED ),
error_entry( NPP_NO_OPERATION_WARNING ),
error_entry( NPP_DIVIDE_BY_ZERO_WARNING ),
error_entry( NPP_WRONG_INTERSECTION_ROI_WARNING ),
#endif
error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_TEXTURE_BIND_ERROR ),
error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
error_entry( NPP_NOT_EVEN_STEP_ERROR ),
error_entry( NPP_INTERPOLATION_ERROR ),
error_entry( NPP_RESIZE_FACTOR_ERROR ),
error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
error_entry( NPP_MEMCPY_ERROR ),
error_entry( NPP_ALIGNMENT_ERROR ),
error_entry( NPP_STEP_ERROR ),
error_entry( NPP_SIZE_ERROR ),
error_entry( NPP_POINTER_ERROR ),
error_entry( NPP_NULL_POINTER_ERROR ),
error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
error_entry( NPP_ERROR ),
error_entry( NPP_NO_ERROR ),
error_entry( NPP_SUCCESS ),
error_entry( NPP_WARNING ),
error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
error_entry( NPP_DOUBLE_SIZE_WARNING ),
error_entry( NPP_ODD_ROI_WARNING )
error_entry( NPP_DOUBLE_SIZE_WARNING )
};
const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);

@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace cudev
template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
{
I d = a - b;
I d = saturate_cast<I>(a - b);
return lo.x <= d.x && d.x <= hi.x &&
lo.y <= d.y && d.y <= hi.y &&
lo.z <= d.z && d.z <= hi.z;
@ -169,7 +169,7 @@ namespace cv { namespace gpu { namespace cudev
template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
{
I d = a - b;
I d = saturate_cast<I>(a - b);
return lo.x <= d.x && d.x <= hi.x &&
lo.y <= d.y && d.y <= hi.y &&
lo.z <= d.z && d.z <= hi.z &&

@ -62,8 +62,8 @@ namespace arithm
return vabsdiff4(a, b);
}
__device__ __forceinline__ VAbsDiff4() {}
__device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {}
__host__ __device__ __forceinline__ VAbsDiff4() {}
__host__ __device__ __forceinline__ VAbsDiff4(const VAbsDiff4&) {}
};
struct VAbsDiff2 : binary_function<uint, uint, uint>
@ -73,8 +73,8 @@ namespace arithm
return vabsdiff2(a, b);
}
__device__ __forceinline__ VAbsDiff2() {}
__device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {}
__host__ __device__ __forceinline__ VAbsDiff2() {}
__host__ __device__ __forceinline__ VAbsDiff2(const VAbsDiff2&) {}
};
__device__ __forceinline__ int _abs(int a)
@ -97,8 +97,8 @@ namespace arithm
return saturate_cast<T>(_abs(a - b));
}
__device__ __forceinline__ AbsDiffMat() {}
__device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {}
__host__ __device__ __forceinline__ AbsDiffMat() {}
__host__ __device__ __forceinline__ AbsDiffMat(const AbsDiffMat&) {}
};
}

@ -59,7 +59,7 @@ namespace arithm
{
S val;
explicit AbsDiffScalar(S val_) : val(val_) {}
__host__ explicit AbsDiffScalar(S val_) : val(val_) {}
__device__ __forceinline__ T operator ()(T a) const
{

@ -62,8 +62,8 @@ namespace arithm
return vadd4(a, b);
}
__device__ __forceinline__ VAdd4() {}
__device__ __forceinline__ VAdd4(const VAdd4& other) {}
__host__ __device__ __forceinline__ VAdd4() {}
__host__ __device__ __forceinline__ VAdd4(const VAdd4&) {}
};
struct VAdd2 : binary_function<uint, uint, uint>
@ -73,8 +73,8 @@ namespace arithm
return vadd2(a, b);
}
__device__ __forceinline__ VAdd2() {}
__device__ __forceinline__ VAdd2(const VAdd2& other) {}
__host__ __device__ __forceinline__ VAdd2() {}
__host__ __device__ __forceinline__ VAdd2(const VAdd2&) {}
};
template <typename T, typename D> struct AddMat : binary_function<T, T, D>
@ -84,8 +84,8 @@ namespace arithm
return saturate_cast<D>(a + b);
}
__device__ __forceinline__ AddMat() {}
__device__ __forceinline__ AddMat(const AddMat& other) {}
__host__ __device__ __forceinline__ AddMat() {}
__host__ __device__ __forceinline__ AddMat(const AddMat&) {}
};
}

@ -59,7 +59,7 @@ namespace arithm
{
S val;
explicit AddScalar(S val_) : val(val_) {}
__host__ explicit AddScalar(S val_) : val(val_) {}
__device__ __forceinline__ D operator ()(T a) const
{

@ -74,7 +74,7 @@ namespace arithm
float beta;
float gamma;
AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(static_cast<float>(alpha_)), beta(static_cast<float>(beta_)), gamma(static_cast<float>(gamma_)) {}
__host__ AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(static_cast<float>(alpha_)), beta(static_cast<float>(beta_)), gamma(static_cast<float>(gamma_)) {}
__device__ __forceinline__ D operator ()(T1 a, T2 b) const
{
@ -87,7 +87,7 @@ namespace arithm
double beta;
double gamma;
AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {}
__host__ AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {}
__device__ __forceinline__ D operator ()(T1 a, T2 b) const
{

@ -62,8 +62,8 @@ namespace arithm
return vcmpeq4(a, b);
}
__device__ __forceinline__ VCmpEq4() {}
__device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {}
__host__ __device__ __forceinline__ VCmpEq4() {}
__host__ __device__ __forceinline__ VCmpEq4(const VCmpEq4&) {}
};
struct VCmpNe4 : binary_function<uint, uint, uint>
{
@ -72,8 +72,8 @@ namespace arithm
return vcmpne4(a, b);
}
__device__ __forceinline__ VCmpNe4() {}
__device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {}
__host__ __device__ __forceinline__ VCmpNe4() {}
__host__ __device__ __forceinline__ VCmpNe4(const VCmpNe4&) {}
};
struct VCmpLt4 : binary_function<uint, uint, uint>
{
@ -82,8 +82,8 @@ namespace arithm
return vcmplt4(a, b);
}
__device__ __forceinline__ VCmpLt4() {}
__device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {}
__host__ __device__ __forceinline__ VCmpLt4() {}
__host__ __device__ __forceinline__ VCmpLt4(const VCmpLt4&) {}
};
struct VCmpLe4 : binary_function<uint, uint, uint>
{
@ -92,8 +92,8 @@ namespace arithm
return vcmple4(a, b);
}
__device__ __forceinline__ VCmpLe4() {}
__device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {}
__host__ __device__ __forceinline__ VCmpLe4() {}
__host__ __device__ __forceinline__ VCmpLe4(const VCmpLe4&) {}
};
template <class Op, typename T>

@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp"

@ -59,7 +59,7 @@ namespace arithm
{
S val;
explicit DivInv(S val_) : val(val_) {}
__host__ explicit DivInv(S val_) : val(val_) {}
__device__ __forceinline__ D operator ()(T a) const
{

@ -91,8 +91,8 @@ namespace arithm
return b != 0 ? saturate_cast<D>(a / b) : 0;
}
__device__ __forceinline__ Div() {}
__device__ __forceinline__ Div(const Div& other) {}
__host__ __device__ __forceinline__ Div() {}
__host__ __device__ __forceinline__ Div(const Div&) {}
};
template <typename T> struct Div<T, float> : binary_function<T, T, float>
{
@ -101,8 +101,8 @@ namespace arithm
return b != 0 ? static_cast<float>(a) / b : 0;
}
__device__ __forceinline__ Div() {}
__device__ __forceinline__ Div(const Div& other) {}
__host__ __device__ __forceinline__ Div() {}
__host__ __device__ __forceinline__ Div(const Div&) {}
};
template <typename T> struct Div<T, double> : binary_function<T, T, double>
{
@ -111,15 +111,15 @@ namespace arithm
return b != 0 ? static_cast<double>(a) / b : 0;
}
__device__ __forceinline__ Div() {}
__device__ __forceinline__ Div(const Div& other) {}
__host__ __device__ __forceinline__ Div() {}
__host__ __device__ __forceinline__ Div(const Div&) {}
};
template <typename T, typename S, typename D> struct DivScale : binary_function<T, T, D>
{
S scale;
explicit DivScale(S scale_) : scale(scale_) {}
__host__ explicit DivScale(S scale_) : scale(scale_) {}
__device__ __forceinline__ D operator ()(T a, T b) const
{

@ -59,7 +59,7 @@ namespace arithm
{
S val;
explicit DivScalar(S val_) : val(val_) {}
__host__ explicit DivScalar(S val_) : val(val_) {}
__device__ __forceinline__ D operator ()(T a) const
{

@ -94,8 +94,8 @@ namespace arithm
return saturate_cast<T>(x * x);
}
__device__ __forceinline__ Sqr() {}
__device__ __forceinline__ Sqr(const Sqr& other) {}
__host__ __device__ __forceinline__ Sqr() {}
__host__ __device__ __forceinline__ Sqr(const Sqr&) {}
};
}
@ -190,8 +190,8 @@ namespace arithm
return saturate_cast<T>(f(x));
}
__device__ __forceinline__ Exp() {}
__device__ __forceinline__ Exp(const Exp& other) {}
__host__ __device__ __forceinline__ Exp() {}
__host__ __device__ __forceinline__ Exp(const Exp&) {}
};
}
@ -228,7 +228,7 @@ namespace arithm
{
float power;
PowOp(double power_) : power(static_cast<float>(power_)) {}
__host__ explicit PowOp(double power_) : power(static_cast<float>(power_)) {}
__device__ __forceinline__ T operator()(T e) const
{
@ -239,7 +239,7 @@ namespace arithm
{
float power;
PowOp(double power_) : power(static_cast<float>(power_)) {}
__host__ explicit PowOp(double power_) : power(static_cast<float>(power_)) {}
__device__ __forceinline__ T operator()(T e) const
{
@ -255,7 +255,7 @@ namespace arithm
{
float power;
PowOp(double power_) : power(static_cast<float>(power_)) {}
__host__ explicit PowOp(double power_) : power(static_cast<float>(power_)) {}
__device__ __forceinline__ float operator()(float e) const
{
@ -266,7 +266,7 @@ namespace arithm
{
double power;
PowOp(double power_) : power(power_) {}
__host__ explicit PowOp(double power_) : power(power_) {}
__device__ __forceinline__ double operator()(double e) const
{

@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/limits.hpp"

@ -65,8 +65,8 @@ namespace arithm
return vmin4(a, b);
}
__device__ __forceinline__ VMin4() {}
__device__ __forceinline__ VMin4(const VMin4& other) {}
__host__ __device__ __forceinline__ VMin4() {}
__host__ __device__ __forceinline__ VMin4(const VMin4&) {}
};
struct VMin2 : binary_function<uint, uint, uint>
@ -76,8 +76,8 @@ namespace arithm
return vmin2(a, b);
}
__device__ __forceinline__ VMin2() {}
__device__ __forceinline__ VMin2(const VMin2& other) {}
__host__ __device__ __forceinline__ VMin2() {}
__host__ __device__ __forceinline__ VMin2(const VMin2&) {}
};
}
@ -151,8 +151,8 @@ namespace arithm
return vmax4(a, b);
}
__device__ __forceinline__ VMax4() {}
__device__ __forceinline__ VMax4(const VMax4& other) {}
__host__ __device__ __forceinline__ VMax4() {}
__host__ __device__ __forceinline__ VMax4(const VMax4&) {}
};
struct VMax2 : binary_function<uint, uint, uint>
@ -162,8 +162,8 @@ namespace arithm
return vmax2(a, b);
}
__device__ __forceinline__ VMax2() {}
__device__ __forceinline__ VMax2(const VMax2& other) {}
__host__ __device__ __forceinline__ VMax2() {}
__host__ __device__ __forceinline__ VMax2(const VMax2&) {}
};
}

@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/limits.hpp"

@ -69,8 +69,8 @@ namespace arithm
return res;
}
__device__ __forceinline__ Mul_8uc4_32f() {}
__device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f& other) {}
__host__ __device__ __forceinline__ Mul_8uc4_32f() {}
__host__ __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f&) {}
};
struct Mul_16sc4_32f : binary_function<short4, float, short4>
@ -81,8 +81,8 @@ namespace arithm
saturate_cast<short>(a.z * b), saturate_cast<short>(a.w * b));
}
__device__ __forceinline__ Mul_16sc4_32f() {}
__device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f& other) {}
__host__ __device__ __forceinline__ Mul_16sc4_32f() {}
__host__ __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f&) {}
};
template <typename T, typename D> struct Mul : binary_function<T, T, D>
@ -92,15 +92,15 @@ namespace arithm
return saturate_cast<D>(a * b);
}
__device__ __forceinline__ Mul() {}
__device__ __forceinline__ Mul(const Mul& other) {}
__host__ __device__ __forceinline__ Mul() {}
__host__ __device__ __forceinline__ Mul(const Mul&) {}
};
template <typename T, typename S, typename D> struct MulScale : binary_function<T, T, D>
{
S scale;
explicit MulScale(S scale_) : scale(scale_) {}
__host__ explicit MulScale(S scale_) : scale(scale_) {}
__device__ __forceinline__ D operator ()(T a, T b) const
{

@ -59,7 +59,7 @@ namespace arithm
{
S val;
explicit MulScalar(S val_) : val(val_) {}
__host__ explicit MulScalar(S val_) : val(val_) {}
__device__ __forceinline__ D operator ()(T a) const
{

@ -46,6 +46,7 @@
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/limits.hpp"
@ -76,8 +77,8 @@ namespace reduce
return r;
}
__device__ __forceinline__ Sum() {}
__device__ __forceinline__ Sum(const Sum&) {}
__host__ __device__ __forceinline__ Sum() {}
__host__ __device__ __forceinline__ Sum(const Sum&) {}
};
struct Avg
@ -100,8 +101,8 @@ namespace reduce
return r / sz;
}
__device__ __forceinline__ Avg() {}
__device__ __forceinline__ Avg(const Avg&) {}
__host__ __device__ __forceinline__ Avg() {}
__host__ __device__ __forceinline__ Avg(const Avg&) {}
};
struct Min
@ -125,8 +126,8 @@ namespace reduce
return r;
}
__device__ __forceinline__ Min() {}
__device__ __forceinline__ Min(const Min&) {}
__host__ __device__ __forceinline__ Min() {}
__host__ __device__ __forceinline__ Min(const Min&) {}
};
struct Max
@ -150,8 +151,8 @@ namespace reduce
return r;
}
__device__ __forceinline__ Max() {}
__device__ __forceinline__ Max(const Max&) {}
__host__ __device__ __forceinline__ Max() {}
__host__ __device__ __forceinline__ Max(const Max&) {}
};
///////////////////////////////////////////////////////////

@ -62,8 +62,8 @@ namespace arithm
return vsub4(a, b);
}
__device__ __forceinline__ VSub4() {}
__device__ __forceinline__ VSub4(const VSub4& other) {}
__host__ __device__ __forceinline__ VSub4() {}
__host__ __device__ __forceinline__ VSub4(const VSub4&) {}
};
struct VSub2 : binary_function<uint, uint, uint>
@ -73,8 +73,8 @@ namespace arithm
return vsub2(a, b);
}
__device__ __forceinline__ VSub2() {}
__device__ __forceinline__ VSub2(const VSub2& other) {}
__host__ __device__ __forceinline__ VSub2() {}
__host__ __device__ __forceinline__ VSub2(const VSub2&) {}
};
template <typename T, typename D> struct SubMat : binary_function<T, T, D>
@ -84,8 +84,8 @@ namespace arithm
return saturate_cast<D>(a - b);
}
__device__ __forceinline__ SubMat() {}
__device__ __forceinline__ SubMat(const SubMat& other) {}
__host__ __device__ __forceinline__ SubMat() {}
__host__ __device__ __forceinline__ SubMat(const SubMat&) {}
};
}

@ -59,7 +59,7 @@ namespace arithm
{
S val;
explicit SubScalar(S val_) : val(val_) {}
__host__ explicit SubScalar(S val_) : val(val_) {}
__device__ __forceinline__ D operator ()(T a) const
{

@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/utility.hpp"

@ -72,7 +72,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Blur,
TEST_CYCLE() cv::gpu::blur(d_src, dst, cv::Size(ksize, ksize));
GPU_SANITY_CHECK(dst);
GPU_SANITY_CHECK(dst, 1);
}
else
{

@ -48,6 +48,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/limits.hpp"
#include "opencv2/core/cuda/dynamic_smem.hpp"
@ -811,7 +812,7 @@ namespace cv { namespace gpu { namespace cudev
const int ind = ::atomicAdd(r_sizes + n, 1);
if (ind < maxSize)
r_table(n, ind) = p - templCenter;
r_table(n, ind) = saturate_cast<short2>(p - templCenter);
}
void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
@ -855,7 +856,7 @@ namespace cv { namespace gpu { namespace cudev
for (int j = 0; j < r_row_size; ++j)
{
short2 c = p - r_row[j];
int2 c = p - r_row[j];
c.x = __float2int_rn(c.x * idp);
c.y = __float2int_rn(c.y * idp);

@ -84,7 +84,7 @@ PERF_TEST_P(ImagePair, InterpolateFrames,
TEST_CYCLE() cv::gpu::interpolateFrames(d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, 0.5f, newFrame, d_buf);
GPU_SANITY_CHECK(newFrame);
GPU_SANITY_CHECK(newFrame, 1e-4);
}
else
{
@ -123,7 +123,7 @@ PERF_TEST_P(ImagePair, CreateOpticalFlowNeedleMap,
TEST_CYCLE() cv::gpu::createOpticalFlowNeedleMap(u, v, vertex, colors);
GPU_SANITY_CHECK(vertex);
GPU_SANITY_CHECK(vertex, 1e-6);
GPU_SANITY_CHECK(colors);
}
else
@ -161,8 +161,8 @@ PERF_TEST_P(ImagePair, BroxOpticalFlow,
TEST_CYCLE() d_flow(d_frame0, d_frame1, u, v);
GPU_SANITY_CHECK(u);
GPU_SANITY_CHECK(v);
GPU_SANITY_CHECK(u, 1e-1);
GPU_SANITY_CHECK(v, 1e-1);
}
else
{

@ -103,8 +103,8 @@ GPU_TEST_P(BroxOpticalFlow, Regression)
for (int i = 0; i < v_gold.rows; ++i)
f.read(v_gold.ptr<char>(i), v_gold.cols * sizeof(float));
EXPECT_MAT_NEAR(u_gold, u, 0);
EXPECT_MAT_NEAR(v_gold, v, 0);
EXPECT_MAT_SIMILAR(u_gold, u, 1e-3);
EXPECT_MAT_SIMILAR(v_gold, v, 1e-3);
#else
std::ofstream f(fname.c_str(), std::ios_base::binary);

Loading…
Cancel
Save