fixed several bugs in gpu arithm functions

refactored tests for them
pull/13383/head
Vladislav Vinogradov 13 years ago
parent f58c40bfab
commit 844bdea5ac
  1. 4
      modules/gpu/include/opencv2/gpu/gpu.hpp
  2. 4
      modules/gpu/src/cuda/element_operations.cu
  3. 26
      modules/gpu/src/element_operations.cpp
  4. 1804
      modules/gpu/test/test_arithm.cpp
  5. 31
      modules/gpu/test/utility.hpp

@ -638,11 +638,11 @@ CV_EXPORTS void bitwise_xor(const GpuMat& src1, const Scalar& sc, GpuMat& dst, S
//! pixel by pixel right shift of an image by a constant value //! pixel by pixel right shift of an image by a constant value
//! supports 1, 3 and 4 channels images with integers elements //! supports 1, 3 and 4 channels images with integers elements
CV_EXPORTS void rshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()); CV_EXPORTS void rshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream = Stream::Null());
//! pixel by pixel left shift of an image by a constant value //! pixel by pixel left shift of an image by a constant value
//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth //! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
CV_EXPORTS void lshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()); CV_EXPORTS void lshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream = Stream::Null());
//! computes per-element minimum of two arrays (dst = min(src1, src2)) //! computes per-element minimum of two arrays (dst = min(src1, src2))
CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null()); CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null());

@ -706,8 +706,8 @@ namespace cv { namespace gpu { namespace device
{ {
__device__ __forceinline__ short4 operator ()(short4 a, float b) const __device__ __forceinline__ short4 operator ()(short4 a, float b) const
{ {
return b != 0 ? make_short4(saturate_cast<short>(a.x / b), saturate_cast<uchar>(a.y / b), return b != 0 ? make_short4(saturate_cast<short>(a.x / b), saturate_cast<short>(a.y / b),
saturate_cast<short>(a.z / b), saturate_cast<uchar>(a.w / b)) saturate_cast<short>(a.z / b), saturate_cast<short>(a.w / b))
: make_short4(0,0,0,0); : make_short4(0,0,0,0);
} }
}; };

@ -71,8 +71,8 @@ void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&,
void cv::gpu::bitwise_and(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::bitwise_and(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::bitwise_xor(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::bitwise_xor(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::rshift(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::rshift(const GpuMat&, Scalar_<int>, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::lshift(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::lshift(const GpuMat&, Scalar_<int>, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::min(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::min(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
@ -462,15 +462,14 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
{0/*subtract_gpu<double, unsigned char>*/, 0/*subtract_gpu<double, signed char>*/, 0/*subtract_gpu<double, unsigned short>*/, 0/*subtract_gpu<double, short>*/, 0/*subtract_gpu<double, int>*/, 0/*subtract_gpu<double, float>*/, subtract_gpu<double, double>} {0/*subtract_gpu<double, unsigned char>*/, 0/*subtract_gpu<double, signed char>*/, 0/*subtract_gpu<double, unsigned short>*/, 0/*subtract_gpu<double, short>*/, 0/*subtract_gpu<double, int>*/, 0/*subtract_gpu<double, float>*/, subtract_gpu<double, double>}
}; };
static const func_t npp_funcs[7] = static const func_t npp_funcs[6] =
{ {
NppArithm<CV_8U, nppiSub_8u_C1RSfs>::call, NppArithm<CV_8U, nppiSub_8u_C1RSfs>::call,
0, 0,
NppArithm<CV_16U, nppiSub_16u_C1RSfs>::call, NppArithm<CV_16U, nppiSub_16u_C1RSfs>::call,
NppArithm<CV_16S, nppiSub_16s_C1RSfs>::call, NppArithm<CV_16S, nppiSub_16s_C1RSfs>::call,
NppArithm<CV_32S, nppiSub_32s_C1RSfs>::call, NppArithm<CV_32S, nppiSub_32s_C1RSfs>::call,
NppArithm<CV_32F, nppiSub_32f_C1R>::call, NppArithm<CV_32F, nppiSub_32f_C1R>::call
subtract_gpu<double, double>
}; };
CV_Assert(src1.type() != CV_8S); CV_Assert(src1.type() != CV_8S);
@ -484,7 +483,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
cudaStream_t stream = StreamAccessor::getStream(s); cudaStream_t stream = StreamAccessor::getStream(s);
if (mask.empty() && dst.type() == src1.type()) if (mask.empty() && dst.type() == src1.type() && src1.depth() <= CV_32F)
{ {
npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), PtrStepb(), stream); npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), PtrStepb(), stream);
return; return;
@ -734,15 +733,14 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
{0/*divide_gpu<double, unsigned char>*/, 0/*divide_gpu<double, signed char>*/, 0/*divide_gpu<double, unsigned short>*/, 0/*divide_gpu<double, short>*/, 0/*divide_gpu<double, int>*/, 0/*divide_gpu<double, float>*/, divide_gpu<double, double>} {0/*divide_gpu<double, unsigned char>*/, 0/*divide_gpu<double, signed char>*/, 0/*divide_gpu<double, unsigned short>*/, 0/*divide_gpu<double, short>*/, 0/*divide_gpu<double, int>*/, 0/*divide_gpu<double, float>*/, divide_gpu<double, double>}
}; };
static const func_t npp_funcs[7] = static const func_t npp_funcs[6] =
{ {
NppArithm<CV_8U, nppiDiv_8u_C1RSfs>::call, NppArithm<CV_8U, nppiDiv_8u_C1RSfs>::call,
0, 0,
NppArithm<CV_16U, nppiDiv_16u_C1RSfs>::call, NppArithm<CV_16U, nppiDiv_16u_C1RSfs>::call,
NppArithm<CV_16S, nppiDiv_16s_C1RSfs>::call, NppArithm<CV_16S, nppiDiv_16s_C1RSfs>::call,
NppArithm<CV_32S, nppiDiv_32s_C1RSfs>::call, NppArithm<CV_32S, nppiDiv_32s_C1RSfs>::call,
NppArithm<CV_32F, nppiDiv_32f_C1R>::call, NppArithm<CV_32F, nppiDiv_32f_C1R>::call
divide_gpu<double, double>
}; };
cudaStream_t stream = StreamAccessor::getStream(s); cudaStream_t stream = StreamAccessor::getStream(s);
@ -753,7 +751,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
dst.create(src1.size(), src1.type()); dst.create(src1.size(), src1.type());
multiply_gpu(static_cast<DevMem2D_<uchar4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<uchar4> >(dst), stream); divide_gpu(static_cast<DevMem2D_<uchar4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<uchar4> >(dst), stream);
} }
else if (src1.type() == CV_16SC4 && src2.type() == CV_32FC1) else if (src1.type() == CV_16SC4 && src2.type() == CV_32FC1)
{ {
@ -761,7 +759,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
dst.create(src1.size(), src1.type()); dst.create(src1.size(), src1.type());
multiply_gpu(static_cast<DevMem2D_<short4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<short4> >(dst), stream); divide_gpu(static_cast<DevMem2D_<short4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<short4> >(dst), stream);
} }
else else
{ {
@ -773,7 +771,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels())); dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));
if (scale == 1 && dst.type() == src1.type()) if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F)
{ {
npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), 1, stream); npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), 1, stream);
return; return;
@ -1729,7 +1727,7 @@ namespace
}; };
} }
void cv::gpu::rshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream) void cv::gpu::rshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream)
{ {
typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream); typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[5][4] = static const func_t funcs[5][4] =
@ -1749,7 +1747,7 @@ void cv::gpu::rshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& s
funcs[src.depth()][src.channels() - 1](src, sc, dst, StreamAccessor::getStream(stream)); funcs[src.depth()][src.channels() - 1](src, sc, dst, StreamAccessor::getStream(stream));
} }
void cv::gpu::lshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream) void cv::gpu::lshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream)
{ {
typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream); typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[5][4] = static const func_t funcs[5][4] =

File diff suppressed because it is too large Load Diff

@ -162,10 +162,37 @@ CV_FLAGS(DftFlags, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX
#define ALL_DEVICES testing::ValuesIn(devices()) #define ALL_DEVICES testing::ValuesIn(devices())
#define DEVICES(feature) testing::ValuesIn(devices(feature)) #define DEVICES(feature) testing::ValuesIn(devices(feature))
#define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113))
#define ALL_DEPTH testing::Values(MatDepth(CV_8U), MatDepth(CV_8S), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32S), MatDepth(CV_32F), MatDepth(CV_64F))
#define ALL_TYPES testing::ValuesIn(all_types()) #define ALL_TYPES testing::ValuesIn(all_types())
#define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end)) #define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end))
#define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113)) #define DEPTH_PAIRS testing::Values(std::make_pair(MatDepth(CV_8U), MatDepth(CV_8U)), \
std::make_pair(MatDepth(CV_8U), MatDepth(CV_16U)), \
std::make_pair(MatDepth(CV_8U), MatDepth(CV_16S)), \
std::make_pair(MatDepth(CV_8U), MatDepth(CV_32S)), \
std::make_pair(MatDepth(CV_8U), MatDepth(CV_32F)), \
std::make_pair(MatDepth(CV_8U), MatDepth(CV_64F)), \
\
std::make_pair(MatDepth(CV_16U), MatDepth(CV_16U)), \
std::make_pair(MatDepth(CV_16U), MatDepth(CV_32S)), \
std::make_pair(MatDepth(CV_16U), MatDepth(CV_32F)), \
std::make_pair(MatDepth(CV_16U), MatDepth(CV_64F)), \
\
std::make_pair(MatDepth(CV_16S), MatDepth(CV_16S)), \
std::make_pair(MatDepth(CV_16S), MatDepth(CV_32S)), \
std::make_pair(MatDepth(CV_16S), MatDepth(CV_32F)), \
std::make_pair(MatDepth(CV_16S), MatDepth(CV_64F)), \
\
std::make_pair(MatDepth(CV_32S), MatDepth(CV_32S)), \
std::make_pair(MatDepth(CV_32S), MatDepth(CV_32F)), \
std::make_pair(MatDepth(CV_32S), MatDepth(CV_64F)), \
\
std::make_pair(MatDepth(CV_32F), MatDepth(CV_32F)), \
std::make_pair(MatDepth(CV_32F), MatDepth(CV_64F)), \
\
std::make_pair(MatDepth(CV_64F), MatDepth(CV_64F)))
#define WHOLE testing::Values(UseRoi(false)) #define WHOLE testing::Values(UseRoi(false))
#define SUBMAT testing::Values(UseRoi(true)) #define SUBMAT testing::Values(UseRoi(true))
@ -173,4 +200,6 @@ CV_FLAGS(DftFlags, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX
#define DIRECT_INVERSE testing::Values(Inverse(false), Inverse(true)) #define DIRECT_INVERSE testing::Values(Inverse(false), Inverse(true))
#define ALL_CMP_CODES testing::Values(CmpCode(cv::CMP_EQ), CmpCode(cv::CMP_NE), CmpCode(cv::CMP_GT), CmpCode(cv::CMP_GE), CmpCode(cv::CMP_LT), CmpCode(cv::CMP_LE))
#endif // __OPENCV_TEST_UTILITY_HPP__ #endif // __OPENCV_TEST_UTILITY_HPP__

Loading…
Cancel
Save