@@ -71,8 +71,8 @@ void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&,
 void cv::gpu::bitwise_and(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::bitwise_xor(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
-void cv::gpu::rshift(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
-void cv::gpu::lshift(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
+void cv::gpu::rshift(const GpuMat&, Scalar_<int>, GpuMat&, Stream&) { throw_nogpu(); }
+void cv::gpu::lshift(const GpuMat&, Scalar_<int>, GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::min(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
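
Note: the shift-count parameter changes from const Scalar& (four doubles) to Scalar_<int>, since a bit-shift count is a small per-channel integer. As a rough illustration of the element-wise semantics these functions expose, here is a CPU reference sketch; the helper name and the CV_8UC3 assumption are illustrative, not part of this patch:

    #include <opencv2/core/core.hpp>

    // CPU reference sketch for per-channel right shift (hypothetical helper,
    // assumes a CV_8UC3 input; the real work happens in the CUDA kernels).
    void rshift_reference(const cv::Mat& src, cv::Scalar_<int> sc, cv::Mat& dst)
    {
        dst.create(src.size(), src.type());
        for (int y = 0; y < src.rows; ++y)
            for (int x = 0; x < src.cols; ++x)
                for (int c = 0; c < 3; ++c)
                    dst.at<cv::Vec3b>(y, x)[c] = src.at<cv::Vec3b>(y, x)[c] >> sc[c];
    }
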
@@ -462,15 +462,14 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
         {0/*subtract_gpu<double, unsigned char>*/, 0/*subtract_gpu<double, signed char>*/, 0/*subtract_gpu<double, unsigned short>*/, 0/*subtract_gpu<double, short>*/, 0/*subtract_gpu<double, int>*/, 0/*subtract_gpu<double, float>*/, subtract_gpu<double, double>}
     };
 
-    static const func_t npp_funcs[7] =
+    static const func_t npp_funcs[6] =
     {
         NppArithm<CV_8U, nppiSub_8u_C1RSfs>::call,
         0,
         NppArithm<CV_16U, nppiSub_16u_C1RSfs>::call,
         NppArithm<CV_16S, nppiSub_16s_C1RSfs>::call,
         NppArithm<CV_32S, nppiSub_32s_C1RSfs>::call,
-        NppArithm<CV_32F, nppiSub_32f_C1R>::call,
-        subtract_gpu<double, double>
+        NppArithm<CV_32F, nppiSub_32f_C1R>::call
     };
 
     CV_Assert(src1.type() != CV_8S);
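
Note: npp_funcs is indexed directly by src1.depth(), following OpenCV's depth codes (CV_8U=0, CV_8S=1, CV_16U=2, CV_16S=3, CV_32S=4, CV_32F=5, CV_64F=6). The CV_8S slot is 0, presumably because NPP offers no signed-8-bit subtract (note the CV_Assert rejecting CV_8S), and the reshape(1) at the call site flattens multi-channel data so the single-channel NPP variants apply. Dropping the seventh slot removes CV_64F from the table, which is what the new depth guard in the next hunk enforces. A sketch of the invariant, assuming those depth codes:

    // Invariant sketch: with 6 entries, only depth codes 0..5 may index the table.
    CV_Assert(src1.depth() <= CV_32F);            // CV_64F == 6 would read past the end
    const func_t func = npp_funcs[src1.depth()];  // may still be 0 for CV_8S
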
@@ -484,7 +483,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
 
     cudaStream_t stream = StreamAccessor::getStream(s);
 
-    if (mask.empty() && dst.type() == src1.type())
+    if (mask.empty() && dst.type() == src1.type() && src1.depth() <= CV_32F)
     {
         npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), PtrStepb(), stream);
         return;
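
Note: without the added src1.depth() <= CV_32F clause, a CV_64F input (depth code 6) would index one past the end of the now 6-entry npp_funcs table. With the guard, double data skips the NPP fast path and reaches the templated subtract_gpu<double, double> entry in the funcs table above instead. A standalone sketch of the condition; canUseNppFastPath is a hypothetical name, not from this patch:

    #include <opencv2/core/core.hpp>

    // Mirrors the fast-path condition above: the NPP table covers depths 0..5 only.
    bool canUseNppFastPath(const cv::Mat& src1, const cv::Mat& dst, const cv::Mat& mask)
    {
        return mask.empty() && dst.type() == src1.type() && src1.depth() <= CV_32F;
    }
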
@@ -734,15 +733,14 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
         {0/*divide_gpu<double, unsigned char>*/, 0/*divide_gpu<double, signed char>*/, 0/*divide_gpu<double, unsigned short>*/, 0/*divide_gpu<double, short>*/, 0/*divide_gpu<double, int>*/, 0/*divide_gpu<double, float>*/, divide_gpu<double, double>}
     };
 
-    static const func_t npp_funcs[7] =
+    static const func_t npp_funcs[6] =
     {
         NppArithm<CV_8U, nppiDiv_8u_C1RSfs>::call,
         0,
         NppArithm<CV_16U, nppiDiv_16u_C1RSfs>::call,
         NppArithm<CV_16S, nppiDiv_16s_C1RSfs>::call,
         NppArithm<CV_32S, nppiDiv_32s_C1RSfs>::call,
-        NppArithm<CV_32F, nppiDiv_32f_C1R>::call,
-        divide_gpu<double, double>
+        NppArithm<CV_32F, nppiDiv_32f_C1R>::call
     };
 
     cudaStream_t stream = StreamAccessor::getStream(s);
@@ -753,7 +751,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
 
         dst.create(src1.size(), src1.type());
 
-        multiply_gpu(static_cast<DevMem2D_<uchar4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<uchar4> >(dst), stream);
+        divide_gpu(static_cast<DevMem2D_<uchar4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<uchar4> >(dst), stream);
     }
     else if (src1.type() == CV_16SC4 && src2.type() == CV_32FC1)
     {
@@ -761,7 +759,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
 
         dst.create(src1.size(), src1.type());
 
-        multiply_gpu(static_cast<DevMem2D_<short4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<short4> >(dst), stream);
+        divide_gpu(static_cast<DevMem2D_<short4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<short4> >(dst), stream);
     }
     else
     {
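
Note: this hunk and the previous one fix what looks like a copy-paste bug: both special-case paths of divide() (CV_8UC4 and CV_16SC4 divided by CV_32FC1) were still calling multiply_gpu, so divide() silently multiplied for these type combinations. A usage example of the repaired path; sizes and values are illustrative:

    #include <opencv2/gpu/gpu.hpp>

    // 4-channel image divided element-wise by a single-channel float map.
    cv::gpu::GpuMat src(64, 64, CV_8UC4, cv::Scalar::all(200));
    cv::gpu::GpuMat weights(64, 64, CV_32FC1, cv::Scalar::all(2.0f));
    cv::gpu::GpuMat dst;
    cv::gpu::divide(src, weights, dst);  // now hits the uchar4-by-float divide path
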
@@ -773,7 +771,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
 
     dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));
 
-    if (scale == 1 && dst.type() == src1.type())
+    if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F)
     {
         npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), 1, stream);
         return;
@@ -1729,7 +1727,7 @@ namespace
     };
 }
 
-void cv::gpu::rshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
+void cv::gpu::rshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream)
 {
     typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
     static const func_t funcs[5][4] =
@@ -1749,7 +1747,7 @@ void cv::gpu::rshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& s
     funcs[src.depth()][src.channels() - 1](src, sc, dst, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::lshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
+void cv::gpu::lshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream)
 {
     typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
     static const func_t funcs[5][4] =
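
Note: both shift wrappers dispatch through a funcs[5][4] table, with src.depth() picking the row (integer depths CV_8U through CV_32S, codes 0..4) and src.channels() - 1 picking the column (C1..C4), as the call line at the top of this hunk shows. With the new signature, callers pass plain per-channel integer counts; a minimal usage sketch (image size and contents assumed):

    #include <opencv2/gpu/gpu.hpp>

    cv::gpu::GpuMat src(32, 32, CV_8UC4, cv::Scalar::all(128));
    cv::gpu::GpuMat dst;
    cv::gpu::rshift(src, cv::Scalar_<int>(1, 2, 3, 4), dst);  // per-channel src >> {1,2,3,4}
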