diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index a58d4b0b13..c3dfcd744b 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -348,48 +348,74 @@ namespace cv
         ////////////////////////////// Arithmetics ///////////////////////////////////
 
         //! adds one matrix to another (c = a + b)
+        //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
         CV_EXPORTS void add(const GpuMat& a, const GpuMat& b, GpuMat& c);
+        //! adds scalar to a matrix (c = a + s)
+        //! supports only CV_32FC1 type
+        CV_EXPORTS void add(const GpuMat& a, const Scalar& sc, GpuMat& c);
 
         //! subtracts one matrix from another (c = a - b)
+        //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
         CV_EXPORTS void subtract(const GpuMat& a, const GpuMat& b, GpuMat& c);
+        //! subtracts scalar from a matrix (c = a - s)
+        //! supports only CV_32FC1 type
+        CV_EXPORTS void subtract(const GpuMat& a, const Scalar& sc, GpuMat& c);
 
         //! computes element-wise product of the two arrays (c = a * b)
+        //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
         CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c);
+        //! multiplies matrix by a scalar (c = a * s)
+        //! supports only CV_32FC1 type
+        CV_EXPORTS void multiply(const GpuMat& a, const Scalar& sc, GpuMat& c);
 
         //! computes element-wise quotient of the two arrays (c = a / b)
+        //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
         CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c);
+        //! computes element-wise quotient of matrix and scalar (c = a / s)
+        //! supports only CV_32FC1 type
+        CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c);
 
         //! transposes the matrix
+        //! supports only CV_8UC1 type
         CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst);
 
         //! computes element-wise absolute difference of two arrays (c = abs(a - b))
-        CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c);
+        //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
+        CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c);
+        //! computes element-wise absolute difference of array and scalar (c = abs(a - s))
+        //! supports only CV_32FC1 type
+        CV_EXPORTS void absdiff(const GpuMat& a, const Scalar& s, GpuMat& c);
 
         //! compares elements of two arrays (c = a <cmpop> b)
-        //! Now doesn't support CMP_NE.
+        //! supports CV_8UC4, CV_32FC1 types
         CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop);
 
         //! computes mean value and standard deviation of all or selected array elements
+        //! supports only CV_8UC1 type
         CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev);
 
         //! computes norm of array
-        //! Supports NORM_INF, NORM_L1, NORM_L2
+        //! supports NORM_INF, NORM_L1, NORM_L2
+        //! supports only CV_8UC1 type
         CV_EXPORTS double norm(const GpuMat& src1, int normType=NORM_L2);
 
         //! computes norm of the difference between two arrays
-        //! Supports NORM_INF, NORM_L1, NORM_L2
+        //! supports NORM_INF, NORM_L1, NORM_L2
+        //! supports only CV_8UC1 type
         CV_EXPORTS double norm(const GpuMat& src1, const GpuMat& src2, int normType=NORM_L2);
 
         //! reverses the order of the rows, columns or both in a matrix
+        //! supports CV_8UC1, CV_8UC4 types
         CV_EXPORTS void flip(const GpuMat& a, GpuMat& b, int flipCode);
 
         //! computes sum of array elements
+        //! supports CV_8UC1, CV_8UC4 types
         CV_EXPORTS Scalar sum(const GpuMat& m);
 
         //! finds global minimum and maximum array elements and returns their values
+        //! supports only CV_8UC1 type
         CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal = 0);
 
         //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
-        //! supports only single channels source
-        //! destination array will have the same type as source
-        //! lut must hase CV_32S depth and the same number of channels as in the source array
+        //! destination array will have the same depth as lut and the same number of channels as source
+        //! supports CV_8UC1, CV_8UC3 types
         CV_EXPORTS void LUT(const GpuMat& src, const Mat& lut, GpuMat& dst);
 
         //! makes multi-channel array out of several single-channel arrays
@@ -416,10 +442,21 @@ namespace cv
         //! copies each plane of a multi-channel array to a dedicated array (async version)
         CV_EXPORTS void split(const GpuMat& src, vector<GpuMat>& dst, const Stream& stream);
 
+        //! computes exponent of each matrix element (b = e**a)
+        //! supports only CV_32FC1 type
+        CV_EXPORTS void exp(const GpuMat& a, GpuMat& b);
+
+        //! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
+        //! supports only CV_32FC1 type
+        CV_EXPORTS void log(const GpuMat& a, GpuMat& b);
+
+        //! computes magnitude (magnitude(i)) of each (x(i), y(i)) vector
+        CV_EXPORTS void magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude);
+
         ////////////////////////////// Image processing //////////////////////////////
 
         //! DST[x,y] = SRC[xmap[x,y],ymap[x,y]] with bilinear interpolation.
-        //! xymap.type() == xymap.type() == CV_32FC1
+        //! supports CV_8UC1, CV_8UC3 source types and CV_32FC1 map type
         CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap);
 
         //! Does mean shift filtering on GPU.
@@ -452,7 +489,8 @@ namespace cv
         CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh);
 
         //! resizes the image
-        //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_LANCZOS4
+        //! Supports INTER_NEAREST, INTER_LINEAR
+        //! supports CV_8UC1, CV_8UC4 types
         CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR);
 
         //! warps the image using affine transformation
@@ -465,16 +503,20 @@ namespace cv
 
         //! rotate 8bit single or four channel image
         //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+        //! supports CV_8UC1, CV_8UC4 types
         CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0, int interpolation = INTER_LINEAR);
 
         //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
+        //! supports CV_8UC1, CV_8UC4, CV_32SC1 types
         CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, const Scalar& value = Scalar());
 
         //! computes the integral image and integral for the squared image
         //! sum will have CV_32S type, sqsum - CV32F type
+        //! supports only CV_32FC1 source type
         CV_EXPORTS void integral(GpuMat& src, GpuMat& sum, GpuMat& sqsum);
 
         //! smooths the image using the normalized box filter
+        //! supports CV_8UC1, CV_8UC4 types and kernel size 3, 5, 7
         CV_EXPORTS void boxFilter(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor = Point(-1,-1));
 
         //! a synonym for normalized box filter
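For orientation, a minimal usage sketch of the new scalar overloads declared above. This is not part of the patch; it assumes a CUDA-capable device and the OpenCV 2.x `cv::gpu` module, and the matrix sizes and values are illustrative:

```cpp
#include <opencv2/core/core.hpp>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    // The scalar overloads above are documented to support only CV_32FC1.
    cv::Mat hostSrc(480, 640, CV_32FC1, cv::Scalar(3.0f));

    cv::gpu::GpuMat src(hostSrc); // this constructor uploads to the device
    cv::gpu::GpuMat shifted, scaled;

    cv::gpu::add(src, cv::Scalar(2.0), shifted);          // shifted = src + 2
    cv::gpu::multiply(shifted, cv::Scalar(0.5), scaled);  // scaled = shifted * 0.5

    cv::Mat hostDst;
    scaled.download(hostDst); // copy the result back to the host
    return 0;
}
```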
diff --git a/modules/gpu/src/arithm.cpp b/modules/gpu/src/arithm.cpp
index bfaa6c7c39..7f999122de 100644
--- a/modules/gpu/src/arithm.cpp
+++ b/modules/gpu/src/arithm.cpp
@@ -49,11 +49,16 @@ using namespace std;
 #if !defined (HAVE_CUDA)
 
 void cv::gpu::add(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
+void cv::gpu::add(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::subtract(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
+void cv::gpu::subtract(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::multiply(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
+void cv::gpu::multiply(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::divide(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
+void cv::gpu::divide(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::transpose(const GpuMat&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::absdiff(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
+void cv::gpu::absdiff(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::compare(const GpuMat&, const GpuMat&, GpuMat&, int) { throw_nogpu(); }
 void cv::gpu::meanStdDev(const GpuMat&, Scalar&, Scalar&) { throw_nogpu(); }
 double cv::gpu::norm(const GpuMat&, int) { throw_nogpu(); return 0.0; }
@@ -61,7 +66,10 @@ double cv::gpu::norm(const GpuMat&, const GpuMat&, int) { throw_nogpu(); return
 void cv::gpu::flip(const GpuMat&, GpuMat&, int) { throw_nogpu(); }
 Scalar cv::gpu::sum(const GpuMat&) { throw_nogpu(); return Scalar(); }
 void cv::gpu::minMax(const GpuMat&, double*, double*) { throw_nogpu(); }
-void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst) { throw_nogpu(); }
+void cv::gpu::LUT(const GpuMat&, const Mat&, GpuMat&) { throw_nogpu(); }
+void cv::gpu::exp(const GpuMat&, GpuMat&) { throw_nogpu(); }
+void cv::gpu::log(const GpuMat&, GpuMat&) { throw_nogpu(); }
+void cv::gpu::magnitude(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -72,15 +80,18 @@ namespace
 {
     typedef NppStatus (*npp_arithm_8u_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step, Npp8u* pDst, int nDstStep,
                                          NppiSize oSizeROI, int nScaleFactor);
+    typedef NppStatus (*npp_arithm_32s_t)(const Npp32s* pSrc1, int nSrc1Step, const Npp32s* pSrc2, int nSrc2Step, Npp32s* pDst,
+                                          int nDstStep, NppiSize oSizeROI);
     typedef NppStatus (*npp_arithm_32f_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst,
-                                          int nDstStep, NppiSize oSizeROI);
+                                          int nDstStep, NppiSize oSizeROI);
 
-    void nppFuncCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst,
-                       npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4, npp_arithm_32f_t npp_func_32fc1)
+    void nppArithmCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst,
+                         npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4,
+                         npp_arithm_32s_t npp_func_32sc1, npp_arithm_32f_t npp_func_32fc1)
     {
         CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
-        CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32FC1);
+        CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);
 
         dst.create( src1.size(), src1.type() );
 
@@ -100,6 +111,11 @@ namespace
                                         src2.ptr<Npp8u>(), src2.step,
                                         dst.ptr<Npp8u>(), dst.step, sz, 0) );
             break;
+        case CV_32SC1:
+            nppSafeCall( npp_func_32sc1(src1.ptr<Npp32s>(), src1.step,
+                                        src2.ptr<Npp32s>(), src2.step,
+                                        dst.ptr<Npp32s>(), dst.step, sz) );
+            break;
         case CV_32FC1:
             nppSafeCall( npp_func_32fc1(src1.ptr<Npp32f>(), src1.step,
                                         src2.ptr<Npp32f>(), src2.step,
@@ -109,26 +125,63 @@ namespace
             CV_Assert(!"Unsupported source type");
         }
     }
+
+    typedef NppStatus (*npp_arithm_scalar_32f_t)(const Npp32f *pSrc, int nSrcStep, Npp32f nValue, Npp32f *pDst,
+                                                 int nDstStep, NppiSize oSizeROI);
+
+    void nppArithmCaller(const GpuMat& src1, const Scalar& sc, GpuMat& dst,
+                         npp_arithm_scalar_32f_t npp_func)
+    {
+        CV_Assert(src1.type() == CV_32FC1);
+
+        dst.create(src1.size(), src1.type());
+
+        NppiSize sz;
+        sz.width = src1.cols;
+        sz.height = src1.rows;
+
+        nppSafeCall( npp_func(src1.ptr<Npp32f>(), src1.step, (Npp32f)sc[0], dst.ptr<Npp32f>(), dst.step, sz) );
+    }
 }
 
 void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
 {
-    nppFuncCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32f_C1R);
+    nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R);
 }
 
 void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
 {
-    nppFuncCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32f_C1R);
+    nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R);
 }
 
 void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
 {
-    nppFuncCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32f_C1R);
+    nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32s_C1R, nppiMul_32f_C1R);
 }
 
 void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
 {
-    nppFuncCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32f_C1R);
+    nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32s_C1R, nppiDiv_32f_C1R);
+}
+
+void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst)
+{
+    nppArithmCaller(src, sc, dst, nppiAddC_32f_C1R);
+}
+
+void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst)
+{
+    nppArithmCaller(src, sc, dst, nppiSubC_32f_C1R);
+}
+
+void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst)
+{
+    nppArithmCaller(src, sc, dst, nppiMulC_32f_C1R);
+}
+
+void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst)
+{
+    nppArithmCaller(src, sc, dst, nppiDivC_32f_C1R);
}
 
 ////////////////////////////////////////////////////////////////////////
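One detail worth flagging in the hunk above: `subtract` and `divide` pass `src2` before `src1`. If I read the NPP convention correctly, the binary `nppiSub*`/`nppiDiv*` primitives compute `pSrc2 - pSrc1` and `pSrc2 / pSrc1`, so the swap is what preserves OpenCV's `c = a - b` semantics. A quick host-side sanity check, under the same build assumptions as the earlier sketch:

```cpp
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    cv::Mat a(64, 64, CV_32FC1, cv::Scalar(5.0f));
    cv::Mat b(64, 64, CV_32FC1, cv::Scalar(2.0f));

    cv::gpu::GpuMat ga(a), gb(b), gc;
    cv::gpu::subtract(ga, gb, gc); // expect a - b, i.e. 3.0 everywhere

    cv::Mat c;
    gc.download(c);
    std::cout << c.at<float>(0, 0) << std::endl; // prints 3
    return 0;
}
```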
@@ -154,7 +207,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
 {
     CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
 
-    CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_32FC1);
+    CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);
 
     dst.create( src1.size(), src1.type() );
 
@@ -162,20 +215,46 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
     sz.width = src1.cols;
     sz.height = src1.rows;
 
-    if (src1.type() == CV_8UC1)
+    switch (src1.type())
     {
+    case CV_8UC1:
         nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), src1.step,
                                         src2.ptr<Npp8u>(), src2.step,
                                         dst.ptr<Npp8u>(), dst.step, sz) );
-    }
-    else
-    {
+        break;
+    case CV_8UC4:
+        nppSafeCall( nppiAbsDiff_8u_C4R(src1.ptr<Npp8u>(), src1.step,
+                                        src2.ptr<Npp8u>(), src2.step,
+                                        dst.ptr<Npp8u>(), dst.step, sz) );
+        break;
+    case CV_32SC1:
+        nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), src1.step,
+                                         src2.ptr<Npp32s>(), src2.step,
+                                         dst.ptr<Npp32s>(), dst.step, sz) );
+        break;
+    case CV_32FC1:
         nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), src1.step,
                                          src2.ptr<Npp32f>(), src2.step,
                                          dst.ptr<Npp32f>(), dst.step, sz) );
+        break;
+    default:
+        CV_Assert(!"Unsupported source type");
     }
 }
 
+void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
+{
+    CV_Assert(src.type() == CV_32FC1);
+
+    dst.create( src.size(), src.type() );
+
+    NppiSize sz;
+    sz.width = src.cols;
+    sz.height = src.rows;
+
+    nppSafeCall( nppiAbsDiffC_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, (Npp32f)s[0]) );
+}
+
 ////////////////////////////////////////////////////////////////////////
 // compare
 
@@ -416,4 +495,57 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
     }
 }
 
+////////////////////////////////////////////////////////////////////////
+// exp
+
+void cv::gpu::exp(const GpuMat& src, GpuMat& dst)
+{
+    CV_Assert(src.type() == CV_32FC1);
+
+    dst.create(src.size(), src.type());
+
+    NppiSize sz;
+    sz.width = src.cols;
+    sz.height = src.rows;
+
+    nppSafeCall( nppiExp_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
+}
+
+////////////////////////////////////////////////////////////////////////
+// log
+
+void cv::gpu::log(const GpuMat& src, GpuMat& dst)
+{
+    CV_Assert(src.type() == CV_32FC1);
+
+    dst.create(src.size(), src.type());
+
+    NppiSize sz;
+    sz.width = src.cols;
+    sz.height = src.rows;
+
+    nppSafeCall( nppiLn_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
+}
+
+////////////////////////////////////////////////////////////////////////
+// magnitude
+
+void cv::gpu::magnitude(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
+{
+    CV_DbgAssert(src1.type() == src2.type() && src1.size() == src2.size());
+    CV_Assert(src1.type() == CV_32FC1);
+
+    GpuMat src(src1.size(), CV_32FC2);
+    GpuMat srcs[] = {src1, src2};
+    cv::gpu::merge(srcs, 2, src);
+
+    dst.create(src1.size(), src1.type());
+
+    NppiSize sz;
+    sz.width = src.cols;
+    sz.height = src.rows;
+
+    nppSafeCall( nppiMagnitude_32fc32f_C1R(src.ptr<Npp32fc>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
+}
+
 #endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
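The three new functions compose naturally. Note that `magnitude` first interleaves the two planes into a CV_32FC2 matrix with `merge` before calling `nppiMagnitude_32fc32f_C1R`, so it pays for one extra device allocation and copy. A small sketch (illustrative values, same assumptions as the earlier sketches):

```cpp
#include <opencv2/core/core.hpp>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    cv::Mat x(128, 128, CV_32FC1, cv::Scalar(0.6f));
    cv::Mat y(128, 128, CV_32FC1, cv::Scalar(0.8f));

    cv::gpu::GpuMat gx(x), gy(y), e, l, mag;

    cv::gpu::exp(gx, e);             // e = exp(x)
    cv::gpu::log(e, l);              // l = log(exp(x)) ~= x, up to precision
    cv::gpu::magnitude(gx, gy, mag); // sqrt(x^2 + y^2) = 1.0 for these inputs

    cv::Mat result;
    mag.download(result);
    return 0;
}
```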
diff --git a/modules/gpu/src/beliefpropagation_gpu.cpp b/modules/gpu/src/beliefpropagation_gpu.cpp
index 31fe7c19a5..6bfc3aa439 100644
--- a/modules/gpu/src/beliefpropagation_gpu.cpp
+++ b/modules/gpu/src/beliefpropagation_gpu.cpp
@@ -89,7 +89,7 @@ void cv::gpu::StereoBeliefPropagation::estimateRecommendedParams(int width, int
     int mm = ::max(width, height);
     iters = mm / 100 + 2;
 
-    levels = (int)(log(static_cast<double>(mm)) + 1) * 4 / 5;
+    levels = (int)(::log(static_cast<double>(mm)) + 1) * 4 / 5;
     if (levels == 0) levels++;
 }
diff --git a/modules/gpu/src/constantspacebp_gpu.cpp b/modules/gpu/src/constantspacebp_gpu.cpp
index 30b5c5d7bd..a7012882c1 100644
--- a/modules/gpu/src/constantspacebp_gpu.cpp
+++ b/modules/gpu/src/constantspacebp_gpu.cpp
@@ -116,7 +116,7 @@ void cv::gpu::StereoConstantSpaceBP::estimateRecommendedParams(int width, int he
     int mm = ::max(width, height);
     iters = mm / 100 + ((mm > 1200)? - 4 : 4);
 
-    levels = (int)log(static_cast<double>(mm)) * 2 / 3;
+    levels = (int)::log(static_cast<double>(mm)) * 2 / 3;
     if (levels == 0) levels++;
 
     nr_plane = (int) ((float) ndisp / pow(2.0, levels + 1));
diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp
index 85766e9712..750044af34 100644
--- a/modules/gpu/src/imgproc_gpu.cpp
+++ b/modules/gpu/src/imgproc_gpu.cpp
@@ -592,10 +592,10 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh)
 
 void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation)
 {
-    static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS};
+    static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR/*, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS*/};
 
     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
-    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4);
+    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR/* || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4*/);
 
     CV_Assert( src.size().area() > 0 );
     CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );
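Since the patch comments out the cubic and Lanczos entries, callers now have to stick to the two remaining modes; anything else trips the `CV_Assert`. A minimal sketch of the surviving path (sizes illustrative):

```cpp
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp> // for the INTER_* constants
#include <opencv2/gpu/gpu.hpp>

int main()
{
    cv::Mat host(480, 640, CV_8UC4, cv::Scalar::all(128));
    cv::gpu::GpuMat src(host), dst;

    // INTER_NEAREST and INTER_LINEAR are the only modes that still pass
    // the assertion; INTER_CUBIC / INTER_LANCZOS4 now abort.
    cv::gpu::resize(src, dst, cv::Size(), 0.5, 0.5, cv::INTER_LINEAR);
    return 0;
}
```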
diff --git a/modules/gpu/src/matrix_operations.cpp b/modules/gpu/src/matrix_operations.cpp
index 8026b540b7..13451d7189 100644
--- a/modules/gpu/src/matrix_operations.cpp
+++ b/modules/gpu/src/matrix_operations.cpp
@@ -151,15 +151,19 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
 
 GpuMat& GpuMat::operator = (const Scalar& s)
 {
-    matrix_operations::set_to_without_mask( *this, depth(), s.val, channels());
+    setTo(s);
     return *this;
 }
 
 GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
 {
-    //CV_Assert(mask.type() == CV_8U);
+    CV_Assert(mask.type() == CV_8UC1);
 
     CV_DbgAssert(!this->empty());
+
+    NppiSize sz;
+    sz.width = cols;
+    sz.height = rows;
 
     if (mask.empty())
     {
@@ -167,38 +171,74 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
         {
         case CV_8UC1:
            {
-                NppiSize sz;
-                sz.width = cols;
-                sz.height = rows;
                 Npp8u nVal = (Npp8u)s[0];
-                nppSafeCall( nppiSet_8u_C1R(nVal, (Npp8u*)ptr(), step, sz) );
+                nppSafeCall( nppiSet_8u_C1R(nVal, ptr<Npp8u>(), step, sz) );
                 break;
             }
         case CV_8UC4:
             {
-                NppiSize sz;
-                sz.width = cols;
-                sz.height = rows;
-                Npp8u nVal[] = {(Npp8u)s[0], (Npp8u)s[1], (Npp8u)s[2], (Npp8u)s[3]};
-                nppSafeCall( nppiSet_8u_C4R(nVal, (Npp8u*)ptr(), step, sz) );
+                Scalar_<Npp8u> nVal = s;
+                nppSafeCall( nppiSet_8u_C4R(nVal.val, ptr<Npp8u>(), step, sz) );
+                break;
+            }
+        case CV_16UC1:
+            {
+                Npp16u nVal = (Npp16u)s[0];
+                nppSafeCall( nppiSet_16u_C1R(nVal, ptr<Npp16u>(), step, sz) );
+                break;
+            }
+        /*case CV_16UC2:
+            {
+                Scalar_<Npp16u> nVal = s;
+                nppSafeCall( nppiSet_16u_C2R(nVal.val, ptr<Npp16u>(), step, sz) );
+                break;
+            }*/
+        case CV_16UC4:
+            {
+                Scalar_<Npp16u> nVal = s;
+                nppSafeCall( nppiSet_16u_C4R(nVal.val, ptr<Npp16u>(), step, sz) );
+                break;
+            }
+        case CV_16SC1:
+            {
+                Npp16s nVal = (Npp16s)s[0];
+                nppSafeCall( nppiSet_16s_C1R(nVal, ptr<Npp16s>(), step, sz) );
+                break;
+            }
+        /*case CV_16SC2:
+            {
+                Scalar_<Npp16s> nVal = s;
+                nppSafeCall( nppiSet_16s_C2R(nVal.val, ptr<Npp16s>(), step, sz) );
+                break;
+            }*/
+        case CV_16SC4:
+            {
+                Scalar_<Npp16s> nVal = s;
+                nppSafeCall( nppiSet_16s_C4R(nVal.val, ptr<Npp16s>(), step, sz) );
                 break;
             }
         case CV_32SC1:
             {
-                NppiSize sz;
-                sz.width = cols;
-                sz.height = rows;
                 Npp32s nVal = (Npp32s)s[0];
-                nppSafeCall( nppiSet_32s_C1R(nVal, (Npp32s*)ptr(), step, sz) );
+                nppSafeCall( nppiSet_32s_C1R(nVal, ptr<Npp32s>(), step, sz) );
+                break;
+            }
+        case CV_32SC4:
+            {
+                Scalar_<Npp32s> nVal = s;
+                nppSafeCall( nppiSet_32s_C4R(nVal.val, ptr<Npp32s>(), step, sz) );
                 break;
             }
         case CV_32FC1:
             {
-                NppiSize sz;
-                sz.width = cols;
-                sz.height = rows;
                 Npp32f nVal = (Npp32f)s[0];
-                nppSafeCall( nppiSet_32f_C1R(nVal, (Npp32f*)ptr(), step, sz) );
+                nppSafeCall( nppiSet_32f_C1R(nVal, ptr<Npp32f>(), step, sz) );
+                break;
+            }
+        case CV_32FC4:
+            {
+                Scalar_<Npp32f> nVal = s;
+                nppSafeCall( nppiSet_32f_C4R(nVal.val, ptr<Npp32f>(), step, sz) );
                 break;
             }
         default:
@@ -206,7 +246,73 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
         }
     }
     else
-        matrix_operations::set_to_with_mask( *this, depth(), s.val, mask, channels());
+    {
+        switch (type())
+        {
+        case CV_8UC1:
+            {
+                Npp8u nVal = (Npp8u)s[0];
+                nppSafeCall( nppiSet_8u_C1MR(nVal, ptr<Npp8u>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
+                break;
+            }
+        case CV_8UC4:
+            {
+                Scalar_<Npp8u> nVal = s;
+                nppSafeCall( nppiSet_8u_C4MR(nVal.val, ptr<Npp8u>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
+                break;
+            }
+        case CV_16UC1:
+            {
+                Npp16u nVal = (Npp16u)s[0];
+                nppSafeCall( nppiSet_16u_C1MR(nVal, ptr<Npp16u>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
+                break;
+            }
+        case CV_16UC4:
+            {
+                Scalar_<Npp16u> nVal = s;
+                nppSafeCall( nppiSet_16u_C4MR(nVal.val, ptr<Npp16u>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
+                break;
+            }
+        case CV_16SC1:
+            {
+                Npp16s nVal = (Npp16s)s[0];
+                nppSafeCall( nppiSet_16s_C1MR(nVal, ptr<Npp16s>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
+                break;
+            }
+        case CV_16SC4:
+            {
+                Scalar_<Npp16s> nVal = s;
+                nppSafeCall( nppiSet_16s_C4MR(nVal.val, ptr<Npp16s>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
+                break;
+            }
+        case CV_32SC1:
+            {
+                Npp32s nVal = (Npp32s)s[0];
+                nppSafeCall( nppiSet_32s_C1MR(nVal, ptr<Npp32s>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
+                break;
+            }
+        case CV_32SC4:
+            {
+                Scalar_<Npp32s> nVal = s;
+                nppSafeCall( nppiSet_32s_C4MR(nVal.val, ptr<Npp32s>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
+                break;
+            }
+        case CV_32FC1:
+            {
+                Npp32f nVal = (Npp32f)s[0];
+                nppSafeCall( nppiSet_32f_C1MR(nVal, ptr<Npp32f>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
+                break;
+            }
+        case CV_32FC4:
+            {
+                Scalar_<Npp32f> nVal = s;
+                nppSafeCall( nppiSet_32f_C4MR(nVal.val, ptr<Npp32f>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
+                break;
+            }
+        default:
+            matrix_operations::set_to_with_mask( *this, depth(), s.val, mask, channels());
+        }
+    }
 
     return *this;
 }
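With this rewrite, `setTo` dispatches the common unmasked and masked fills straight to the `nppiSet_*` primitives and only falls back to the custom kernel for types NPP does not cover. A usage sketch (dimensions illustrative; per the new assertion the mask must be CV_8UC1):

```cpp
#include <opencv2/core/core.hpp>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    cv::gpu::GpuMat img(100, 100, CV_32FC4, cv::Scalar::all(0));

    // Unmasked fill: handled by nppiSet_32f_C4R in the switch above.
    img.setTo(cv::Scalar(1, 2, 3, 4));

    // Masked fill: non-zero mask pixels are overwritten via nppiSet_32f_C4MR.
    cv::gpu::GpuMat mask(100, 100, CV_8UC1, cv::Scalar(0));
    img.setTo(cv::Scalar::all(9), mask);
    return 0;
}
```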
diff --git a/tests/gpu/src/arithm.cpp b/tests/gpu/src/arithm.cpp
index f4168436a3..e906288c70 100644
--- a/tests/gpu/src/arithm.cpp
+++ b/tests/gpu/src/arithm.cpp
@@ -74,8 +74,8 @@ int CV_GpuArithmTest::test(int type)
     cv::Size sz(200, 200);
     cv::Mat mat1(sz, type), mat2(sz, type);
     cv::RNG rng(*ts->get_rng());
-    rng.fill(mat1, cv::RNG::UNIFORM, cv::Scalar::all(10), cv::Scalar::all(100));
-    rng.fill(mat2, cv::RNG::UNIFORM, cv::Scalar::all(10), cv::Scalar::all(100));
+    rng.fill(mat1, cv::RNG::UNIFORM, cv::Scalar::all(1), cv::Scalar::all(20));
+    rng.fill(mat2, cv::RNG::UNIFORM, cv::Scalar::all(1), cv::Scalar::all(20));
 
     return test(mat1, mat2);
 }
@@ -114,8 +114,8 @@ void CV_GpuArithmTest::run( int )
     int testResult = CvTS::OK;
     try
     {
-        const int types[] = {CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1};
-        const char* type_names[] = {"CV_8UC1", "CV_8UC3", "CV_8UC4", "CV_32FC1"};
+        const int types[] = {CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32FC1};
+        const char* type_names[] = {"CV_8UC1", "CV_8UC3", "CV_8UC4", "CV_32SC1", "CV_32FC1"};
         const int type_count = sizeof(types)/sizeof(types[0]);
 
         //run tests
@@ -151,7 +151,7 @@ struct CV_GpuNppImageAddTest : public CV_GpuArithmTest
 
     virtual int test(const Mat& mat1, const Mat& mat2)
     {
-        if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32FC1)
+        if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32SC1 && mat1.type() != CV_32FC1)
         {
             ts->printf(CvTS::LOG, "\nUnsupported type\n");
             return CvTS::OK;
@@ -177,7 +177,7 @@ struct CV_GpuNppImageSubtractTest : public CV_GpuArithmTest
 
     int test( const Mat& mat1, const Mat& mat2 )
     {
-        if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32FC1)
+        if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32SC1 && mat1.type() != CV_32FC1)
         {
             ts->printf(CvTS::LOG, "\nUnsupported type\n");
             return CvTS::OK;
@@ -203,7 +203,7 @@ struct CV_GpuNppImageMultiplyTest : public CV_GpuArithmTest
 
     int test( const Mat& mat1, const Mat& mat2 )
     {
-        if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32FC1)
+        if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32SC1 && mat1.type() != CV_32FC1)
         {
             ts->printf(CvTS::LOG, "\nUnsupported type\n");
             return CvTS::OK;
@@ -229,7 +229,7 @@ struct CV_GpuNppImageDivideTest : public CV_GpuArithmTest
 
     int test( const Mat& mat1, const Mat& mat2 )
    {
-        if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32FC1)
+        if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32SC1 && mat1.type() != CV_32FC1)
        {
             ts->printf(CvTS::LOG, "\nUnsupported type\n");
             return CvTS::OK;
@@ -280,7 +280,7 @@ struct CV_GpuNppImageAbsdiffTest : public CV_GpuArithmTest
 
     int test( const Mat& mat1, const Mat& mat2 )
     {
-        if (mat1.type() != CV_8UC1 && mat1.type() != CV_32FC1)
+        if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32SC1 && mat1.type() != CV_32FC1)
         {
             ts->printf(CvTS::LOG, "\nUnsupported type\n");
             return CvTS::OK;
@@ -532,6 +532,82 @@ struct CV_GpuNppImageLUTTest : public CV_GpuArithmTest
     }
 };
 
+////////////////////////////////////////////////////////////////////////////////
+// exp
+struct CV_GpuNppImageExpTest : public CV_GpuArithmTest
+{
+    CV_GpuNppImageExpTest() : CV_GpuArithmTest( "GPU-NppImageExp", "exp" ) {}
+
+    int test( const Mat& mat1, const Mat& )
+    {
+        if (mat1.type() != CV_32FC1)
+        {
+            ts->printf(CvTS::LOG, "\nUnsupported type\n");
+            return CvTS::OK;
+        }
+
+        cv::Mat cpuRes;
+        cv::exp(mat1, cpuRes);
+
+        GpuMat gpu1(mat1);
+        GpuMat gpuRes;
+        cv::gpu::exp(gpu1, gpuRes);
+
+        return CheckNorm(cpuRes, gpuRes);
+    }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// log
+struct CV_GpuNppImageLogTest : public CV_GpuArithmTest
+{
+    CV_GpuNppImageLogTest() : CV_GpuArithmTest( "GPU-NppImageLog", "log" ) {}
+
+    int test( const Mat& mat1, const Mat& )
+    {
+        if (mat1.type() != CV_32FC1)
+        {
+            ts->printf(CvTS::LOG, "\nUnsupported type\n");
+            return CvTS::OK;
+        }
+
+        cv::Mat cpuRes;
+        cv::log(mat1, cpuRes);
+
+        GpuMat gpu1(mat1);
+        GpuMat gpuRes;
+        cv::gpu::log(gpu1, gpuRes);
+
+        return CheckNorm(cpuRes, gpuRes);
+    }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// magnitude
+struct CV_GpuNppImageMagnitudeTest : public CV_GpuArithmTest
+{
+    CV_GpuNppImageMagnitudeTest() : CV_GpuArithmTest( "GPU-NppImageMagnitude", "magnitude" ) {}
+
+    int test( const Mat& mat1, const Mat& mat2 )
+    {
+        if (mat1.type() != CV_32FC1)
+        {
+            ts->printf(CvTS::LOG, "\nUnsupported type\n");
+            return CvTS::OK;
+        }
+
+        cv::Mat cpuRes;
+        cv::magnitude(mat1, mat2, cpuRes);
+
+        GpuMat gpu1(mat1);
+        GpuMat gpu2(mat2);
+        GpuMat gpuRes;
+        cv::gpu::magnitude(gpu1, gpu2, gpuRes);
+
+        return CheckNorm(cpuRes, gpuRes);
+    }
+};
+
 /////////////////////////////////////////////////////////////////////////////
 /////////////////// tests registration  /////////////////////////////////////
 /////////////////////////////////////////////////////////////////////////////
@@ -552,4 +628,7 @@ CV_GpuNppImageNormTest CV_GpuNppImageNorm_test;
 CV_GpuNppImageFlipTest CV_GpuNppImageFlip_test;
 CV_GpuNppImageSumTest CV_GpuNppImageSum_test;
 CV_GpuNppImageMinNaxTest CV_GpuNppImageMinNax_test;
-CV_GpuNppImageLUTTest CV_GpuNppImageLUT_test;
\ No newline at end of file
+CV_GpuNppImageLUTTest CV_GpuNppImageLUT_test;
+CV_GpuNppImageExpTest CV_GpuNppImageExp_test;
+CV_GpuNppImageLogTest CV_GpuNppImageLog_test;
+CV_GpuNppImageMagnitudeTest CV_GpuNppImageMagnitude_test;
diff --git a/tests/gpu/src/gputest_main.cpp b/tests/gpu/src/gputest_main.cpp
index 143d380fcc..d4b9b3f5eb 100644
--- a/tests/gpu/src/gputest_main.cpp
+++ b/tests/gpu/src/gputest_main.cpp
@@ -45,19 +45,22 @@ CvTS test_system;
 
 const char* blacklist[] =
 {
-    "GPU-NppImageSum",
-    "GPU-MatOperatorAsyncCall",
-    //"GPU-NppErode",
-    //"GPU-NppDilate",
-    //"GPU-NppMorphologyEx",
-    //"GPU-NppImageDivide",
-    //"GPU-NppImageMeanStdDev",
-    //"GPU-NppImageMinNax",
-    //"GPU-NppImageResize",
-    //"GPU-NppImageWarpAffine",
-    //"GPU-NppImageWarpPerspective",
-    //"GPU-NppImageIntegral",
-    //"GPU-NppImageBlur",
+    "GPU-NppImageSum",               // crash
+    "GPU-MatOperatorAsyncCall",      // crash
+    //"GPU-NppErode",                // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR)
+    //"GPU-NppDilate",               // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR)
+    //"GPU-NppMorphologyEx",         // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR)
+    //"GPU-NppImageDivide",          // different round mode
+    //"GPU-NppImageMeanStdDev",      // different precision
+    //"GPU-NppImageMinNax",          // npp bug
+    //"GPU-NppImageResize",          // different precision in interpolation
+    //"GPU-NppImageWarpAffine",      // different precision in interpolation
+    //"GPU-NppImageWarpPerspective", // different precision in interpolation
+    //"GPU-NppImageIntegral",        // different precision
+    //"GPU-NppImageBlur",            // different precision
+    //"GPU-NppImageExp",             // different precision
+    //"GPU-NppImageLog",             // different precision
+    //"GPU-NppImageMagnitude",       // different precision
 
     0
 };
diff --git a/tests/gpu/src/operator_convert_to.cpp b/tests/gpu/src/operator_convert_to.cpp
index 5b8111172a..7cdf66dd7d 100644
--- a/tests/gpu/src/operator_convert_to.cpp
+++ b/tests/gpu/src/operator_convert_to.cpp
@@ -68,7 +68,6 @@ void CV_GpuMatOpConvertToTest::run(int /* start_from */)
 
     const int types[] = {CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F};
     const int types_num = sizeof(types) / sizeof(int);
-    const char* types_str[] = {"CV_8U", "CV_8S", "CV_16U", "CV_16S", "CV_32S", "CV_32F", "CV_64F"};
 
     bool passed = true;
 
@@ -78,17 +77,16 @@ void CV_GpuMatOpConvertToTest::run(int /* start_from */)
     {
         for (int j = 0; j < types_num && passed; ++j)
         {
-            for (int c = 1; c < 2 && passed; ++c)
+            for (int c = 1; c < 5 && passed; ++c)
             {
                 const int src_type = CV_MAKETYPE(types[i], c);
                 const int dst_type = types[j];
-                const double alpha = (double)rand() / RAND_MAX * 2.0;
-                const double beta = (double)rand() / RAND_MAX * 150.0 - 75;
 
                 cv::RNG rng(*ts->get_rng());
+                const double alpha = rng.uniform(0.0, 2.0);
+                const double beta = rng.uniform(-75.0, 75.0);
 
                 Mat cpumatsrc(img_size, src_type);
-
                 rng.fill(cpumatsrc, RNG::UNIFORM, Scalar::all(0), Scalar::all(300));
 
                 GpuMat gpumatsrc(cpumatsrc);
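For reference, the operation the widened loop now exercises for 1 to 4 channels is the scaled conversion `dst = src * alpha + beta`. A sketch outside the test harness (values illustrative, same OpenCV 2.x assumptions as above):

```cpp
#include <opencv2/core/core.hpp>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    cv::Mat host(32, 32, CV_8UC3);
    cv::randu(host, cv::Scalar::all(0), cv::Scalar::all(255));

    cv::gpu::GpuMat src(host), dst;
    src.convertTo(dst, CV_32F, 1.0 / 255.0, -0.5); // dst = src / 255 - 0.5

    cv::Mat result;
    dst.download(result);
    return 0;
}
```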
diff --git a/tests/gpu/src/operator_set_to.cpp b/tests/gpu/src/operator_set_to.cpp
index e5d28ca6dc..97a281b660 100644
--- a/tests/gpu/src/operator_set_to.cpp
+++ b/tests/gpu/src/operator_set_to.cpp
@@ -40,15 +40,7 @@
 //M*/
 
 #include "gputest.hpp"
-#include "highgui.h"
-
-#include <string>
-#include <iostream>
-#include <fstream>
-#include <iterator>
 #include <limits>
-#include <numeric>
-#include <iomanip> // for cout << setw()
 
 using namespace cv;
 using namespace std;
@@ -62,9 +54,8 @@ public:
 
 protected:
     void run(int);
-    void print_mat(cv::Mat & mat, std::string name = "cpu mat");
-    void print_mat(gpu::GpuMat & mat, std::string name = "gpu mat");
-    bool compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat);
+
+    bool testSetTo(cv::Mat& cpumat, gpu::GpuMat& gpumat, const cv::Mat& cpumask = cv::Mat(), const cv::gpu::GpuMat& gpumask = cv::gpu::GpuMat());
 
 private:
     int rows;
@@ -74,51 +65,23 @@ private:
 
 CV_GpuMatOpSetToTest::CV_GpuMatOpSetToTest(): CvTest( "GPU-MatOperatorSetTo", "setTo" )
 {
-    rows = 256;
-    cols = 124;
+    rows = 35;
+    cols = 67;
 
     s.val[0] = 127.0;
     s.val[1] = 127.0;
     s.val[2] = 127.0;
     s.val[3] = 127.0;
-
-    //#define PRINT_MATRIX
 }
 
-
-void CV_GpuMatOpSetToTest::print_mat(cv::Mat & mat, std::string name )
+bool CV_GpuMatOpSetToTest::testSetTo(cv::Mat& cpumat, gpu::GpuMat& gpumat, const cv::Mat& cpumask, const cv::gpu::GpuMat& gpumask)
 {
-    cv::imshow(name, mat);
-}
+    cpumat.setTo(s, cpumask);
+    gpumat.setTo(s, gpumask);
 
-void CV_GpuMatOpSetToTest::print_mat(gpu::GpuMat & mat, std::string name)
-{
-    cv::Mat newmat;
-    mat.download(newmat);
-    print_mat(newmat, name);
-}
+    double ret = norm(cpumat, gpumat, NORM_INF);
 
-bool CV_GpuMatOpSetToTest::compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat)
-{
-    //int64 time = getTickCount();
-    cpumat.setTo(s);
-    //int64 time1 = getTickCount();
-    gpumat.setTo(s);
-    //int64 time2 = getTickCount();
-
-    //std::cout << "\ntime cpu: " << std::fixed << std::setprecision(12) << double((time1 - time) / (double)getTickFrequency());
-    //std::cout << "\ntime gpu: " << std::fixed << std::setprecision(12) << double((time2 - time1) / (double)getTickFrequency());
-    //std::cout << "\n";
-
-#ifdef PRINT_MATRIX
-    print_mat(cpumat);
-    print_mat(gpumat);
-    cv::waitKey(0);
-#endif
-
-    double ret = norm(cpumat, gpumat);
-
-    if (ret < 1.0)
+    if (ret < std::numeric_limits<double>::epsilon())
         return true;
     else
     {
@@ -133,11 +96,20 @@ void CV_GpuMatOpSetToTest::run( int /* start_from */)
 
     try
     {
+        cv::Mat cpumask(rows, cols, CV_8UC1);
+        cv::RNG rng(*ts->get_rng());
+        rng.fill(cpumask, RNG::UNIFORM, cv::Scalar::all(0.0), cv::Scalar(1.5));
+        cv::gpu::GpuMat gpumask(cpumask);
+
         for (int i = 0; i < 7; i++)
         {
-            Mat cpumat(rows, cols, i, Scalar::all(0));
-            GpuMat gpumat(cpumat);
-            is_test_good &= compare_matrix(cpumat, gpumat);
+            for (int cn = 1; cn <= 4; ++cn)
+            {
+                int mat_type = CV_MAKETYPE(i, cn);
+                Mat cpumat(rows, cols, mat_type, Scalar::all(0));
+                GpuMat gpumat(cpumat);
+                is_test_good &= testSetTo(cpumat, gpumat, cpumask, gpumask);
+            }
        }
     }
     catch(const cv::Exception& e)