diff --git a/modules/gpu/src/opencv2/gpu/device/filters.hpp b/modules/gpu/src/opencv2/gpu/device/filters.hpp index 9362fde390..537d6aff2f 100644 --- a/modules/gpu/src/opencv2/gpu/device/filters.hpp +++ b/modules/gpu/src/opencv2/gpu/device/filters.hpp @@ -59,7 +59,7 @@ namespace cv { namespace gpu { namespace device __device__ __forceinline__ elem_type operator ()(float y, float x) const { - return src(__float2int_rd(y), __float2int_rd(x)); + return src(__float2int_rn(y), __float2int_rn(x)); } const Ptr2D src; @@ -78,9 +78,6 @@ namespace cv { namespace gpu { namespace device work_type out = VecTraits::all(0); - x -= 0.5f; - y -= 0.5f; - const int x1 = __float2int_rd(x); const int y1 = __float2int_rd(y); const int x2 = x1 + 1; @@ -112,24 +109,47 @@ namespace cv { namespace gpu { namespace device explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_) : src(src_) {} - static __device__ __forceinline__ work_type cubicInterpolate(typename TypeTraits::ParameterType p0, typename TypeTraits::ParameterType p1, typename TypeTraits::ParameterType p2, typename TypeTraits::ParameterType p3, float x) + static __device__ __forceinline__ float bicubicCoeff(float x_) { - return p1 + 0.5f * x * (p2 - p0 + x * (2.0f * p0 - 5.0f * p1 + 4.0f * p2 - p3 + x * (3.0f * (p1 - p2) + p3 - p0))); + float x = fabsf(x_); + if (x <= 1.0f) + { + return x * x * (1.5f * x - 2.5f) + 1.0f; + } + else if (x < 2.0f) + { + return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f; + } + else + { + return 0.0f; + } } __device__ elem_type operator ()(float y, float x) const { - const int xi = __float2int_rn(x); - const int yi = __float2int_rn(y); + const float xmin = ::ceilf(x - 2.0f); + const float xmax = ::floorf(x + 2.0f); + + const float ymin = ::ceilf(y - 2.0f); + const float ymax = ::floorf(y + 2.0f); + + work_type sum = VecTraits::all(0); + float wsum = 0.0f; - work_type arr[4]; + for (float cy = ymin; cy <= ymax; cy += 1.0f) + { + for (float cx = xmin; cx <= xmax; cx += 1.0f) + { + const float w = bicubicCoeff(x - cx) * bicubicCoeff(y - cy); + sum = sum + w * src(__float2int_rd(cy), __float2int_rd(cx)); + wsum += w; + } + } - arr[0] = cubicInterpolate(saturate_cast(src(yi - 2, xi - 2)), saturate_cast(src(yi - 2, xi - 1)), saturate_cast(src(yi - 2, xi)), saturate_cast(src(yi - 2, xi + 1)), (x - xi + 2.0f) / 4.0f); - arr[1] = cubicInterpolate(saturate_cast(src(yi - 1, xi - 2)), saturate_cast(src(yi - 1, xi - 1)), saturate_cast(src(yi - 1, xi)), saturate_cast(src(yi - 1, xi + 1)), (x - xi + 2.0f) / 4.0f); - arr[2] = cubicInterpolate(saturate_cast(src(yi , xi - 2)), saturate_cast(src(yi , xi - 1)), saturate_cast(src(yi , xi)), saturate_cast(src(yi , xi + 1)), (x - xi + 2.0f) / 4.0f); - arr[3] = cubicInterpolate(saturate_cast(src(yi + 1, xi - 2)), saturate_cast(src(yi + 1, xi - 1)), saturate_cast(src(yi + 1, xi)), saturate_cast(src(yi + 1, xi + 1)), (x - xi + 2.0f) / 4.0f); + work_type res = (!wsum)? VecTraits::all(0) : sum / wsum; - return saturate_cast(cubicInterpolate(arr[0], arr[1], arr[2], arr[3], (y - yi + 2.0f) / 4.0f)); + return saturate_cast(res); } const Ptr2D src; diff --git a/modules/gpu/test/interpolation.hpp b/modules/gpu/test/interpolation.hpp index 995b91e19b..e38dc4c969 100644 --- a/modules/gpu/test/interpolation.hpp +++ b/modules/gpu/test/interpolation.hpp @@ -54,7 +54,7 @@ template struct NearestInterpolator { static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) { - return readVal(src, cvFloor(y), cvFloor(x), c, border_type, borderVal); + return readVal(src, cvRound(y), cvRound(x), c, border_type, borderVal); } }; @@ -62,9 +62,6 @@ template struct LinearInterpolator { static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) { - x -= 0.5f; - y -= 0.5f; - int x1 = cvFloor(x); int y1 = cvFloor(y); int x2 = x1 + 1; @@ -83,37 +80,47 @@ template struct LinearInterpolator template struct CubicInterpolator { - static float getValue(float p[4], float x) + static float bicubicCoeff(float x_) { - return static_cast(p[1] + 0.5 * x * (p[2] - p[0] + x*(2.0*p[0] - 5.0*p[1] + 4.0*p[2] - p[3] + x*(3.0*(p[1] - p[2]) + p[3] - p[0])))); + float x = fabsf(x_); + if (x <= 1.0f) + { + return x * x * (1.5f * x - 2.5f) + 1.0f; + } + else if (x < 2.0f) + { + return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f; + } + else + { + return 0.0f; + } } - static float getValue(float p[4][4], float x, float y) + static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) { - float arr[4]; + const float xmin = ceilf(x - 2.0f); + const float xmax = floorf(x + 2.0f); - arr[0] = getValue(p[0], x); - arr[1] = getValue(p[1], x); - arr[2] = getValue(p[2], x); - arr[3] = getValue(p[3], x); + const float ymin = ceilf(y - 2.0f); + const float ymax = floorf(y + 2.0f); - return getValue(arr, y); - } + float sum = 0.0f; + float wsum = 0.0f; - static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) - { - int ix = cvRound(x); - int iy = cvRound(y); - - float vals[4][4] = + for (float cy = ymin; cy <= ymax; cy += 1.0f) { - {(float)readVal(src, iy - 2, ix - 2, c, border_type, borderVal), (float)readVal(src, iy - 2, ix - 1, c, border_type, borderVal), (float)readVal(src, iy - 2, ix, c, border_type, borderVal), (float)readVal(src, iy - 2, ix + 1, c, border_type, borderVal)}, - {(float)readVal(src, iy - 1, ix - 2, c, border_type, borderVal), (float)readVal(src, iy - 1, ix - 1, c, border_type, borderVal), (float)readVal(src, iy - 1, ix, c, border_type, borderVal), (float)readVal(src, iy - 1, ix + 1, c, border_type, borderVal)}, - {(float)readVal(src, iy , ix - 2, c, border_type, borderVal), (float)readVal(src, iy , ix - 1, c, border_type, borderVal), (float)readVal(src, iy , ix, c, border_type, borderVal), (float)readVal(src, iy , ix + 1, c, border_type, borderVal)}, - {(float)readVal(src, iy + 1, ix - 2, c, border_type, borderVal), (float)readVal(src, iy + 1, ix - 1, c, border_type, borderVal), (float)readVal(src, iy + 1, ix, c, border_type, borderVal), (float)readVal(src, iy + 1, ix + 1, c, border_type, borderVal)}, - }; + for (float cx = xmin; cx <= xmax; cx += 1.0f) + { + const float w = bicubicCoeff(x - cx) * bicubicCoeff(y - cy); + sum += w * readVal(src, cvFloor(cy), cvFloor(cx), c, border_type, borderVal); + wsum += w; + } + } - return cv::saturate_cast(getValue(vals, static_cast((x - ix + 2.0) / 4.0), static_cast((y - iy + 2.0) / 4.0))); + float res = (!wsum)? 0 : sum / wsum; + + return cv::saturate_cast(res); } }; diff --git a/modules/gpu/test/test_remap.cpp b/modules/gpu/test/test_remap.cpp index 84fde5adba..c61a899142 100644 --- a/modules/gpu/test/test_remap.cpp +++ b/modules/gpu/test/test_remap.cpp @@ -163,7 +163,7 @@ TEST_P(Remap, Accuracy) cv::Mat dst_gold; remapGold(src, xmap, ymap, dst_gold, interpolation, borderType, val); - EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-4 : 1.0); + EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-3 : 1.0); } INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Remap, testing::Combine(