Merge pull request #6790 from terfendail:linearresize_accuracy

9 years ago · 9ed1474340
parent 88e3d07178 04c7d03188
commit 9ed1474340
2 changed files with 17 additions and 17 deletions
--- a/3rdparty/carotene/hal/tegra_hal.hpp
+++ b/3rdparty/carotene/hal/tegra_hal.hpp
@@ -1433,7 +1433,7 @@ inline int TEGRA_MORPHFREE(cvhalFilter2D *context)

 #define TEGRA_RESIZE(src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, inv_scale_x, inv_scale_y, interpolation) \
 ( \
-    /*interpolation == CV_HAL_INTER_LINEAR ? \
+    interpolation == CV_HAL_INTER_LINEAR ? \
        CV_MAT_DEPTH(src_type) == CV_8U && CAROTENE_NS::isResizeLinearOpenCVSupported(CAROTENE_NS::Size2D(src_width, src_height), CAROTENE_NS::Size2D(dst_width, dst_height), ((src_type >> CV_CN_SHIFT) + 1)) && \
        inv_scale_x > 0 && inv_scale_y > 0 && \
        (dst_width - 0.5)/inv_scale_x - 0.5 < src_width && (dst_height - 0.5)/inv_scale_y - 0.5 < src_height && \
@@ -1441,7 +1441,7 @@ inline int TEGRA_MORPHFREE(cvhalFilter2D *context)
        std::abs(dst_width / inv_scale_x - src_width) < 0.1 && std::abs(dst_height / inv_scale_y - src_height) < 0.1 ? \
            CAROTENE_NS::resizeLinearOpenCV(CAROTENE_NS::Size2D(src_width, src_height), CAROTENE_NS::Size2D(dst_width, dst_height), \
                                            src_data, src_step, dst_data, dst_step, 1.0/inv_scale_x, 1.0/inv_scale_y, ((src_type >> CV_CN_SHIFT) + 1)), \
-            CV_HAL_ERROR_OK : CV_HAL_ERROR_NOT_IMPLEMENTED :*/ \
+            CV_HAL_ERROR_OK : CV_HAL_ERROR_NOT_IMPLEMENTED : \
    interpolation == CV_HAL_INTER_AREA ? \
        CV_MAT_DEPTH(src_type) == CV_8U && CAROTENE_NS::isResizeAreaSupported(1.0/inv_scale_x, 1.0/inv_scale_y, ((src_type >> CV_CN_SHIFT) + 1)) && \
        std::abs(dst_width / inv_scale_x - src_width) < 0.1 && std::abs(dst_height / inv_scale_y - src_height) < 0.1 ? \
--- a/3rdparty/carotene/src/resize.cpp
+++ b/3rdparty/carotene/src/resize.cpp
@@ -1681,15 +1681,15 @@ void downsample_bilinear_8uc1(const Size2D &ssize, const Size2D &dsize,
                vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col, vres);
 #else
                /* ugly version matching to OpenCV's SSE optimization */
-                int16x4_t v1Ls = vshrn_n_s32(v1L, 5);
-                int16x4_t v1Hs = vshrn_n_s32(v1H, 5);
-                int16x4_t v2Ls = vshrn_n_s32(v2L, 5);
-                int16x4_t v2Hs = vshrn_n_s32(v2H, 5);
+                int16x4_t v1Ls = vshrn_n_s32(v1L, 4);
+                int16x4_t v1Hs = vshrn_n_s32(v1H, 4);
+                int16x4_t v2Ls = vshrn_n_s32(v2L, 4);
+                int16x4_t v2Hs = vshrn_n_s32(v2H, 4);

                int16x8_t v1s = vqdmulhq_s16(vcombine_s16(v1Ls, v1Hs), vrw);
                int16x8_t v2s = vqdmulhq_s16(vcombine_s16(v2Ls, v2Hs), vrW);

-                int16x8_t vsum = vaddq_s16(v1s, v2s);
+                int16x8_t vsum = vaddq_s16(vshrq_n_s16(v1s,1), vshrq_n_s16(v2s,1));
                uint8x8_t vres = vqrshrun_n_s16(vsum, 2);

                vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col, vres);
@@ -1736,15 +1736,15 @@ void downsample_bilinear_8uc1(const Size2D &ssize, const Size2D &dsize,
                vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col + 8, vres);
 #else
                /* ugly version matching to OpenCV's SSE optimization */
-                int16x4_t v1Ls = vshrn_n_s32(v1L, 5);
-                int16x4_t v1Hs = vshrn_n_s32(v1H, 5);
-                int16x4_t v2Ls = vshrn_n_s32(v2L, 5);
-                int16x4_t v2Hs = vshrn_n_s32(v2H, 5);
+                int16x4_t v1Ls = vshrn_n_s32(v1L, 4);
+                int16x4_t v1Hs = vshrn_n_s32(v1H, 4);
+                int16x4_t v2Ls = vshrn_n_s32(v2L, 4);
+                int16x4_t v2Hs = vshrn_n_s32(v2H, 4);

                int16x8_t v1s = vqdmulhq_s16(vcombine_s16(v1Ls, v1Hs), vrw);
                int16x8_t v2s = vqdmulhq_s16(vcombine_s16(v2Ls, v2Hs), vrW);

-                int16x8_t vsum = vaddq_s16(v1s, v2s);
+                int16x8_t vsum = vaddq_s16(vshrq_n_s16(v1s,1), vshrq_n_s16(v2s,1));
                uint8x8_t vres = vqrshrun_n_s16(vsum, 2);

                vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col + 8, vres);
@@ -1836,15 +1836,15 @@ downsample_bilinear_8uc1_col_loop8:
            vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col, vres);
 #else
            /* ugly version matching to OpenCV's SSE optimization */
-            int16x4_t v1Ls = vshrn_n_s32(v1L, 5);
-            int16x4_t v1Hs = vshrn_n_s32(v1H, 5);
-            int16x4_t v2Ls = vshrn_n_s32(v2L, 5);
-            int16x4_t v2Hs = vshrn_n_s32(v2H, 5);
+            int16x4_t v1Ls = vshrn_n_s32(v1L, 4);
+            int16x4_t v1Hs = vshrn_n_s32(v1H, 4);
+            int16x4_t v2Ls = vshrn_n_s32(v2L, 4);
+            int16x4_t v2Hs = vshrn_n_s32(v2H, 4);

            int16x8_t v1s = vqdmulhq_s16(vcombine_s16(v1Ls, v1Hs), vrw);
            int16x8_t v2s = vqdmulhq_s16(vcombine_s16(v2Ls, v2Hs), vrW);

-            int16x8_t vsum = vaddq_s16(v1s, v2s);
+            int16x8_t vsum = vaddq_s16(vshrq_n_s16(v1s,1), vshrq_n_s16(v2s,1));
            uint8x8_t vres = vqrshrun_n_s16(vsum, 2);

            vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col, vres);