Merge pull request #12945 from terfendail:core_wintr_full

7 years ago · cdf906b233
parent ead7bc883d 6ad8a9c09d
commit cdf906b233
1 changed file with 43 additions and 37 deletions
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@@ -1379,7 +1379,7 @@ struct InRange_SIMD
    }
 };

-#if CV_SIMD128
+#if CV_SIMD

 template <>
 struct InRange_SIMD<uchar>
@@ -1388,16 +1388,17 @@ struct InRange_SIMD<uchar>
        uchar * dst, int len) const
    {
        int x = 0;
-        const int width = v_uint8x16::nlanes;
+        const int width = v_uint8::nlanes;

        for (; x <= len - width; x += width)
        {
-            v_uint8x16 values = v_load(src1 + x);
-            v_uint8x16 low = v_load(src2 + x);
-            v_uint8x16 high = v_load(src3 + x);
+            v_uint8 values = vx_load(src1 + x);
+            v_uint8 low = vx_load(src2 + x);
+            v_uint8 high = vx_load(src3 + x);

            v_store(dst + x, (values >= low) & (high >= values));
        }
+        vx_cleanup();
        return x;
    }
 };
@@ -1409,16 +1410,17 @@ struct InRange_SIMD<schar>
        uchar * dst, int len) const
    {
        int x = 0;
-        const int width = v_int8x16::nlanes;
+        const int width = v_int8::nlanes;

        for (; x <= len - width; x += width)
        {
-            v_int8x16 values = v_load(src1 + x);
-            v_int8x16 low = v_load(src2 + x);
-            v_int8x16 high = v_load(src3 + x);
+            v_int8 values = vx_load(src1 + x);
+            v_int8 low = vx_load(src2 + x);
+            v_int8 high = vx_load(src3 + x);

            v_store((schar*)(dst + x), (values >= low) & (high >= values));
        }
+        vx_cleanup();
        return x;
    }
 };
@@ -1430,20 +1432,21 @@ struct InRange_SIMD<ushort>
        uchar * dst, int len) const
    {
        int x = 0;
-        const int width = v_uint16x8::nlanes * 2;
+        const int width = v_uint16::nlanes * 2;

        for (; x <= len - width; x += width)
        {
-            v_uint16x8 values1 = v_load(src1 + x);
-            v_uint16x8 low1 = v_load(src2 + x);
-            v_uint16x8 high1 = v_load(src3 + x);
+            v_uint16 values1 = vx_load(src1 + x);
+            v_uint16 low1 = vx_load(src2 + x);
+            v_uint16 high1 = vx_load(src3 + x);

-            v_uint16x8 values2 = v_load(src1 + x + v_uint16x8::nlanes);
-            v_uint16x8 low2 = v_load(src2 + x + v_uint16x8::nlanes);
-            v_uint16x8 high2 = v_load(src3 + x + v_uint16x8::nlanes);
+            v_uint16 values2 = vx_load(src1 + x + v_uint16::nlanes);
+            v_uint16 low2 = vx_load(src2 + x + v_uint16::nlanes);
+            v_uint16 high2 = vx_load(src3 + x + v_uint16::nlanes);

            v_store(dst + x, v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2)));
        }
+        vx_cleanup();
        return x;
    }
 };
@@ -1455,20 +1458,21 @@ struct InRange_SIMD<short>
        uchar * dst, int len) const
    {
        int x = 0;
-        const int width = (int)v_int16x8::nlanes * 2;
+        const int width = (int)v_int16::nlanes * 2;

        for (; x <= len - width; x += width)
        {
-            v_int16x8 values1 = v_load(src1 + x);
-            v_int16x8 low1 = v_load(src2 + x);
-            v_int16x8 high1 = v_load(src3 + x);
+            v_int16 values1 = vx_load(src1 + x);
+            v_int16 low1 = vx_load(src2 + x);
+            v_int16 high1 = vx_load(src3 + x);

-            v_int16x8 values2 = v_load(src1 + x + v_int16x8::nlanes);
-            v_int16x8 low2 = v_load(src2 + x + v_int16x8::nlanes);
-            v_int16x8 high2 = v_load(src3 + x + v_int16x8::nlanes);
+            v_int16 values2 = vx_load(src1 + x + v_int16::nlanes);
+            v_int16 low2 = vx_load(src2 + x + v_int16::nlanes);
+            v_int16 high2 = vx_load(src3 + x + v_int16::nlanes);

            v_store((schar*)(dst + x), v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2)));
        }
+        vx_cleanup();
        return x;
    }
 };
@@ -1480,20 +1484,21 @@ struct InRange_SIMD<int>
        uchar * dst, int len) const
    {
        int x = 0;
-        const int width = (int)v_int32x4::nlanes * 2;
+        const int width = (int)v_int32::nlanes * 2;

        for (; x <= len - width; x += width)
        {
-            v_int32x4 values1 = v_load(src1 + x);
-            v_int32x4 low1 = v_load(src2 + x);
-            v_int32x4 high1 = v_load(src3 + x);
+            v_int32 values1 = vx_load(src1 + x);
+            v_int32 low1 = vx_load(src2 + x);
+            v_int32 high1 = vx_load(src3 + x);

-            v_int32x4 values2 = v_load(src1 + x + v_int32x4::nlanes);
-            v_int32x4 low2 = v_load(src2 + x + v_int32x4::nlanes);
-            v_int32x4 high2 = v_load(src3 + x + v_int32x4::nlanes);
+            v_int32 values2 = vx_load(src1 + x + v_int32::nlanes);
+            v_int32 low2 = vx_load(src2 + x + v_int32::nlanes);
+            v_int32 high2 = vx_load(src3 + x + v_int32::nlanes);

            v_pack_store(dst + x, v_reinterpret_as_u16(v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2))));
        }
+        vx_cleanup();
        return x;
    }
 };
@@ -1505,20 +1510,21 @@ struct InRange_SIMD<float>
        uchar * dst, int len) const
    {
        int x = 0;
-        const int width = (int)v_float32x4::nlanes * 2;
+        const int width = (int)v_float32::nlanes * 2;

        for (; x <= len - width; x += width)
        {
-            v_float32x4 values1 = v_load(src1 + x);
-            v_float32x4 low1 = v_load(src2 + x);
-            v_float32x4 high1 = v_load(src3 + x);
+            v_float32 values1 = vx_load(src1 + x);
+            v_float32 low1 = vx_load(src2 + x);
+            v_float32 high1 = vx_load(src3 + x);

-            v_float32x4 values2 = v_load(src1 + x + v_float32x4::nlanes);
-            v_float32x4 low2 = v_load(src2 + x + v_float32x4::nlanes);
-            v_float32x4 high2 = v_load(src3 + x + v_float32x4::nlanes);
+            v_float32 values2 = vx_load(src1 + x + v_float32::nlanes);
+            v_float32 low2 = vx_load(src2 + x + v_float32::nlanes);
+            v_float32 high2 = vx_load(src3 + x + v_float32::nlanes);

            v_pack_store(dst + x, v_pack(v_reinterpret_as_u32((values1 >= low1) & (high1 >= values1)), v_reinterpret_as_u32((values2 >= low2) & (high2 >= values2))));
        }
+        vx_cleanup();
        return x;
    }
 };