From 5e89b9a45558b9fbf751748d3db1a13b118cf7aa Mon Sep 17 00:00:00 2001 From: Aleksei Trutnev Date: Tue, 25 Jan 2022 18:13:30 +0300 Subject: [PATCH] Dynamic Dispatching for split3 --- .../gapi/src/backends/fluid/gfluidcore.cpp | 24 ++++++------------ .../fluid/gfluidcore_func.dispatch.cpp | 7 ++++++ .../src/backends/fluid/gfluidcore_func.hpp | 3 +++ .../backends/fluid/gfluidcore_func.simd.hpp | 25 +++++++++++++++++++ 4 files changed, 43 insertions(+), 16 deletions(-) diff --git a/modules/gapi/src/backends/fluid/gfluidcore.cpp b/modules/gapi/src/backends/fluid/gfluidcore.cpp index c33129a0f1..d701a2ac24 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore.cpp @@ -2509,26 +2509,18 @@ GAPI_FLUID_KERNEL(GFluidSplit3, cv::gapi::core::GSplit3, false) static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3) { - const auto *in = src.InLine(0); - auto *out1 = dst1.OutLine(); - auto *out2 = dst2.OutLine(); - auto *out3 = dst3.OutLine(); + const auto *in = src.InLine(0); + auto *out1 = dst1.OutLine(); + auto *out2 = dst2.OutLine(); + auto *out3 = dst3.OutLine(); GAPI_Assert(3 == src.meta().chan); int width = src.length(); + int w = 0; /* index of the first pixel not yet split; stays 0 when the SIMD path is compiled out */ - int w = 0; // cycle counter - - #if CV_SIMD128 - for (; w <= width-16; w+=16) - { - v_uint8x16 a, b, c; - v_load_deinterleave(&in[3*w], a, b, c); - v_store(&out1[w], a); - v_store(&out2[w], b); - v_store(&out3[w], c); - } - #endif +#if CV_SIMD + w = split3_simd(in, out1, out2, out3, width); /* vector path returns how many leading pixels it handled; the scalar loop below finishes the rest */ +#endif for (; w < width; w++) { diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp index 348c00ed12..8ba99bae5e 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp @@ -207,6 +207,13 @@ ABSDIFFC_SIMD(float) #undef ABSDIFFC_SIMD +int split3_simd(const uchar in[], uchar out1[], uchar out2[], + uchar out3[], const 
int width) +{ /* Thin wrapper: hands the unchanged arguments to CV_CPU_DISPATCH over CV_CPU_DISPATCH_MODES_ALL. NOTE(review): presumably the macro picks the best split3_simd build for the running CPU and returns its result - confirm against OpenCV's CPU dispatch documentation. */ + CV_CPU_DISPATCH(split3_simd, (in, out1, out2, out3, width), + CV_CPU_DISPATCH_MODES_ALL); +} + } // namespace fluid } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp index 6023a879d9..f0a16e8829 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp @@ -163,6 +163,9 @@ ABSDIFFC_SIMD(float) #undef ABSDIFFC_SIMD +int split3_simd(const uchar in[], uchar out1[], uchar out2[], + uchar out3[], const int width); /* splits interleaved 3-channel uchar data into out1/out2/out3; returns the number of leading pixels processed */ + } // namespace fluid } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp index 38c47072f4..2f41aa46ea 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp @@ -184,6 +184,9 @@ ABSDIFFC_SIMD(float) #undef ABSDIFFC_SIMD +int split3_simd(const uchar in[], uchar out1[], uchar out2[], + uchar out3[], const int width); + #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY struct scale_tag {}; @@ -1568,6 +1571,28 @@ ABSDIFFC_SIMD(float) #undef ABSDIFFC_SIMD +//------------------------- +// +// Fluid kernels: Split3 +// +//------------------------- + +int split3_simd(const uchar in[], uchar out1[], uchar out2[], + uchar out3[], const int width) +{ /* Vector part of Split3: reads interleaved 3-channel pixels from in[] and writes one plane each to out1[], out2[], out3[]. Only whole vectors are processed; the return value tells the caller where its scalar tail loop must start. */ + constexpr int nlanes = v_uint8::nlanes; /* pixels consumed per loop iteration */ + int x = 0; + for (; x <= width - nlanes; x += nlanes) /* runs only while a full vector of pixels remains */ + { + v_uint8 a, b, c; + v_load_deinterleave(&in[3 * x], a, b, c); /* 3 * x: three input bytes per pixel; presumably a, b, c get channels 0, 1, 2 - confirm against the universal intrinsics docs */ + vx_store(&out1[x], a); + vx_store(&out2[x], b); + vx_store(&out3[x], c); + } + return x; /* index of the first pixel NOT processed (0 when width < nlanes) */ +} + #endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY CV_CPU_OPTIMIZATION_NAMESPACE_END