Merge pull request #21530 from anna-khakimova:ak/simd_divrc

* GAPI Fluid: SIMD for DivRC kernel.

* Fluid: Div kernel's SIMD refactoring

* SIMD for DivRC 3 channel case

* Applied comments
pull/21674/head
Anna Khakimova 3 years ago committed by GitHub
parent ebb6915e58
commit 9c7adb7248
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp
  2. 4
      modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp
  3. 77
      modules/gapi/src/backends/fluid/gfluidcore.cpp
  4. 27
      modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp
  5. 23
      modules/gapi/src/backends/fluid/gfluidcore_func.hpp
  6. 853
      modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp

@ -528,6 +528,10 @@ PERF_TEST_P_(DivRCPerfTest, TestPerformance)
// FIXIT Unstable input data for divide // FIXIT Unstable input data for divide
initMatsRandU(type, sz, dtype, false); initMatsRandU(type, sz, dtype, false);
// This condition is needed as a workaround for a bug in OpenCV.
// It reinitializes the divisor matrix without zero values for the CV_16S DST type.
if (dtype == CV_16S || (type == CV_16S && dtype == -1))
cv::randu(in_mat1, cv::Scalar::all(1), cv::Scalar::all(255));
// OpenCV code /////////////////////////////////////////////////////////// // OpenCV code ///////////////////////////////////////////////////////////
cv::divide(sc, in_mat1, out_mat_ocv, scale, dtype); cv::divide(sc, in_mat1, out_mat_ocv, scale, dtype);

@ -101,8 +101,8 @@ INSTANTIATE_TEST_CASE_P(DivCPerfTestFluid, DivCPerfTest,
INSTANTIATE_TEST_CASE_P(DivRCPerfTestFluid, DivRCPerfTest, INSTANTIATE_TEST_CASE_P(DivRCPerfTestFluid, DivRCPerfTest,
Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()), Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()),
Values(szSmall128, szVGA, sz720p, sz1080p), Values(szSmall128, szVGA, sz720p, sz1080p),
Values(CV_8UC1, CV_8UC3, CV_32FC1), Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
Values(-1, CV_8U, CV_32F), Values(-1, CV_8U, CV_16U, CV_16S, CV_32F),
Values(1.0), Values(1.0),
Values(cv::compile_args(CORE_FLUID)))); Values(cv::compile_args(CORE_FLUID))));

@ -936,8 +936,8 @@ CV_ALWAYS_INLINE void run_arithm_s(Buffer &dst, const View &src, const float sca
} }
template<typename DST, typename SRC> template<typename DST, typename SRC>
static void run_arithm_rs(Buffer &dst, const View &src, const float scalar[4], Arithm arithm, CV_ALWAYS_INLINE void run_arithm_rs(Buffer &dst, const View &src, const float scalar[],
float scale=1) Arithm arithm, float scale=1)
{ {
const auto *in = src.InLine<SRC>(0); const auto *in = src.InLine<SRC>(0);
auto *out = dst.OutLine<DST>(); auto *out = dst.OutLine<DST>();
@ -955,15 +955,23 @@ static void run_arithm_rs(Buffer &dst, const View &src, const float scalar[4], A
w = subrc_simd(scalar, in, out, length, chan); w = subrc_simd(scalar, in, out, length, chan);
#endif #endif
for (; w < length; ++w) for (; w < length; ++w)
{
out[w] = subr<DST>(in[w], scalar[w % chan]); out[w] = subr<DST>(in[w], scalar[w % chan]);
}
break; break;
} }
// TODO: optimize division
case ARITHM_DIVIDE: case ARITHM_DIVIDE:
for (int w=0; w < width; w++) {
for (int c=0; c < chan; c++) int w = 0;
out[chan*w + c] = div<DST>(scalar[c], in[chan*w + c], scale); #if CV_SIMD
w = divrc_simd(scalar, in, out, length, chan, scale);
#endif
for (; w < length; ++w)
{
out[w] = div<DST>(scalar[w % chan], in[w], scale);
}
break; break;
}
default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation"); default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation");
} }
} }
@ -1319,7 +1327,9 @@ CV_ALWAYS_INLINE void run_divc(Buffer& dst, const View& src, Buffer& scratch,
#endif #endif
for (; w < length; ++w) for (; w < length; ++w)
{
out[w] = div<DST>(in[w], scalar[w % chan], scale); out[w] = div<DST>(in[w], scalar[w % chan], scale);
}
} }
GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, true) GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, true)
@ -1402,32 +1412,55 @@ GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, true)
} }
}; };
GAPI_FLUID_KERNEL(GFluidDivRC, cv::gapi::core::GDivRC, false) GAPI_FLUID_KERNEL(GFluidDivRC, cv::gapi::core::GDivRC, true)
{ {
static const int Window = 1; static const int Window = 1;
static void run(const cv::Scalar &_scalar, const View &src, double _scale, int /*dtype*/, static void run(const cv::Scalar& _scalar, const View& src, double _scale, int /*dtype*/,
Buffer &dst) Buffer& dst, Buffer& scratch)
{ {
const float scalar[4] = { GAPI_Assert(src.meta().chan <= 4);
static_cast<float>(_scalar[0]),
static_cast<float>(_scalar[1]), if (dst.y() == 0)
static_cast<float>(_scalar[2]), {
static_cast<float>(_scalar[3]) const int chan = src.meta().chan;
}; float* _scratch = scratch.OutLine<float>();
scalar_to_scratch(_scalar, _scratch, scratch.length(), chan);
}
const float* scalar = scratch.OutLine<float>();
const float scale = static_cast<float>(_scale); const float scale = static_cast<float>(_scale);
// DST SRC OP __VA_ARGS__ // DST SRC OP __VA_ARGS__
UNARY_(uchar , uchar , run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); UNARY_(uchar, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(uchar , short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); UNARY_(uchar, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(uchar , float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); UNARY_(uchar, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_( short, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); UNARY_(uchar, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_( float, uchar , run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); UNARY_(ushort, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_( float, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); UNARY_(ushort, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_( float, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); UNARY_(ushort, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(ushort, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(short, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(short, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(short, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(short, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(float, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(float, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(float, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(float, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
CV_Error(cv::Error::StsBadArg, "unsupported combination of types"); CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
} }
static void initScratch(const GScalarDesc&, const GMatDesc&, double, int, Buffer& scratch)
{
initScratchBuffer(scratch);
}
static void resetScratch(Buffer& /*scratch*/)
{
}
}; };
//------------------- //-------------------

@ -235,6 +235,33 @@ ABSDIFFC_SIMD(float)
#undef ABSDIFFC_SIMD #undef ABSDIFFC_SIMD
// DIVRC_SIMD(SRC, DST) defines one divrc_simd() overload for the given
// source/destination element types. The generated function does nothing
// itself: it forwards (scalar, in, out, length, chan, scale) to the
// CPU-dispatched divrc_simd implementation through CV_CPU_DISPATCH, using
// CV_CPU_DISPATCH_MODES_ALL.
// NOTE(review): no explicit `return` appears in the generated body, so the
// int result is presumably returned by the CV_CPU_DISPATCH expansion -
// confirm against the dispatch macro definition.
#define DIVRC_SIMD(SRC, DST) \
int divrc_simd(const float scalar[], const SRC in[], DST out[], \
const int length, const int chan, const float scale) \
{ \
CV_CPU_DISPATCH(divrc_simd, (scalar, in, out, length, chan, scale), \
CV_CPU_DISPATCH_MODES_ALL); \
}
// Instantiate the wrapper for all 16 (SRC, DST) combinations of
// uchar, ushort, short and float.
DIVRC_SIMD(uchar, uchar)
DIVRC_SIMD(ushort, uchar)
DIVRC_SIMD(short, uchar)
DIVRC_SIMD(float, uchar)
DIVRC_SIMD(short, short)
DIVRC_SIMD(ushort, short)
DIVRC_SIMD(uchar, short)
DIVRC_SIMD(float, short)
DIVRC_SIMD(ushort, ushort)
DIVRC_SIMD(uchar, ushort)
DIVRC_SIMD(short, ushort)
DIVRC_SIMD(float, ushort)
DIVRC_SIMD(uchar, float)
DIVRC_SIMD(ushort, float)
DIVRC_SIMD(short, float)
DIVRC_SIMD(float, float)
// The helper macro is only needed for the instantiations above.
#undef DIVRC_SIMD
int split3_simd(const uchar in[], uchar out1[], uchar out2[], int split3_simd(const uchar in[], uchar out1[], uchar out2[],
uchar out3[], const int width) uchar out3[], const int width)
{ {

@ -187,6 +187,29 @@ ABSDIFFC_SIMD(float)
#undef ABSDIFFC_SIMD #undef ABSDIFFC_SIMD
// DIVRC_SIMD(SRC, DST) declares (without defining) one divrc_simd() overload
// taking a float scalar array, a SRC input array, a DST output array, and the
// length, channel count and scale values.
// NOTE(review): the meaning of the int return value is not visible here;
// it looks like the count of elements already processed - confirm against
// the implementation.
#define DIVRC_SIMD(SRC, DST) \
int divrc_simd(const float scalar[], const SRC in[], DST out[], \
const int length, const int chan, const float scale);
// Declare the overload for all 16 (SRC, DST) combinations of
// uchar, ushort, short and float.
DIVRC_SIMD(uchar, uchar)
DIVRC_SIMD(ushort, uchar)
DIVRC_SIMD(short, uchar)
DIVRC_SIMD(float, uchar)
DIVRC_SIMD(short, short)
DIVRC_SIMD(ushort, short)
DIVRC_SIMD(uchar, short)
DIVRC_SIMD(float, short)
DIVRC_SIMD(ushort, ushort)
DIVRC_SIMD(uchar, ushort)
DIVRC_SIMD(short, ushort)
DIVRC_SIMD(float, ushort)
DIVRC_SIMD(uchar, float)
DIVRC_SIMD(ushort, float)
DIVRC_SIMD(short, float)
DIVRC_SIMD(float, float)
// The helper macro is only needed for the declarations above.
#undef DIVRC_SIMD
int split3_simd(const uchar in[], uchar out1[], uchar out2[], int split3_simd(const uchar in[], uchar out1[], uchar out2[],
uchar out3[], const int width); uchar out3[], const int width);

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save