Merge pull request #13879 from chacha21:REDUCE_SUM2

add REDUCE_SUM2 #13879 

Proposal to add REDUCE_SUM2 to cv::reduce, an operation that sums the squares of the elements.
pull/23563/head
Pierre Chatelier 2 years ago committed by GitHub
parent 23b819efb8
commit 6dd8a9b6ad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 5
      modules/core/include/opencv2/core.hpp
  2. 5
      modules/core/perf/opencl/perf_arithm.cpp
  3. 10
      modules/core/perf/perf_reduce.cpp
  4. 237
      modules/core/src/matrix_operations.cpp
  5. 3
      modules/core/src/opencl/reduce2.cl
  6. 24
      modules/core/src/precomp.hpp
  7. 16
      modules/core/test/ocl/test_arithm.cpp
  8. 44
      modules/core/test/test_mat.cpp
  9. 2
      modules/core/test/test_math.cpp

@ -230,7 +230,8 @@ enum KmeansFlags {
enum ReduceTypes { REDUCE_SUM = 0, //!< the output is the sum of all rows/columns of the matrix.
REDUCE_AVG = 1, //!< the output is the mean vector of all rows/columns of the matrix.
REDUCE_MAX = 2, //!< the output is the maximum (column/row-wise) of all rows/columns of the matrix.
REDUCE_MIN = 3 //!< the output is the minimum (column/row-wise) of all rows/columns of the matrix.
REDUCE_MIN = 3, //!< the output is the minimum (column/row-wise) of all rows/columns of the matrix.
REDUCE_SUM2 = 4 //!< the output is the sum of all squared rows/columns of the matrix.
};
//! @} core_array
@ -903,7 +904,7 @@ The function #reduce reduces the matrix to a vector by treating the matrix rows/
1D vectors and performing the specified operation on the vectors until a single row/column is
obtained. For example, the function can be used to compute horizontal and vertical projections of a
raster image. In case of #REDUCE_MAX and #REDUCE_MIN , the output image should have the same type as the source one.
In case of #REDUCE_SUM and #REDUCE_AVG , the output may have a larger element bit-depth to preserve accuracy.
In case of #REDUCE_SUM, #REDUCE_SUM2 and #REDUCE_AVG , the output may have a larger element bit-depth to preserve accuracy.
Multi-channel arrays are also supported in these reduction modes.
The following code demonstrates its usage for a single channel matrix.

@ -1150,7 +1150,7 @@ OCL_PERF_TEST_P(ReduceMinMaxFixture, Reduce,
SANITY_CHECK(dst, eps);
}
CV_ENUM(ReduceAccOp, CV_REDUCE_SUM, CV_REDUCE_AVG)
CV_ENUM(ReduceAccOp, REDUCE_SUM, REDUCE_AVG, REDUCE_SUM2)
typedef tuple<Size, std::pair<MatType, MatType>, int, ReduceAccOp> ReduceAccParams;
typedef TestBaseWithParam<ReduceAccParams> ReduceAccFixture;
@ -1168,7 +1168,6 @@ OCL_PERF_TEST_P(ReduceAccFixture, Reduce,
dim = get<2>(params), op = get<3>(params);
const Size srcSize = get<0>(params),
dstSize(dim == 0 ? srcSize.width : 1, dim == 0 ? 1 : srcSize.height);
const double eps = CV_MAT_DEPTH(dtype) <= CV_32S ? 1 : 3e-4;
checkDeviceMaxMemoryAllocSize(srcSize, stype);
checkDeviceMaxMemoryAllocSize(srcSize, dtype);
@ -1178,7 +1177,7 @@ OCL_PERF_TEST_P(ReduceAccFixture, Reduce,
OCL_TEST_CYCLE() cv::reduce(src, dst, dim, op, dtype);
SANITY_CHECK(dst, eps);
SANITY_CHECK_NOTHING();
}
} } // namespace opencv_test::ocl

@ -5,7 +5,7 @@ namespace opencv_test
{
using namespace perf;
CV_ENUM(ROp, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
CV_ENUM(ROp, REDUCE_SUM, REDUCE_AVG, REDUCE_MAX, REDUCE_MIN, REDUCE_SUM2)
typedef tuple<Size, MatType, ROp> Size_MatType_ROp_t;
typedef perf::TestBaseWithParam<Size_MatType_ROp_t> Size_MatType_ROp;
@ -23,7 +23,7 @@ PERF_TEST_P(Size_MatType_ROp, reduceR,
int reduceOp = get<2>(GetParam());
int ddepth = -1;
if( CV_MAT_DEPTH(matType) < CV_32S && (reduceOp == REDUCE_SUM || reduceOp == REDUCE_AVG) )
if( CV_MAT_DEPTH(matType) < CV_32S && (reduceOp == REDUCE_SUM || reduceOp == REDUCE_AVG || reduceOp == REDUCE_SUM2) )
ddepth = CV_32S;
Mat src(sz, matType);
@ -35,7 +35,7 @@ PERF_TEST_P(Size_MatType_ROp, reduceR,
int runs = 15;
TEST_CYCLE_MULTIRUN(runs) reduce(src, vec, 0, reduceOp, ddepth);
SANITY_CHECK(vec, 1);
SANITY_CHECK_NOTHING();
}
PERF_TEST_P(Size_MatType_ROp, reduceC,
@ -51,7 +51,7 @@ PERF_TEST_P(Size_MatType_ROp, reduceC,
int reduceOp = get<2>(GetParam());
int ddepth = -1;
if( CV_MAT_DEPTH(matType)< CV_32S && (reduceOp == REDUCE_SUM || reduceOp == REDUCE_AVG) )
if( CV_MAT_DEPTH(matType)< CV_32S && (reduceOp == REDUCE_SUM || reduceOp == REDUCE_AVG || reduceOp == REDUCE_SUM2) )
ddepth = CV_32S;
Mat src(sz, matType);
@ -62,7 +62,7 @@ PERF_TEST_P(Size_MatType_ROp, reduceC,
TEST_CYCLE() reduce(src, vec, 1, reduceOp, ddepth);
SANITY_CHECK(vec, 1);
SANITY_CHECK_NOTHING();
}
typedef tuple<Size, MatType, int> Size_MatType_RMode_t;

@ -341,29 +341,32 @@ cv::Mat cv::Mat::cross(InputArray _m) const
namespace cv
{
template<typename T, typename ST, class Op> static void
reduceR_( const Mat& srcmat, Mat& dstmat )
template<typename T, typename ST, typename WT, class Op, class OpInit>
class ReduceR_Invoker : public ParallelLoopBody
{
typedef typename Op::rtype WT;
Size size = srcmat.size();
size.width *= srcmat.channels();
AutoBuffer<WT> buffer(size.width);
public:
ReduceR_Invoker(const Mat& aSrcmat, Mat& aDstmat, Op& aOp, OpInit& aOpInit)
:srcmat(aSrcmat),dstmat(aDstmat),op(aOp),opInit(aOpInit),buffer(srcmat.size().width*srcmat.channels())
{
}
void operator()(const Range& range) const CV_OVERRIDE
{
const T* src = srcmat.ptr<T>();
const size_t srcstep = srcmat.step/sizeof(src[0]);
WT* buf = buffer.data();
ST* dst = dstmat.ptr<ST>();
const T* src = srcmat.ptr<T>();
size_t srcstep = srcmat.step/sizeof(src[0]);
int i;
Op op;
int i = 0;
for( i = 0; i < size.width; i++ )
buf[i] = src[i];
for( i = range.start ; i < range.end; i++ )
buf[i] = opInit(src[i]);
for( ; --size.height; )
int height = srcmat.size().height;
for( ; --height; )
{
src += srcstep;
i = 0;
i = range.start;
#if CV_ENABLE_UNROLLED
for(; i <= size.width - 4; i += 4 )
for(; i <= range.end - 4; i += 4 )
{
WT s0, s1;
s0 = op(buf[i], (WT)src[i]);
@ -375,63 +378,94 @@ reduceR_( const Mat& srcmat, Mat& dstmat )
buf[i+2] = s0; buf[i+3] = s1;
}
#endif
for( ; i < size.width; i++ )
for( ; i < range.end; i++ )
buf[i] = op(buf[i], (WT)src[i]);
}
for( i = 0; i < size.width; i++ )
for( i = range.start ; i < range.end; i++ )
dst[i] = (ST)buf[i];
}
}
private:
const Mat& srcmat;
Mat& dstmat;
Op& op;
OpInit& opInit;
mutable AutoBuffer<WT> buffer;
};
template<typename T, typename ST, class Op> static void
reduceC_( const Mat& srcmat, Mat& dstmat )
template<typename T, typename ST, class Op, class OpInit = OpNop<ST> > static void
reduceR_( const Mat& srcmat, Mat& dstmat)
{
typedef typename Op::rtype WT;
Size size = srcmat.size();
int cn = srcmat.channels();
size.width *= cn;
Op op;
OpInit opInit;
ReduceR_Invoker<T, ST, WT, Op, OpInit> body(srcmat, dstmat, op, opInit);
//group columns by 64 bytes for data locality
parallel_for_(Range(0, srcmat.size().width*srcmat.channels()), body, srcmat.size().width*CV_ELEM_SIZE(srcmat.depth())/64);
}
for( int y = 0; y < size.height; y++ )
template<typename T, typename ST, typename WT, class Op, class OpInit>
class ReduceC_Invoker : public ParallelLoopBody
{
public:
ReduceC_Invoker(const Mat& aSrcmat, Mat& aDstmat, Op& aOp, OpInit& aOpInit)
:srcmat(aSrcmat),dstmat(aDstmat),op(aOp),opInit(aOpInit)
{
}
void operator()(const Range& range) const CV_OVERRIDE
{
const int cn = srcmat.channels();
const int width = srcmat.size().width*cn;
AutoBuffer<WT> cumul(cn);
for( int y = range.start; y < range.end; y++ )
{
const T* src = srcmat.ptr<T>(y);
ST* dst = dstmat.ptr<ST>(y);
if( size.width == cn )
for( int k = 0; k < cn; k++ )
dst[k] = src[k];
if( width == cn )
{
for( int k = 0; k < cn; k++ )
dst[k] = (ST)opInit(src[k]);
}
else
{
for( int k = 0; k < cn; k++ )
for(int k = 0; k < cn ; ++k )
cumul[k] = opInit(src[k]);
for(int k = cn ; k < width ; k += cn )
{
WT a0 = src[k], a1 = src[k+cn];
int i;
for( i = 2*cn; i <= size.width - 4*cn; i += 4*cn )
{
a0 = op(a0, (WT)src[i+k]);
a1 = op(a1, (WT)src[i+k+cn]);
a0 = op(a0, (WT)src[i+k+cn*2]);
a1 = op(a1, (WT)src[i+k+cn*3]);
}
for( ; i < size.width; i += cn )
{
a0 = op(a0, (WT)src[i+k]);
}
a0 = op(a0, a1);
dst[k] = (ST)a0;
for (int c = 0 ; c < cn ; ++c)
cumul[c] = op(cumul[c], src[k+c]);
}
for(int k = 0 ; k < cn ; ++k )
dst[k] = (ST)cumul[k];
}
}
}
private:
const Mat& srcmat;
Mat& dstmat;
Op& op;
OpInit& opInit;
};
template<typename T, typename ST, class Op, class OpInit = OpNop<ST> > static void
reduceC_( const Mat& srcmat, Mat& dstmat)
{
typedef typename Op::rtype WT;
Op op;
OpInit opInit;
ReduceC_Invoker<T, ST, WT, Op, OpInit> body(srcmat, dstmat, op, opInit);
parallel_for_(Range(0, srcmat.size().height), body);
}
typedef void (*ReduceFunc)( const Mat& src, Mat& dst );
}
#define reduceSumR8u32s reduceR_<uchar, int, OpAdd<int> >
#define reduceSumR8u32f reduceR_<uchar, float, OpAdd<int> >
#define reduceSumR8u64f reduceR_<uchar, double,OpAdd<int> >
#define reduceSumR8u32s reduceR_<uchar, int, OpAdd<int>, OpNop<int> >
#define reduceSumR8u32f reduceR_<uchar, float, OpAdd<int>, OpNop<int> >
#define reduceSumR8u64f reduceR_<uchar, double,OpAdd<int>, OpNop<int> >
#define reduceSumR16u32f reduceR_<ushort,float, OpAdd<float> >
#define reduceSumR16u64f reduceR_<ushort,double,OpAdd<double> >
#define reduceSumR16s32f reduceR_<short, float, OpAdd<float> >
@ -440,6 +474,17 @@ typedef void (*ReduceFunc)( const Mat& src, Mat& dst );
#define reduceSumR32f64f reduceR_<float, double,OpAdd<double> >
#define reduceSumR64f64f reduceR_<double,double,OpAdd<double> >
#define reduceSum2R8u32s reduceR_<uchar, int, OpAddSqr<int>, OpSqr<int> >
#define reduceSum2R8u32f reduceR_<uchar, float, OpAddSqr<int>, OpSqr<int> >
#define reduceSum2R8u64f reduceR_<uchar, double,OpAddSqr<int>, OpSqr<int> >
#define reduceSum2R16u32f reduceR_<ushort,float, OpAddSqr<float>, OpSqr<float> >
#define reduceSum2R16u64f reduceR_<ushort,double,OpAddSqr<double>,OpSqr<double> >
#define reduceSum2R16s32f reduceR_<short, float, OpAddSqr<float>, OpSqr<float> >
#define reduceSum2R16s64f reduceR_<short, double,OpAddSqr<double>,OpSqr<double> >
#define reduceSum2R32f32f reduceR_<float, float, OpAddSqr<float>, OpSqr<float> >
#define reduceSum2R32f64f reduceR_<float, double,OpAddSqr<double>,OpSqr<double> >
#define reduceSum2R64f64f reduceR_<double,double,OpAddSqr<double>,OpSqr<double> >
#define reduceMaxR8u reduceR_<uchar, uchar, OpMax<uchar> >
#define reduceMaxR16u reduceR_<ushort,ushort,OpMax<ushort> >
#define reduceMaxR16s reduceR_<short, short, OpMax<short> >
@ -527,23 +572,35 @@ static inline void reduceSumC_8u16u16s32f_64f(const cv::Mat& srcmat, cv::Mat& ds
#endif
#define reduceSumC8u32s reduceC_<uchar, int, OpAdd<int> >
#define reduceSumC8u32f reduceC_<uchar, float, OpAdd<int> >
#define reduceSumC8u32s reduceC_<uchar, int, OpAdd<int>, OpNop<int> >
#define reduceSumC8u32f reduceC_<uchar, float, OpAdd<int>, OpNop<int> >
#define reduceSumC16u32f reduceC_<ushort,float, OpAdd<float> >
#define reduceSumC16s32f reduceC_<short, float, OpAdd<float> >
#define reduceSumC32f32f reduceC_<float, float, OpAdd<float> >
#define reduceSumC64f64f reduceC_<double,double,OpAdd<double> >
#define reduceSum2C8u32s reduceC_<uchar, int, OpAddSqr<int>, OpSqr<int> >
#define reduceSum2C8u32f reduceC_<uchar, float, OpAddSqr<int>, OpSqr<int> >
#define reduceSum2C16u32f reduceC_<ushort,float, OpAddSqr<float>, OpSqr<float> >
#define reduceSum2C16s32f reduceC_<short, float, OpAddSqr<float>, OpSqr<float> >
#define reduceSum2C32f32f reduceC_<float, float, OpAddSqr<float>, OpSqr<float> >
#define reduceSum2C64f64f reduceC_<double,double,OpAddSqr<double>,OpSqr<double> >
#ifdef HAVE_IPP
#define reduceSumC8u64f reduceSumC_8u16u16s32f_64f
#define reduceSumC16u64f reduceSumC_8u16u16s32f_64f
#define reduceSumC16s64f reduceSumC_8u16u16s32f_64f
#define reduceSumC32f64f reduceSumC_8u16u16s32f_64f
#else
#define reduceSumC8u64f reduceC_<uchar, double,OpAdd<int> >
#define reduceSumC8u64f reduceC_<uchar, double,OpAdd<int>, OpNop<int> >
#define reduceSumC16u64f reduceC_<ushort,double,OpAdd<double> >
#define reduceSumC16s64f reduceC_<short, double,OpAdd<double> >
#define reduceSumC32f64f reduceC_<float, double,OpAdd<double> >
#define reduceSum2C8u64f reduceC_<uchar, double,OpAddSqr<int>, OpSqr<int> >
#define reduceSum2C16u64f reduceC_<ushort,double,OpAddSqr<double>,OpSqr<double> >
#define reduceSum2C16s64f reduceC_<short, double,OpAddSqr<double>,OpSqr<double> >
#define reduceSum2C32f64f reduceC_<float, double,OpAddSqr<double>,OpSqr<double> >
#endif
#ifdef HAVE_IPP
@ -622,8 +679,9 @@ static bool ocl_reduce(InputArray _src, OutputArray _dst,
ddepth = CV_32S;
}
const char * const ops[4] = { "OCL_CV_REDUCE_SUM", "OCL_CV_REDUCE_AVG",
"OCL_CV_REDUCE_MAX", "OCL_CV_REDUCE_MIN" };
const char * const ops[5] = { "OCL_CV_REDUCE_SUM", "OCL_CV_REDUCE_AVG",
"OCL_CV_REDUCE_MAX", "OCL_CV_REDUCE_MIN",
"OCL_CV_REDUCE_SUM2"};
int wdepth = std::max(ddepth, CV_32F);
if (useOptimized)
{
@ -718,7 +776,8 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype)
CV_Assert( cn == CV_MAT_CN(dtype) );
CV_Assert( op == REDUCE_SUM || op == REDUCE_MAX ||
op == REDUCE_MIN || op == REDUCE_AVG );
op == REDUCE_MIN || op == REDUCE_AVG ||
op == REDUCE_SUM2);
CV_OCL_RUN(_dst.isUMat(),
ocl_reduce(_src, _dst, dim, op, op0, stype, dtype))
@ -748,7 +807,7 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype)
if( op == REDUCE_SUM )
{
if(sdepth == CV_8U && ddepth == CV_32S)
func = GET_OPTIMIZED(reduceSumR8u32s);
func = reduceSumR8u32s;
else if(sdepth == CV_8U && ddepth == CV_32F)
func = reduceSumR8u32f;
else if(sdepth == CV_8U && ddepth == CV_64F)
@ -762,7 +821,7 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype)
else if(sdepth == CV_16S && ddepth == CV_64F)
func = reduceSumR16s64f;
else if(sdepth == CV_32F && ddepth == CV_32F)
func = GET_OPTIMIZED(reduceSumR32f32f);
func = reduceSumR32f32f;
else if(sdepth == CV_32F && ddepth == CV_64F)
func = reduceSumR32f64f;
else if(sdepth == CV_64F && ddepth == CV_64F)
@ -771,36 +830,59 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype)
else if(op == REDUCE_MAX)
{
if(sdepth == CV_8U && ddepth == CV_8U)
func = GET_OPTIMIZED(reduceMaxR8u);
func = reduceMaxR8u;
else if(sdepth == CV_16U && ddepth == CV_16U)
func = reduceMaxR16u;
else if(sdepth == CV_16S && ddepth == CV_16S)
func = reduceMaxR16s;
else if(sdepth == CV_32F && ddepth == CV_32F)
func = GET_OPTIMIZED(reduceMaxR32f);
func = reduceMaxR32f;
else if(sdepth == CV_64F && ddepth == CV_64F)
func = reduceMaxR64f;
}
else if(op == REDUCE_MIN)
{
if(sdepth == CV_8U && ddepth == CV_8U)
func = GET_OPTIMIZED(reduceMinR8u);
func = reduceMinR8u;
else if(sdepth == CV_16U && ddepth == CV_16U)
func = reduceMinR16u;
else if(sdepth == CV_16S && ddepth == CV_16S)
func = reduceMinR16s;
else if(sdepth == CV_32F && ddepth == CV_32F)
func = GET_OPTIMIZED(reduceMinR32f);
func = reduceMinR32f;
else if(sdepth == CV_64F && ddepth == CV_64F)
func = reduceMinR64f;
}
else if( op == REDUCE_SUM2 )
{
if(sdepth == CV_8U && ddepth == CV_32S)
func = reduceSum2R8u32s;
else if(sdepth == CV_8U && ddepth == CV_32F)
func = reduceSum2R8u32f;
else if(sdepth == CV_8U && ddepth == CV_64F)
func = reduceSum2R8u64f;
else if(sdepth == CV_16U && ddepth == CV_32F)
func = reduceSum2R16u32f;
else if(sdepth == CV_16U && ddepth == CV_64F)
func = reduceSum2R16u64f;
else if(sdepth == CV_16S && ddepth == CV_32F)
func = reduceSum2R16s32f;
else if(sdepth == CV_16S && ddepth == CV_64F)
func = reduceSum2R16s64f;
else if(sdepth == CV_32F && ddepth == CV_32F)
func = reduceSum2R32f32f;
else if(sdepth == CV_32F && ddepth == CV_64F)
func = reduceSum2R32f64f;
else if(sdepth == CV_64F && ddepth == CV_64F)
func = reduceSum2R64f64f;
}
}
else
{
if(op == REDUCE_SUM)
{
if(sdepth == CV_8U && ddepth == CV_32S)
func = GET_OPTIMIZED(reduceSumC8u32s);
func = reduceSumC8u32s;
else if(sdepth == CV_8U && ddepth == CV_32F)
func = reduceSumC8u32f;
else if(sdepth == CV_8U && ddepth == CV_64F)
@ -814,7 +896,7 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype)
else if(sdepth == CV_16S && ddepth == CV_64F)
func = reduceSumC16s64f;
else if(sdepth == CV_32F && ddepth == CV_32F)
func = GET_OPTIMIZED(reduceSumC32f32f);
func = reduceSumC32f32f;
else if(sdepth == CV_32F && ddepth == CV_64F)
func = reduceSumC32f64f;
else if(sdepth == CV_64F && ddepth == CV_64F)
@ -823,29 +905,52 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype)
else if(op == REDUCE_MAX)
{
if(sdepth == CV_8U && ddepth == CV_8U)
func = GET_OPTIMIZED(reduceMaxC8u);
func = reduceMaxC8u;
else if(sdepth == CV_16U && ddepth == CV_16U)
func = reduceMaxC16u;
else if(sdepth == CV_16S && ddepth == CV_16S)
func = reduceMaxC16s;
else if(sdepth == CV_32F && ddepth == CV_32F)
func = GET_OPTIMIZED(reduceMaxC32f);
func = reduceMaxC32f;
else if(sdepth == CV_64F && ddepth == CV_64F)
func = reduceMaxC64f;
}
else if(op == REDUCE_MIN)
{
if(sdepth == CV_8U && ddepth == CV_8U)
func = GET_OPTIMIZED(reduceMinC8u);
func = reduceMinC8u;
else if(sdepth == CV_16U && ddepth == CV_16U)
func = reduceMinC16u;
else if(sdepth == CV_16S && ddepth == CV_16S)
func = reduceMinC16s;
else if(sdepth == CV_32F && ddepth == CV_32F)
func = GET_OPTIMIZED(reduceMinC32f);
func = reduceMinC32f;
else if(sdepth == CV_64F && ddepth == CV_64F)
func = reduceMinC64f;
}
else if(op == REDUCE_SUM2)
{
if(sdepth == CV_8U && ddepth == CV_32S)
func = reduceSum2C8u32s;
else if(sdepth == CV_8U && ddepth == CV_32F)
func = reduceSum2C8u32f;
else if(sdepth == CV_8U && ddepth == CV_64F)
func = reduceSum2C8u64f;
else if(sdepth == CV_16U && ddepth == CV_32F)
func = reduceSum2C16u32f;
else if(sdepth == CV_16U && ddepth == CV_64F)
func = reduceSum2C16u64f;
else if(sdepth == CV_16S && ddepth == CV_32F)
func = reduceSum2C16s32f;
else if(sdepth == CV_16S && ddepth == CV_64F)
func = reduceSum2C16s64f;
else if(sdepth == CV_32F && ddepth == CV_32F)
func = reduceSum2C32f32f;
else if(sdepth == CV_32F && ddepth == CV_64F)
func = reduceSum2C32f64f;
else if(sdepth == CV_64F && ddepth == CV_64F)
func = reduceSum2C64f64f;
}
}
if( !func )

@ -85,6 +85,9 @@
#elif defined OCL_CV_REDUCE_MIN
#define INIT_VALUE MAX_VAL
#define PROCESS_ELEM(acc, value) acc = min(value, acc)
#elif defined OCL_CV_REDUCE_SUM2
#define INIT_VALUE 0
#define PROCESS_ELEM(acc, value) acc += value*value
#else
#error "No operation is specified"
#endif

@ -108,6 +108,22 @@ extern const uchar g_Saturate8u[];
#define CV_MIN_8U(a,b) ((a) - CV_FAST_CAST_8U((a) - (b)))
#define CV_MAX_8U(a,b) ((a) + CV_FAST_CAST_8U((b) - (a)))
// Unary functor that passes its argument through, converting it to the
// result type T3 with saturate_cast.
// type1 / type2 / rtype re-export the template arguments.
template<typename T1, typename T2=T1, typename T3=T1> struct OpNop
{
    typedef T1 type1;
    typedef T2 type2;
    typedef T3 rtype;

    rtype operator ()(const type1 value) const
    {
        const rtype converted = saturate_cast<rtype>(value);
        return converted;
    }
};
// Unary functor returning the square of its argument.
// The argument is first converted to the result type T3 with saturate_cast;
// the product of the converted value with itself is then returned as T3.
template<typename T1, typename T2=T1, typename T3=T1> struct OpSqr
{
    typedef T1 type1;
    typedef T2 type2;
    typedef T3 rtype;

    rtype operator ()(const type1 value) const
    {
        const rtype converted = saturate_cast<rtype>(value);
        return converted * converted;
    }
};
template<typename T1, typename T2=T1, typename T3=T1> struct OpAdd
{
typedef T1 type1;
@ -116,6 +132,14 @@ template<typename T1, typename T2=T1, typename T3=T1> struct OpAdd
T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a + b); }
};
// Binary functor computing acc + value*value.
// `value` is converted to T3 with saturate_cast before being squared; the sum
// of the accumulator and that square is passed through saturate_cast<T3> again.
template<typename T1, typename T2=T1, typename T3=T1> struct OpAddSqr
{
    typedef T1 type1;
    typedef T2 type2;
    typedef T3 rtype;

    rtype operator ()(const type1 acc, const type2 value) const
    {
        const rtype converted = saturate_cast<rtype>(value);
        return saturate_cast<rtype>(acc + converted * converted);
    }
};
template<typename T1, typename T2=T1, typename T3=T1> struct OpSub
{
typedef T1 type1;

@ -1873,6 +1873,22 @@ OCL_TEST_P(ReduceAvg, Mat)
}
}
// Reuses the Reduce fixture under a distinct name so REDUCE_SUM2 gets its own test case.
typedef Reduce ReduceSum2;
// Runs cv::reduce with REDUCE_SUM2 on the Mat ROI (inside OCL_OFF) and on the
// UMat ROI (inside OCL_ON), then compares the two destinations.
// NOTE(review): presumably OCL_OFF/OCL_ON select the CPU and OpenCL code paths
// respectively - confirm against the test framework macros.
OCL_TEST_P(ReduceSum2, Mat)
{
for (int j = 0; j < test_loop_times; j++)
{
// Fresh input/output data for every iteration.
generateTestData();
OCL_OFF(cv::reduce(src_roi, dst_roi, dim, REDUCE_SUM2, dtype));
OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, REDUCE_SUM2, dtype));
// Tolerance of 1 when the destination depth is at most CV_32S, 6e-6 otherwise.
double eps = ddepth <= CV_32S ? 1 : 6e-6;
OCL_EXPECT_MATS_NEAR(dst, eps);
}
}
//////////////////////////////////////// Instantiation /////////////////////////////////////////
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(::testing::Values(CV_8U, CV_8S), OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));

@ -26,7 +26,7 @@ protected:
};
template<class Type>
void testReduce( const Mat& src, Mat& sum, Mat& avg, Mat& max, Mat& min, int dim )
void testReduce( const Mat& src, Mat& sum, Mat& avg, Mat& max, Mat& min, Mat& sum2, int dim )
{
CV_Assert( src.channels() == 1 );
if( dim == 0 ) // row
@ -34,21 +34,25 @@ void testReduce( const Mat& src, Mat& sum, Mat& avg, Mat& max, Mat& min, int dim
sum.create( 1, src.cols, CV_64FC1 );
max.create( 1, src.cols, CV_64FC1 );
min.create( 1, src.cols, CV_64FC1 );
sum2.create( 1, src.cols, CV_64FC1 );
}
else
{
sum.create( src.rows, 1, CV_64FC1 );
max.create( src.rows, 1, CV_64FC1 );
min.create( src.rows, 1, CV_64FC1 );
sum2.create( src.rows, 1, CV_64FC1 );
}
sum.setTo(Scalar(0));
max.setTo(Scalar(-DBL_MAX));
min.setTo(Scalar(DBL_MAX));
sum2.setTo(Scalar(0));
const Mat_<Type>& src_ = src;
Mat_<double>& sum_ = (Mat_<double>&)sum;
Mat_<double>& min_ = (Mat_<double>&)min;
Mat_<double>& max_ = (Mat_<double>&)max;
Mat_<double>& sum2_ = (Mat_<double>&)sum2;
if( dim == 0 )
{
@ -59,6 +63,7 @@ void testReduce( const Mat& src, Mat& sum, Mat& avg, Mat& max, Mat& min, int dim
sum_(0, ci) += src_(ri, ci);
max_(0, ci) = std::max( max_(0, ci), (double)src_(ri, ci) );
min_(0, ci) = std::min( min_(0, ci), (double)src_(ri, ci) );
sum2_(0, ci) += ((double)src_(ri, ci))*((double)src_(ri, ci));
}
}
}
@ -71,6 +76,7 @@ void testReduce( const Mat& src, Mat& sum, Mat& avg, Mat& max, Mat& min, int dim
sum_(ri, 0) += src_(ri, ci);
max_(ri, 0) = std::max( max_(ri, 0), (double)src_(ri, ci) );
min_(ri, 0) = std::min( min_(ri, 0), (double)src_(ri, ci) );
sum2_(ri, 0) += ((double)src_(ri, ci))*((double)src_(ri, ci));
}
}
}
@ -93,7 +99,7 @@ int Core_ReduceTest::checkOp( const Mat& src, int dstType, int opType, const Mat
{
int srcType = src.type();
bool support = false;
if( opType == REDUCE_SUM || opType == REDUCE_AVG )
if( opType == REDUCE_SUM || opType == REDUCE_AVG || opType == REDUCE_SUM2 )
{
if( srcType == CV_8U && (dstType == CV_32S || dstType == CV_32F || dstType == CV_64F) )
support = true;
@ -128,7 +134,7 @@ int Core_ReduceTest::checkOp( const Mat& src, int dstType, int opType, const Mat
return cvtest::TS::OK;
double eps = 0.0;
if ( opType == REDUCE_SUM || opType == REDUCE_AVG )
if ( opType == REDUCE_SUM || opType == REDUCE_AVG || opType == REDUCE_SUM2 )
{
if ( dstType == CV_32F )
eps = 1.e-5;
@ -152,10 +158,13 @@ int Core_ReduceTest::checkOp( const Mat& src, int dstType, int opType, const Mat
if( check )
{
char msg[100];
const char* opTypeStr = opType == REDUCE_SUM ? "REDUCE_SUM" :
opType == REDUCE_AVG ? "REDUCE_AVG" :
opType == REDUCE_MAX ? "REDUCE_MAX" :
opType == REDUCE_MIN ? "REDUCE_MIN" : "unknown operation type";
const char* opTypeStr =
opType == REDUCE_SUM ? "REDUCE_SUM" :
opType == REDUCE_AVG ? "REDUCE_AVG" :
opType == REDUCE_MAX ? "REDUCE_MAX" :
opType == REDUCE_MIN ? "REDUCE_MIN" :
opType == REDUCE_SUM2 ? "REDUCE_SUM2" :
"unknown operation type";
string srcTypeStr, dstTypeStr;
getMatTypeStr( src.type(), srcTypeStr );
getMatTypeStr( dstType, dstTypeStr );
@ -172,25 +181,25 @@ int Core_ReduceTest::checkOp( const Mat& src, int dstType, int opType, const Mat
int Core_ReduceTest::checkCase( int srcType, int dstType, int dim, Size sz )
{
int code = cvtest::TS::OK, tempCode;
Mat src, sum, avg, max, min;
Mat src, sum, avg, max, min, sum2;
src.create( sz, srcType );
randu( src, Scalar(0), Scalar(100) );
if( srcType == CV_8UC1 )
testReduce<uchar>( src, sum, avg, max, min, dim );
testReduce<uchar>( src, sum, avg, max, min, sum2, dim );
else if( srcType == CV_8SC1 )
testReduce<char>( src, sum, avg, max, min, dim );
testReduce<char>( src, sum, avg, max, min, sum2, dim );
else if( srcType == CV_16UC1 )
testReduce<unsigned short int>( src, sum, avg, max, min, dim );
testReduce<unsigned short int>( src, sum, avg, max, min, sum2, dim );
else if( srcType == CV_16SC1 )
testReduce<short int>( src, sum, avg, max, min, dim );
testReduce<short int>( src, sum, avg, max, min, sum2, dim );
else if( srcType == CV_32SC1 )
testReduce<int>( src, sum, avg, max, min, dim );
testReduce<int>( src, sum, avg, max, min, sum2, dim );
else if( srcType == CV_32FC1 )
testReduce<float>( src, sum, avg, max, min, dim );
testReduce<float>( src, sum, avg, max, min, sum2, dim );
else if( srcType == CV_64FC1 )
testReduce<double>( src, sum, avg, max, min, dim );
testReduce<double>( src, sum, avg, max, min, sum2, dim );
else
CV_Assert( 0 );
@ -210,6 +219,10 @@ int Core_ReduceTest::checkCase( int srcType, int dstType, int dim, Size sz )
tempCode = checkOp( src, dstType, REDUCE_MIN, min, dim );
code = tempCode != cvtest::TS::OK ? tempCode : code;
// 5. sum2
tempCode = checkOp( src, dstType, REDUCE_SUM2, sum2, dim );
code = tempCode != cvtest::TS::OK ? tempCode : code;
return code;
}
@ -1563,6 +1576,7 @@ TEST(Reduce, regression_should_fail_bug_4594)
EXPECT_THROW(cv::reduce(src, dst, 0, REDUCE_MAX, CV_32S), cv::Exception);
EXPECT_NO_THROW(cv::reduce(src, dst, 0, REDUCE_SUM, CV_32S));
EXPECT_NO_THROW(cv::reduce(src, dst, 0, REDUCE_AVG, CV_32S));
EXPECT_NO_THROW(cv::reduce(src, dst, 0, REDUCE_SUM2, CV_32S));
}
TEST(Mat, push_back_vector)

@ -3018,7 +3018,7 @@ TEST(CovariationMatrixVectorOfMatWithMean, accuracy)
cv::randu(src,cv::Scalar(-128), cv::Scalar(128));
cv::Mat goldMean;
cv::reduce(src,goldMean,0 ,REDUCE_AVG, CV_32F);
cv::reduce(src, goldMean, 0, REDUCE_AVG, CV_32F);
cv::calcCovarMatrix(src,gold,goldMean,singleMatFlags,CV_32F);

Loading…
Cancel
Save