reduce matrix_reductions instantiates for tiny build

pull/3785/head
Vladislav Vinogradov 10 years ago
parent 2e47a1a61b
commit b44b1ab47b
  1. 36
      modules/gpu/src/cuda/matrix_reductions.cu
  2. 262
      modules/gpu/src/matrix_reductions.cpp

@ -462,6 +462,7 @@ namespace sum
}
template void run<uchar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
#ifndef OPENCV_TINY_GPU_MODULE
template void run<uchar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void run<uchar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void run<uchar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
@ -485,8 +486,10 @@ namespace sum
template void run<int, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void run<int, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void run<int, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
#endif
template void run<float, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
#ifndef OPENCV_TINY_GPU_MODULE
template void run<float, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void run<float, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void run<float, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
@ -495,6 +498,7 @@ namespace sum
template void run<double, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void run<double, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void run<double, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
#endif
template <typename T, int cn>
void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask)
@ -504,6 +508,7 @@ namespace sum
}
template void runAbs<uchar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
#ifndef OPENCV_TINY_GPU_MODULE
template void runAbs<uchar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void runAbs<uchar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void runAbs<uchar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
@ -527,8 +532,10 @@ namespace sum
template void runAbs<int, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void runAbs<int, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void runAbs<int, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
#endif
template void runAbs<float, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
#ifndef OPENCV_TINY_GPU_MODULE
template void runAbs<float, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void runAbs<float, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void runAbs<float, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
@ -537,6 +544,7 @@ namespace sum
template void runAbs<double, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void runAbs<double, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void runAbs<double, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
#endif
template <typename T> struct Sqr : unary_function<T, T>
{
@ -553,6 +561,7 @@ namespace sum
}
template void runSqr<uchar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
#ifndef OPENCV_TINY_GPU_MODULE
template void runSqr<uchar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void runSqr<uchar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void runSqr<uchar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
@ -576,8 +585,10 @@ namespace sum
template void runSqr<int, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void runSqr<int, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void runSqr<int, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
#endif
template void runSqr<float, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
#ifndef OPENCV_TINY_GPU_MODULE
template void runSqr<float, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void runSqr<float, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void runSqr<float, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
@ -586,6 +597,7 @@ namespace sum
template void runSqr<double, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void runSqr<double, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
template void runSqr<double, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
#endif
}
/////////////////////////////////////////////////////////////
@ -773,12 +785,16 @@ namespace minMax
}
template void run<uchar >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
#ifndef OPENCV_TINY_GPU_MODULE
template void run<schar >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
template void run<ushort>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
template void run<short >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
template void run<int >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
#endif
template void run<float >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
#ifndef OPENCV_TINY_GPU_MODULE
template void run<double>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
#endif
}
/////////////////////////////////////////////////////////////
@ -955,12 +971,16 @@ namespace minMaxLoc
}
template void run<unsigned char >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
#ifndef OPENCV_TINY_GPU_MODULE
template void run<signed char >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
template void run<unsigned short>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
template void run<short >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
template void run<int >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
#endif
template void run<float >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
#ifndef OPENCV_TINY_GPU_MODULE
template void run<double>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
#endif
}
/////////////////////////////////////////////////////////////
@ -1079,12 +1099,16 @@ namespace countNonZero
}
template int run<uchar >(const PtrStepSzb src, PtrStep<unsigned int> buf);
#ifndef OPENCV_TINY_GPU_MODULE
template int run<schar >(const PtrStepSzb src, PtrStep<unsigned int> buf);
template int run<ushort>(const PtrStepSzb src, PtrStep<unsigned int> buf);
template int run<short >(const PtrStepSzb src, PtrStep<unsigned int> buf);
template int run<int >(const PtrStepSzb src, PtrStep<unsigned int> buf);
#endif
template int run<float >(const PtrStepSzb src, PtrStep<unsigned int> buf);
#ifndef OPENCV_TINY_GPU_MODULE
template int run<double>(const PtrStepSzb src, PtrStep<unsigned int> buf);
#endif
}
//////////////////////////////////////////////////////////////////////////////
@ -1257,6 +1281,11 @@ namespace reduce
funcs[op]((PtrStepSz<T>) src, (D*) dst, stream);
}
#ifdef OPENCV_TINY_GPU_MODULE
template void rows<unsigned char, int, unsigned char>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
template void rows<unsigned char, float, float>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
template void rows<float, float, float>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
#else
template void rows<unsigned char, int, unsigned char>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
template void rows<unsigned char, int, int>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
template void rows<unsigned char, float, float>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
@ -1280,6 +1309,7 @@ namespace reduce
template void rows<float, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
template void rows<double, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
#endif
///////////////////////////////////////////////////////////
@ -1338,6 +1368,11 @@ namespace reduce
funcs[cn][op](src, dst, stream);
}
#ifdef OPENCV_TINY_GPU_MODULE
template void cols<unsigned char, int, unsigned char>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
template void cols<unsigned char, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
template void cols<float, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
#else
template void cols<unsigned char, int, unsigned char>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
template void cols<unsigned char, int, int>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
template void cols<unsigned char, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
@ -1361,6 +1396,7 @@ namespace reduce
template void cols<float, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
template void cols<double, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
#endif
}
#endif /* CUDA_DISABLER */

@ -261,6 +261,18 @@ Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
Scalar cv::gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
{
typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask);
#ifdef OPENCV_TINY_GPU_MODULE
static const func_t funcs[7][5] =
{
{0, ::sum::run<uchar , 1>, 0, 0, 0},
{0, 0, 0, 0, 0},
{0, 0, 0, 0, 0},
{0, 0, 0, 0, 0},
{0, 0, 0, 0, 0},
{0, ::sum::run<float , 1>, 0, 0, 0},
{0, 0, 0, 0, 0},
};
#else
static const func_t funcs[7][5] =
{
{0, ::sum::run<uchar , 1>, ::sum::run<uchar , 2>, ::sum::run<uchar , 3>, ::sum::run<uchar , 4>},
@ -271,6 +283,7 @@ Scalar cv::gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
{0, ::sum::run<float , 1>, ::sum::run<float , 2>, ::sum::run<float , 3>, ::sum::run<float , 4>},
{0, ::sum::run<double, 1>, ::sum::run<double, 2>, ::sum::run<double, 3>, ::sum::run<double, 4>}
};
#endif
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
@ -286,6 +299,8 @@ Scalar cv::gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
buf.setTo(Scalar::all(0));
const func_t func = funcs[src.depth()][src.channels()];
if (!func)
CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");
double result[4];
func(src, buf.data, result, mask);
@ -307,6 +322,18 @@ Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf)
Scalar cv::gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
{
typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask);
#ifdef OPENCV_TINY_GPU_MODULE
static const func_t funcs[7][5] =
{
{0, ::sum::runAbs<uchar , 1>, 0, 0, 0},
{0, 0, 0, 0, 0},
{0, 0, 0, 0, 0},
{0, 0, 0, 0, 0},
{0, 0, 0, 0, 0},
{0, ::sum::runAbs<float , 1>, 0, 0, 0},
{0, 0, 0, 0, 0},
};
#else
static const func_t funcs[7][5] =
{
{0, ::sum::runAbs<uchar , 1>, ::sum::runAbs<uchar , 2>, ::sum::runAbs<uchar , 3>, ::sum::runAbs<uchar , 4>},
@ -317,6 +344,7 @@ Scalar cv::gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
{0, ::sum::runAbs<float , 1>, ::sum::runAbs<float , 2>, ::sum::runAbs<float , 3>, ::sum::runAbs<float , 4>},
{0, ::sum::runAbs<double, 1>, ::sum::runAbs<double, 2>, ::sum::runAbs<double, 3>, ::sum::runAbs<double, 4>}
};
#endif
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
@ -332,6 +360,8 @@ Scalar cv::gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
buf.setTo(Scalar::all(0));
const func_t func = funcs[src.depth()][src.channels()];
if (!func)
CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");
double result[4];
func(src, buf.data, result, mask);
@ -353,6 +383,18 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
Scalar cv::gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
{
typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask);
#ifdef OPENCV_TINY_GPU_MODULE
static const func_t funcs[7][5] =
{
{0, ::sum::runSqr<uchar , 1>, 0, 0, 0},
{0, 0, 0, 0, 0},
{0, 0, 0, 0, 0},
{0, 0, 0, 0, 0},
{0, 0, 0, 0, 0},
{0, ::sum::runSqr<float , 1>, 0, 0, 0},
{0, 0, 0, 0, 0},
};
#else
static const func_t funcs[7][5] =
{
{0, ::sum::runSqr<uchar , 1>, ::sum::runSqr<uchar , 2>, ::sum::runSqr<uchar , 3>, ::sum::runSqr<uchar , 4>},
@ -363,6 +405,7 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
{0, ::sum::runSqr<float , 1>, ::sum::runSqr<float , 2>, ::sum::runSqr<float , 3>, ::sum::runSqr<float , 4>},
{0, ::sum::runSqr<double, 1>, ::sum::runSqr<double, 2>, ::sum::runSqr<double, 3>, ::sum::runSqr<double, 4>}
};
#endif
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
@ -378,6 +421,8 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
buf.setTo(Scalar::all(0));
const func_t func = funcs[src.depth()][src.channels()];
if (!func)
CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");
double result[4];
func(src, buf.data, result, mask);
@ -405,6 +450,18 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf)
{
typedef void (*func_t)(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
#ifdef OPENCV_TINY_GPU_MODULE
static const func_t funcs[] =
{
::minMax::run<uchar>,
0/*::minMax::run<schar>*/,
0/*::minMax::run<ushort>*/,
0/*::minMax::run<short>*/,
0/*::minMax::run<int>*/,
::minMax::run<float>,
0/*::minMax::run<double>*/,
};
#else
static const func_t funcs[] =
{
::minMax::run<uchar>,
@ -413,8 +470,9 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
::minMax::run<short>,
::minMax::run<int>,
::minMax::run<float>,
::minMax::run<double>
::minMax::run<double>,
};
#endif
CV_Assert( src.channels() == 1 );
CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
@ -430,6 +488,8 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
ensureSizeIsEnough(buf_size, CV_8U, buf);
const func_t func = funcs[src.depth()];
if (!func)
CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");
double temp1, temp2;
func(src, mask, minVal ? minVal : &temp1, maxVal ? maxVal : &temp2, buf);
@ -456,6 +516,18 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
const GpuMat& mask, GpuMat& valBuf, GpuMat& locBuf)
{
typedef void (*func_t)(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
#ifdef OPENCV_TINY_GPU_MODULE
static const func_t funcs[] =
{
::minMaxLoc::run<uchar>,
0/*::minMaxLoc::run<schar>*/,
0/*::minMaxLoc::run<ushort>*/,
0/*::minMaxLoc::run<short>*/,
0/*::minMaxLoc::run<int>*/,
::minMaxLoc::run<float>,
0/*::minMaxLoc::run<double>*/,
};
#else
static const func_t funcs[] =
{
::minMaxLoc::run<uchar>,
@ -464,8 +536,9 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
::minMaxLoc::run<short>,
::minMaxLoc::run<int>,
::minMaxLoc::run<float>,
::minMaxLoc::run<double>
::minMaxLoc::run<double>,
};
#endif
CV_Assert( src.channels() == 1 );
CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
@ -482,6 +555,8 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
ensureSizeIsEnough(locbuf_size, CV_8U, locBuf);
const func_t func = funcs[src.depth()];
if (!func)
CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");
double temp1, temp2;
Point temp3, temp4;
@ -508,6 +583,18 @@ int cv::gpu::countNonZero(const GpuMat& src)
int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
{
typedef int (*func_t)(const PtrStepSzb src, PtrStep<unsigned int> buf);
#ifdef OPENCV_TINY_GPU_MODULE
static const func_t funcs[] =
{
::countNonZero::run<uchar>,
0/*::countNonZero::run<schar>*/,
0/*::countNonZero::run<ushort>*/,
0/*::countNonZero::run<short>*/,
0/*::countNonZero::run<int>*/,
::countNonZero::run<float>,
0/*::countNonZero::run<double>*/,
};
#else
static const func_t funcs[] =
{
::countNonZero::run<uchar>,
@ -516,8 +603,9 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
::countNonZero::run<short>,
::countNonZero::run<int>,
::countNonZero::run<float>,
::countNonZero::run<double>
::countNonZero::run<double>,
};
#endif
CV_Assert(src.channels() == 1);
@ -532,6 +620,8 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
ensureSizeIsEnough(buf_size, CV_8U, buf);
const func_t func = funcs[src.depth()];
if (!func)
CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");
return func(src, buf);
}
@ -562,6 +652,74 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
if (dim == 0)
{
typedef void (*func_t)(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
#ifdef OPENCV_TINY_GPU_MODULE
static const func_t funcs[7][7] =
{
{
::reduce::rows<unsigned char, int, unsigned char>,
0/*::reduce::rows<unsigned char, int, signed char>*/,
0/*::reduce::rows<unsigned char, int, unsigned short>*/,
0/*::reduce::rows<unsigned char, int, short>*/,
0/*::reduce::rows<unsigned char, int, int>*/,
::reduce::rows<unsigned char, float, float>,
0/*::reduce::rows<unsigned char, double, double>*/,
},
{
0/*::reduce::rows<signed char, int, unsigned char>*/,
0/*::reduce::rows<signed char, int, signed char>*/,
0/*::reduce::rows<signed char, int, unsigned short>*/,
0/*::reduce::rows<signed char, int, short>*/,
0/*::reduce::rows<signed char, int, int>*/,
0/*::reduce::rows<signed char, float, float>*/,
0/*::reduce::rows<signed char, double, double>*/,
},
{
0/*::reduce::rows<unsigned short, int, unsigned char>*/,
0/*::reduce::rows<unsigned short, int, signed char>*/,
0/*::reduce::rows<unsigned short, int, unsigned short>*/,
0/*::reduce::rows<unsigned short, int, short>*/,
0/*::reduce::rows<unsigned short, int, int>*/,
0/*::reduce::rows<unsigned short, float, float>*/,
0/*::reduce::rows<unsigned short, double, double>*/,
},
{
0/*::reduce::rows<short, int, unsigned char>*/,
0/*::reduce::rows<short, int, signed char>*/,
0/*::reduce::rows<short, int, unsigned short>*/,
0/*::reduce::rows<short, int, short>*/,
0/*::reduce::rows<short, int, int>*/,
0/*::reduce::rows<short, float, float>*/,
0/*::reduce::rows<short, double, double>*/,
},
{
0/*::reduce::rows<int, int, unsigned char>*/,
0/*::reduce::rows<int, int, signed char>*/,
0/*::reduce::rows<int, int, unsigned short>*/,
0/*::reduce::rows<int, int, short>*/,
0/*::reduce::rows<int, int, int>*/,
0/*::reduce::rows<int, float, float>*/,
0/*::reduce::rows<int, double, double>*/,
},
{
0/*::reduce::rows<float, float, unsigned char>*/,
0/*::reduce::rows<float, float, signed char>*/,
0/*::reduce::rows<float, float, unsigned short>*/,
0/*::reduce::rows<float, float, short>*/,
0/*::reduce::rows<float, float, int>*/,
::reduce::rows<float, float, float>,
0/*::reduce::rows<float, double, double>*/,
},
{
0/*::reduce::rows<double, double, unsigned char>*/,
0/*::reduce::rows<double, double, signed char>*/,
0/*::reduce::rows<double, double, unsigned short>*/,
0/*::reduce::rows<double, double, short>*/,
0/*::reduce::rows<double, double, int>*/,
0/*::reduce::rows<double, double, float>*/,
0/*::reduce::rows<double, double, double>*/,
}
};
#else
static const func_t funcs[7][7] =
{
{
@ -571,7 +729,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
0/*::reduce::rows<unsigned char, int, short>*/,
::reduce::rows<unsigned char, int, int>,
::reduce::rows<unsigned char, float, float>,
::reduce::rows<unsigned char, double, double>
::reduce::rows<unsigned char, double, double>,
},
{
0/*::reduce::rows<signed char, int, unsigned char>*/,
@ -580,7 +738,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
0/*::reduce::rows<signed char, int, short>*/,
0/*::reduce::rows<signed char, int, int>*/,
0/*::reduce::rows<signed char, float, float>*/,
0/*::reduce::rows<signed char, double, double>*/
0/*::reduce::rows<signed char, double, double>*/,
},
{
0/*::reduce::rows<unsigned short, int, unsigned char>*/,
@ -589,7 +747,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
0/*::reduce::rows<unsigned short, int, short>*/,
::reduce::rows<unsigned short, int, int>,
::reduce::rows<unsigned short, float, float>,
::reduce::rows<unsigned short, double, double>
::reduce::rows<unsigned short, double, double>,
},
{
0/*::reduce::rows<short, int, unsigned char>*/,
@ -598,7 +756,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
::reduce::rows<short, int, short>,
::reduce::rows<short, int, int>,
::reduce::rows<short, float, float>,
::reduce::rows<short, double, double>
::reduce::rows<short, double, double>,
},
{
0/*::reduce::rows<int, int, unsigned char>*/,
@ -607,7 +765,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
0/*::reduce::rows<int, int, short>*/,
::reduce::rows<int, int, int>,
::reduce::rows<int, float, float>,
::reduce::rows<int, double, double>
::reduce::rows<int, double, double>,
},
{
0/*::reduce::rows<float, float, unsigned char>*/,
@ -616,7 +774,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
0/*::reduce::rows<float, float, short>*/,
0/*::reduce::rows<float, float, int>*/,
::reduce::rows<float, float, float>,
::reduce::rows<float, double, double>
::reduce::rows<float, double, double>,
},
{
0/*::reduce::rows<double, double, unsigned char>*/,
@ -625,9 +783,10 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
0/*::reduce::rows<double, double, short>*/,
0/*::reduce::rows<double, double, int>*/,
0/*::reduce::rows<double, double, float>*/,
::reduce::rows<double, double, double>
::reduce::rows<double, double, double>,
}
};
#endif
const func_t func = funcs[src.depth()][dst.depth()];
@ -639,6 +798,74 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
else
{
typedef void (*func_t)(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
#ifdef OPENCV_TINY_GPU_MODULE
static const func_t funcs[7][7] =
{
{
::reduce::cols<unsigned char, int, unsigned char>,
0/*::reduce::cols<unsigned char, int, signed char>*/,
0/*::reduce::cols<unsigned char, int, unsigned short>*/,
0/*::reduce::cols<unsigned char, int, short>*/,
0/*::reduce::cols<unsigned char, int, int>*/,
::reduce::cols<unsigned char, float, float>,
0/*::reduce::cols<unsigned char, double, double>*/,
},
{
0/*::reduce::cols<signed char, int, unsigned char>*/,
0/*::reduce::cols<signed char, int, signed char>*/,
0/*::reduce::cols<signed char, int, unsigned short>*/,
0/*::reduce::cols<signed char, int, short>*/,
0/*::reduce::cols<signed char, int, int>*/,
0/*::reduce::cols<signed char, float, float>*/,
0/*::reduce::cols<signed char, double, double>*/,
},
{
0/*::reduce::cols<unsigned short, int, unsigned char>*/,
0/*::reduce::cols<unsigned short, int, signed char>*/,
0/*::reduce::cols<unsigned short, int, unsigned short>*/,
0/*::reduce::cols<unsigned short, int, short>*/,
0/*::reduce::cols<unsigned short, int, int>*/,
0/*::reduce::cols<unsigned short, float, float>*/,
0/*::reduce::cols<unsigned short, double, double>*/,
},
{
0/*::reduce::cols<short, int, unsigned char>*/,
0/*::reduce::cols<short, int, signed char>*/,
0/*::reduce::cols<short, int, unsigned short>*/,
0/*::reduce::cols<short, int, short>*/,
0/*::reduce::cols<short, int, int>*/,
0/*::reduce::cols<short, float, float>*/,
0/*::reduce::cols<short, double, double>*/,
},
{
0/*::reduce::cols<int, int, unsigned char>*/,
0/*::reduce::cols<int, int, signed char>*/,
0/*::reduce::cols<int, int, unsigned short>*/,
0/*::reduce::cols<int, int, short>*/,
0/*::reduce::cols<int, int, int>*/,
0/*::reduce::cols<int, float, float>*/,
0/*::reduce::cols<int, double, double>*/,
},
{
0/*::reduce::cols<float, float, unsigned char>*/,
0/*::reduce::cols<float, float, signed char>*/,
0/*::reduce::cols<float, float, unsigned short>*/,
0/*::reduce::cols<float, float, short>*/,
0/*::reduce::cols<float, float, int>*/,
::reduce::cols<float, float, float>,
0/*::reduce::cols<float, double, double>*/,
},
{
0/*::reduce::cols<double, double, unsigned char>*/,
0/*::reduce::cols<double, double, signed char>*/,
0/*::reduce::cols<double, double, unsigned short>*/,
0/*::reduce::cols<double, double, short>*/,
0/*::reduce::cols<double, double, int>*/,
0/*::reduce::cols<double, double, float>*/,
0/*::reduce::cols<double, double, double>*/,
}
};
#else
static const func_t funcs[7][7] =
{
{
@ -648,7 +875,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
0/*::reduce::cols<unsigned char, int, short>*/,
::reduce::cols<unsigned char, int, int>,
::reduce::cols<unsigned char, float, float>,
::reduce::cols<unsigned char, double, double>
::reduce::cols<unsigned char, double, double>,
},
{
0/*::reduce::cols<signed char, int, unsigned char>*/,
@ -657,7 +884,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
0/*::reduce::cols<signed char, int, short>*/,
0/*::reduce::cols<signed char, int, int>*/,
0/*::reduce::cols<signed char, float, float>*/,
0/*::reduce::cols<signed char, double, double>*/
0/*::reduce::cols<signed char, double, double>*/,
},
{
0/*::reduce::cols<unsigned short, int, unsigned char>*/,
@ -666,7 +893,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
0/*::reduce::cols<unsigned short, int, short>*/,
::reduce::cols<unsigned short, int, int>,
::reduce::cols<unsigned short, float, float>,
::reduce::cols<unsigned short, double, double>
::reduce::cols<unsigned short, double, double>,
},
{
0/*::reduce::cols<short, int, unsigned char>*/,
@ -675,7 +902,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
::reduce::cols<short, int, short>,
::reduce::cols<short, int, int>,
::reduce::cols<short, float, float>,
::reduce::cols<short, double, double>
::reduce::cols<short, double, double>,
},
{
0/*::reduce::cols<int, int, unsigned char>*/,
@ -684,7 +911,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
0/*::reduce::cols<int, int, short>*/,
::reduce::cols<int, int, int>,
::reduce::cols<int, float, float>,
::reduce::cols<int, double, double>
::reduce::cols<int, double, double>,
},
{
0/*::reduce::cols<float, float, unsigned char>*/,
@ -693,7 +920,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
0/*::reduce::cols<float, float, short>*/,
0/*::reduce::cols<float, float, int>*/,
::reduce::cols<float, float, float>,
::reduce::cols<float, double, double>
::reduce::cols<float, double, double>,
},
{
0/*::reduce::cols<double, double, unsigned char>*/,
@ -702,9 +929,10 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
0/*::reduce::cols<double, double, short>*/,
0/*::reduce::cols<double, double, int>*/,
0/*::reduce::cols<double, double, float>*/,
::reduce::cols<double, double, double>
::reduce::cols<double, double, double>,
}
};
#endif
const func_t func = funcs[src.depth()][dst.depth()];

Loading…
Cancel
Save