diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index e277d829a1..1f94f6a5c3 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -234,6 +234,7 @@ namespace arithm } template void addMat<uchar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addMat<uchar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat<uchar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat<uchar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -278,7 +279,9 @@ namespace arithm //template void addMat<float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat<float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat<float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif template void addMat<float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addMat<float, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat<double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -288,6 +291,7 @@ namespace arithm //template void addMat<double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat<double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat<double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -329,6 +333,7 @@ namespace arithm } template void addScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -373,7 +378,9 @@ namespace arithm //template void addScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif template void addScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -383,6 +390,7 @@ namespace arithm //template void addScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -469,6 +477,7 @@ namespace arithm } template void subMat<uchar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void subMat<uchar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat<uchar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat<uchar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -513,7 +522,9 @@ namespace arithm //template void subMat<float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat<float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat<float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif template void subMat<float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void subMat<float, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat<double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -523,6 +534,7 @@ namespace arithm //template void subMat<double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat<double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat<double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -542,6 +554,7 @@ namespace arithm } template void subScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void subScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -586,7 +599,9 @@ namespace arithm //template void subScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif template void subScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void subScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -596,6 +611,7 @@ namespace arithm //template void subScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -700,6 +716,7 @@ namespace arithm } template void mulMat<uchar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void mulMat<uchar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat<uchar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat<uchar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); @@ -744,7 +761,9 @@ namespace arithm //template void mulMat<float, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat<float, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat<float, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#endif template void mulMat<float, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void mulMat<float, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat<double, double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); @@ -754,6 +773,7 @@ namespace arithm //template void mulMat<double, double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat<double, double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat<double, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -791,6 +811,7 @@ namespace arithm } template void mulScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void mulScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -835,7 +856,9 @@ namespace arithm //template void mulScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif template void mulScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void mulScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -845,6 +868,7 @@ namespace arithm //template void mulScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -968,6 +992,7 @@ namespace arithm } template void divMat<uchar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divMat<uchar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat<uchar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat<uchar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); @@ -1012,7 +1037,9 @@ namespace arithm //template void divMat<float, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat<float, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat<float, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#endif template void divMat<float, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divMat<float, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat<double, double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); @@ -1022,6 +1049,7 @@ namespace arithm //template void divMat<double, double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat<double, double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat<double, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1037,6 +1065,7 @@ namespace arithm } template void divScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -1081,7 +1110,9 @@ namespace arithm //template void divScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif template void divScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -1091,6 +1122,7 @@ namespace arithm //template void divScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1128,6 +1160,7 @@ namespace arithm } template void divInv<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divInv<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -1172,7 +1205,9 @@ namespace arithm //template void divInv<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif template void divInv<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divInv<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -1182,6 +1217,7 @@ namespace arithm //template void divInv<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1278,12 +1314,16 @@ namespace arithm } template void absDiffMat<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absDiffMat<schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat<short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat<int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void absDiffMat<float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absDiffMat<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1323,12 +1363,16 @@ namespace arithm } template void absDiffScalar<uchar, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absDiffScalar<schar, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffScalar<ushort, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffScalar<short, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffScalar<int, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void absDiffScalar<float, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absDiffScalar<double, double>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1349,13 +1393,17 @@ namespace arithm transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, abs_func<T>(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void absMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void absMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1390,13 +1438,17 @@ namespace arithm transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, Sqr<T>(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void sqrMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void sqrMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void sqrMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1417,13 +1469,17 @@ namespace arithm transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, sqrt_func<T>(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void sqrtMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void sqrtMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void sqrtMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1444,13 +1500,17 @@ namespace arithm transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, log_func<T>(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void logMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void logMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void logMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1486,13 +1546,17 @@ namespace arithm transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, Exp<T>(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void expMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void expMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void expMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////////////////// @@ -1620,36 +1684,52 @@ namespace arithm } template void cmpMatEq<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatEq<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatEq<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatEq<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatEq<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatEq<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatEq<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatNe<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatNe<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatNe<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatNe<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatNe<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatNe<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatNe<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatLt<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatLt<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLt<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLt<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLt<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatLt<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatLt<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatLe<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatLe<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLe<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLe<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLe<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatLe<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatLe<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////////////////// @@ -1824,52 +1904,76 @@ namespace arithm } template void cmpScalarEq<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarEq<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarEq<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarEq<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarEq<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarEq<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarEq<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarNe<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarNe<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarNe<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarNe<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarNe<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarNe<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarNe<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarLt<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarLt<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLt<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLt<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLt<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarLt<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarLt<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarLe<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarLe<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLe<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLe<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLe<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarLe<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarLe<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarGt<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarGt<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGt<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGt<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGt<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarGt<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarGt<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarGe<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarGe<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGe<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGe<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGe<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarGe<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarGe<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////////////////// @@ -1981,19 +2085,25 @@ namespace arithm } template void bitScalarAnd<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void bitScalarAnd<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarAnd<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarAnd<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void bitScalarOr<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void bitScalarOr<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarOr<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarOr<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void bitScalarXor<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void bitScalarXor<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarXor<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarXor<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -2067,12 +2177,16 @@ namespace arithm } template void minMat<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void minMat<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void minMat<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void minMat<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void minMat<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void minMat<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void minMat<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template <typename T> void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream) { @@ -2080,12 +2194,16 @@ namespace arithm } template void minScalar<uchar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void minScalar<schar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void minScalar<ushort>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void minScalar<short >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void minScalar<int >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void minScalar<float >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void minScalar<double>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -2159,12 +2277,16 @@ namespace arithm } template void maxMat<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void maxMat<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void maxMat<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void maxMat<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void maxMat<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void maxMat<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void maxMat<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template <typename T> void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream) { @@ -2172,12 +2294,16 @@ namespace arithm } template void maxScalar<uchar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void maxScalar<schar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void maxScalar<ushort>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void maxScalar<short >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void maxScalar<int >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void maxScalar<float >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void maxScalar<double>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -2233,12 +2359,16 @@ namespace arithm } template void threshold<uchar>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void threshold<schar>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); template void threshold<ushort>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); template void threshold<short>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); template void threshold<int>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#endif template void threshold<float>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void threshold<double>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -2312,13 +2442,17 @@ namespace arithm transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, PowOp<T>(power), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void pow<uchar>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); template void pow<schar>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); template void pow<short>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); template void pow<ushort>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); template void pow<int>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); +#endif template void pow<float>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void pow<double>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp index fa98520ff8..bd8ca81bf1 100644 --- a/modules/gpu/src/element_operations.cpp +++ b/modules/gpu/src/element_operations.cpp @@ -275,6 +275,75 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + addMat<unsigned char, unsigned char>, + 0/*addMat<unsigned char, signed char>*/, + 0/*addMat<unsigned char, unsigned short>*/, + 0/*addMat<unsigned char, short>*/, + 0/*addMat<unsigned char, int>*/, + 0/*addMat<unsigned char, float>*/, + 0/*addMat<unsigned char, double>*/, + }, + { + 0/*addMat<signed char, unsigned char>*/, + 0/*addMat<signed char, signed char>*/, + 0/*addMat<signed char, unsigned short>*/, + 0/*addMat<signed char, short>*/, + 0/*addMat<signed char, int>*/, + 0/*addMat<signed char, float>*/, + 0/*addMat<signed char, double>*/, + }, + { + 0 /*addMat<unsigned short, unsigned char>*/, + 0 /*addMat<unsigned short, signed char>*/, + 0/*addMat<unsigned short, unsigned short>*/, + 0/*addMat<unsigned short, short>*/, + 0/*addMat<unsigned short, int>*/, + 0/*addMat<unsigned short, float>*/, + 0/*addMat<unsigned short, double>*/, + }, + { + 0 /*addMat<short, unsigned char>*/, + 0 /*addMat<short, signed char>*/, + 0/*addMat<short, unsigned short>*/, + 0/*addMat<short, short>*/, + 0/*addMat<short, int>*/, + 0/*addMat<short, float>*/, + 0/*addMat<short, double>*/, + }, + { + 0 /*addMat<int, unsigned char>*/, + 0 /*addMat<int, signed char>*/, + 0 /*addMat<int, unsigned short>*/, + 0 /*addMat<int, short>*/, + 0/*addMat<int, int>*/, + 0/*addMat<int, float>*/, + 0/*addMat<int, double>*/, + }, + { + 0 /*addMat<float, unsigned char>*/, + 0 /*addMat<float, signed char>*/, + 0 /*addMat<float, unsigned short>*/, + 0 /*addMat<float, short>*/, + 0 /*addMat<float, int>*/, + addMat<float, float>, + 0/*addMat<float, double>*/, + }, + { + 0 /*addMat<double, unsigned char>*/, + 0 /*addMat<double, signed char>*/, + 0 /*addMat<double, unsigned short>*/, + 0 /*addMat<double, short>*/, + 0 /*addMat<double, int>*/, + 0 /*addMat<double, float>*/, + 0/*addMat<double, double>*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -284,7 +353,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu addMat<unsigned char, short>, addMat<unsigned char, int>, addMat<unsigned char, float>, - addMat<unsigned char, double> + addMat<unsigned char, double>, }, { addMat<signed char, unsigned char>, @@ -293,7 +362,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu addMat<signed char, short>, addMat<signed char, int>, addMat<signed char, float>, - addMat<signed char, double> + addMat<signed char, double>, }, { 0 /*addMat<unsigned short, unsigned char>*/, @@ -302,7 +371,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu addMat<unsigned short, short>, addMat<unsigned short, int>, addMat<unsigned short, float>, - addMat<unsigned short, double> + addMat<unsigned short, double>, }, { 0 /*addMat<short, unsigned char>*/, @@ -311,7 +380,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu addMat<short, short>, addMat<short, int>, addMat<short, float>, - addMat<short, double> + addMat<short, double>, }, { 0 /*addMat<int, unsigned char>*/, @@ -320,7 +389,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu 0 /*addMat<int, short>*/, addMat<int, int>, addMat<int, float>, - addMat<int, double> + addMat<int, double>, }, { 0 /*addMat<float, unsigned char>*/, @@ -329,7 +398,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu 0 /*addMat<float, short>*/, 0 /*addMat<float, int>*/, addMat<float, float>, - addMat<float, double> + addMat<float, double>, }, { 0 /*addMat<double, unsigned char>*/, @@ -338,9 +407,10 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu 0 /*addMat<double, short>*/, 0 /*addMat<double, int>*/, 0 /*addMat<double, float>*/, - addMat<double, double> + addMat<double, double>, } }; +#endif if (dtype < 0) dtype = src1.depth(); @@ -421,6 +491,75 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + addScalar<unsigned char, float, unsigned char>, + 0/*addScalar<unsigned char, float, signed char>*/, + 0/*addScalar<unsigned char, float, unsigned short>*/, + 0/*addScalar<unsigned char, float, short>*/, + 0/*addScalar<unsigned char, float, int>*/, + 0/*addScalar<unsigned char, float, float>*/, + 0/*addScalar<unsigned char, double, double>*/, + }, + { + 0/*addScalar<signed char, float, unsigned char>*/, + 0/*addScalar<signed char, float, signed char>*/, + 0/*addScalar<signed char, float, unsigned short>*/, + 0/*addScalar<signed char, float, short>*/, + 0/*addScalar<signed char, float, int>*/, + 0/*addScalar<signed char, float, float>*/, + 0/*addScalar<signed char, double, double>*/, + }, + { + 0 /*addScalar<unsigned short, float, unsigned char>*/, + 0 /*addScalar<unsigned short, float, signed char>*/, + 0/*addScalar<unsigned short, float, unsigned short>*/, + 0/*addScalar<unsigned short, float, short>*/, + 0/*addScalar<unsigned short, float, int>*/, + 0/*addScalar<unsigned short, float, float>*/, + 0/*addScalar<unsigned short, double, double>*/, + }, + { + 0 /*addScalar<short, float, unsigned char>*/, + 0 /*addScalar<short, float, signed char>*/, + 0/*addScalar<short, float, unsigned short>*/, + 0/*addScalar<short, float, short>*/, + 0/*addScalar<short, float, int>*/, + 0/*addScalar<short, float, float>*/, + 0/*addScalar<short, double, double>*/, + }, + { + 0 /*addScalar<int, float, unsigned char>*/, + 0 /*addScalar<int, float, signed char>*/, + 0 /*addScalar<int, float, unsigned short>*/, + 0 /*addScalar<int, float, short>*/, + 0/*addScalar<int, float, int>*/, + 0/*addScalar<int, float, float>*/, + 0/*addScalar<int, double, double>*/, + }, + { + 0 /*addScalar<float, float, unsigned char>*/, + 0 /*addScalar<float, float, signed char>*/, + 0 /*addScalar<float, float, unsigned short>*/, + 0 /*addScalar<float, float, short>*/, + 0 /*addScalar<float, float, int>*/, + addScalar<float, float, float>, + 0/*addScalar<float, double, double>*/, + }, + { + 0 /*addScalar<double, double, unsigned char>*/, + 0 /*addScalar<double, double, signed char>*/, + 0 /*addScalar<double, double, unsigned short>*/, + 0 /*addScalar<double, double, short>*/, + 0 /*addScalar<double, double, int>*/, + 0 /*addScalar<double, double, float>*/, + 0/*addScalar<double, double, double>*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -430,7 +569,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat addScalar<unsigned char, float, short>, addScalar<unsigned char, float, int>, addScalar<unsigned char, float, float>, - addScalar<unsigned char, double, double> + addScalar<unsigned char, double, double>, }, { addScalar<signed char, float, unsigned char>, @@ -439,7 +578,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat addScalar<signed char, float, short>, addScalar<signed char, float, int>, addScalar<signed char, float, float>, - addScalar<signed char, double, double> + addScalar<signed char, double, double>, }, { 0 /*addScalar<unsigned short, float, unsigned char>*/, @@ -448,7 +587,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat addScalar<unsigned short, float, short>, addScalar<unsigned short, float, int>, addScalar<unsigned short, float, float>, - addScalar<unsigned short, double, double> + addScalar<unsigned short, double, double>, }, { 0 /*addScalar<short, float, unsigned char>*/, @@ -457,7 +596,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat addScalar<short, float, short>, addScalar<short, float, int>, addScalar<short, float, float>, - addScalar<short, double, double> + addScalar<short, double, double>, }, { 0 /*addScalar<int, float, unsigned char>*/, @@ -466,7 +605,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat 0 /*addScalar<int, float, short>*/, addScalar<int, float, int>, addScalar<int, float, float>, - addScalar<int, double, double> + addScalar<int, double, double>, }, { 0 /*addScalar<float, float, unsigned char>*/, @@ -475,7 +614,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat 0 /*addScalar<float, float, short>*/, 0 /*addScalar<float, float, int>*/, addScalar<float, float, float>, - addScalar<float, double, double> + addScalar<float, double, double>, }, { 0 /*addScalar<double, double, unsigned char>*/, @@ -484,9 +623,10 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat 0 /*addScalar<double, double, short>*/, 0 /*addScalar<double, double, int>*/, 0 /*addScalar<double, double, float>*/, - addScalar<double, double, double> + addScalar<double, double, double>, } }; +#endif typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = @@ -555,6 +695,75 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + subMat<unsigned char, unsigned char>, + 0/*subMat<unsigned char, signed char>*/, + 0/*subMat<unsigned char, unsigned short>*/, + 0/*subMat<unsigned char, short>*/, + 0/*subMat<unsigned char, int>*/, + 0/*subMat<unsigned char, float>*/, + 0/*subMat<unsigned char, double>*/, + }, + { + 0/*subMat<signed char, unsigned char>*/, + 0/*subMat<signed char, signed char>*/, + 0/*subMat<signed char, unsigned short>*/, + 0/*subMat<signed char, short>*/, + 0/*subMat<signed char, int>*/, + 0/*subMat<signed char, float>*/, + 0/*subMat<signed char, double>*/, + }, + { + 0 /*subMat<unsigned short, unsigned char>*/, + 0 /*subMat<unsigned short, signed char>*/, + 0/*subMat<unsigned short, unsigned short>*/, + 0/*subMat<unsigned short, short>*/, + 0/*subMat<unsigned short, int>*/, + 0/*subMat<unsigned short, float>*/, + 0/*subMat<unsigned short, double>*/, + }, + { + 0 /*subMat<short, unsigned char>*/, + 0 /*subMat<short, signed char>*/, + 0/*subMat<short, unsigned short>*/, + 0/*subMat<short, short>*/, + 0/*subMat<short, int>*/, + 0/*subMat<short, float>*/, + 0/*subMat<short, double>*/, + }, + { + 0 /*subMat<int, unsigned char>*/, + 0 /*subMat<int, signed char>*/, + 0 /*subMat<int, unsigned short>*/, + 0 /*subMat<int, short>*/, + 0/*subMat<int, int>*/, + 0/*subMat<int, float>*/, + 0/*subMat<int, double>*/, + }, + { + 0 /*subMat<float, unsigned char>*/, + 0 /*subMat<float, signed char>*/, + 0 /*subMat<float, unsigned short>*/, + 0 /*subMat<float, short>*/, + 0 /*subMat<float, int>*/, + subMat<float, float>, + 0/*subMat<float, double>*/, + }, + { + 0 /*subMat<double, unsigned char>*/, + 0 /*subMat<double, signed char>*/, + 0 /*subMat<double, unsigned short>*/, + 0 /*subMat<double, short>*/, + 0 /*subMat<double, int>*/, + 0 /*subMat<double, float>*/, + 0/*subMat<double, double>*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -564,7 +773,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons subMat<unsigned char, short>, subMat<unsigned char, int>, subMat<unsigned char, float>, - subMat<unsigned char, double> + subMat<unsigned char, double>, }, { subMat<signed char, unsigned char>, @@ -573,7 +782,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons subMat<signed char, short>, subMat<signed char, int>, subMat<signed char, float>, - subMat<signed char, double> + subMat<signed char, double>, }, { 0 /*subMat<unsigned short, unsigned char>*/, @@ -582,7 +791,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons subMat<unsigned short, short>, subMat<unsigned short, int>, subMat<unsigned short, float>, - subMat<unsigned short, double> + subMat<unsigned short, double>, }, { 0 /*subMat<short, unsigned char>*/, @@ -591,7 +800,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons subMat<short, short>, subMat<short, int>, subMat<short, float>, - subMat<short, double> + subMat<short, double>, }, { 0 /*subMat<int, unsigned char>*/, @@ -600,7 +809,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons 0 /*subMat<int, short>*/, subMat<int, int>, subMat<int, float>, - subMat<int, double> + subMat<int, double>, }, { 0 /*subMat<float, unsigned char>*/, @@ -609,7 +818,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons 0 /*subMat<float, short>*/, 0 /*subMat<float, int>*/, subMat<float, float>, - subMat<float, double> + subMat<float, double>, }, { 0 /*subMat<double, unsigned char>*/, @@ -618,9 +827,10 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons 0 /*subMat<double, short>*/, 0 /*subMat<double, int>*/, 0 /*subMat<double, float>*/, - subMat<double, double> + subMat<double, double>, } }; +#endif if (dtype < 0) dtype = src1.depth(); @@ -701,6 +911,75 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + subScalar<unsigned char, float, unsigned char>, + 0/*subScalar<unsigned char, float, signed char>*/, + 0/*subScalar<unsigned char, float, unsigned short>*/, + 0/*subScalar<unsigned char, float, short>*/, + 0/*subScalar<unsigned char, float, int>*/, + 0/*subScalar<unsigned char, float, float>*/, + 0/*subScalar<unsigned char, double, double>*/, + }, + { + 0/*subScalar<signed char, float, unsigned char>*/, + 0/*subScalar<signed char, float, signed char>*/, + 0/*subScalar<signed char, float, unsigned short>*/, + 0/*subScalar<signed char, float, short>*/, + 0/*subScalar<signed char, float, int>*/, + 0/*subScalar<signed char, float, float>*/, + 0/*subScalar<signed char, double, double>*/, + }, + { + 0 /*subScalar<unsigned short, float, unsigned char>*/, + 0 /*subScalar<unsigned short, float, signed char>*/, + 0/*subScalar<unsigned short, float, unsigned short>*/, + 0/*subScalar<unsigned short, float, short>*/, + 0/*subScalar<unsigned short, float, int>*/, + 0/*subScalar<unsigned short, float, float>*/, + 0/*subScalar<unsigned short, double, double>*/, + }, + { + 0 /*subScalar<short, float, unsigned char>*/, + 0 /*subScalar<short, float, signed char>*/, + 0/*subScalar<short, float, unsigned short>*/, + 0/*subScalar<short, float, short>*/, + 0/*subScalar<short, float, int>*/, + 0/*subScalar<short, float, float>*/, + 0/*subScalar<short, double, double>*/, + }, + { + 0 /*subScalar<int, float, unsigned char>*/, + 0 /*subScalar<int, float, signed char>*/, + 0 /*subScalar<int, float, unsigned short>*/, + 0 /*subScalar<int, float, short>*/, + 0/*subScalar<int, float, int>*/, + 0/*subScalar<int, float, float>*/, + 0/*subScalar<int, double, double>*/, + }, + { + 0 /*subScalar<float, float, unsigned char>*/, + 0 /*subScalar<float, float, signed char>*/, + 0 /*subScalar<float, float, unsigned short>*/, + 0 /*subScalar<float, float, short>*/, + 0 /*subScalar<float, float, int>*/, + subScalar<float, float, float>, + 0/*subScalar<float, double, double>*/, + }, + { + 0 /*subScalar<double, double, unsigned char>*/, + 0 /*subScalar<double, double, signed char>*/, + 0 /*subScalar<double, double, unsigned short>*/, + 0 /*subScalar<double, double, short>*/, + 0 /*subScalar<double, double, int>*/, + 0 /*subScalar<double, double, float>*/, + 0/*subScalar<double, double, double>*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -710,7 +989,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G subScalar<unsigned char, float, short>, subScalar<unsigned char, float, int>, subScalar<unsigned char, float, float>, - subScalar<unsigned char, double, double> + subScalar<unsigned char, double, double>, }, { subScalar<signed char, float, unsigned char>, @@ -719,7 +998,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G subScalar<signed char, float, short>, subScalar<signed char, float, int>, subScalar<signed char, float, float>, - subScalar<signed char, double, double> + subScalar<signed char, double, double>, }, { 0 /*subScalar<unsigned short, float, unsigned char>*/, @@ -728,7 +1007,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G subScalar<unsigned short, float, short>, subScalar<unsigned short, float, int>, subScalar<unsigned short, float, float>, - subScalar<unsigned short, double, double> + subScalar<unsigned short, double, double>, }, { 0 /*subScalar<short, float, unsigned char>*/, @@ -737,7 +1016,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G subScalar<short, float, short>, subScalar<short, float, int>, subScalar<short, float, float>, - subScalar<short, double, double> + subScalar<short, double, double>, }, { 0 /*subScalar<int, float, unsigned char>*/, @@ -746,7 +1025,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G 0 /*subScalar<int, float, short>*/, subScalar<int, float, int>, subScalar<int, float, float>, - subScalar<int, double, double> + subScalar<int, double, double>, }, { 0 /*subScalar<float, float, unsigned char>*/, @@ -755,7 +1034,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G 0 /*subScalar<float, float, short>*/, 0 /*subScalar<float, float, int>*/, subScalar<float, float, float>, - subScalar<float, double, double> + subScalar<float, double, double>, }, { 0 /*subScalar<double, double, unsigned char>*/, @@ -764,9 +1043,10 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G 0 /*subScalar<double, double, short>*/, 0 /*subScalar<double, double, int>*/, 0 /*subScalar<double, double, float>*/, - subScalar<double, double, double> + subScalar<double, double, double>, } }; +#endif typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = @@ -856,6 +1136,75 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub else { typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + mulMat<unsigned char, float, unsigned char>, + 0/*mulMat<unsigned char, float, signed char>*/, + 0/*mulMat<unsigned char, float, unsigned short>*/, + 0/*mulMat<unsigned char, float, short>*/, + 0/*mulMat<unsigned char, float, int>*/, + 0/*mulMat<unsigned char, float, float>*/, + 0/*mulMat<unsigned char, double, double>*/, + }, + { + 0/*mulMat<signed char, float, unsigned char>*/, + 0/*mulMat<signed char, float, signed char>*/, + 0/*mulMat<signed char, float, unsigned short>*/, + 0/*mulMat<signed char, float, short>*/, + 0/*mulMat<signed char, float, int>*/, + 0/*mulMat<signed char, float, float>*/, + 0/*mulMat<signed char, double, double>*/, + }, + { + 0 /*mulMat<unsigned short, float, unsigned char>*/, + 0 /*mulMat<unsigned short, float, signed char>*/, + 0/*mulMat<unsigned short, float, unsigned short>*/, + 0/*mulMat<unsigned short, float, short>*/, + 0/*mulMat<unsigned short, float, int>*/, + 0/*mulMat<unsigned short, float, float>*/, + 0/*mulMat<unsigned short, double, double>*/, + }, + { + 0 /*mulMat<short, float, unsigned char>*/, + 0 /*mulMat<short, float, signed char>*/, + 0/*mulMat<short, float, unsigned short>*/, + 0/*mulMat<short, float, short>*/, + 0/*mulMat<short, float, int>*/, + 0/*mulMat<short, float, float>*/, + 0/*mulMat<short, double, double>*/, + }, + { + 0 /*mulMat<int, float, unsigned char>*/, + 0 /*mulMat<int, float, signed char>*/, + 0 /*mulMat<int, float, unsigned short>*/, + 0 /*mulMat<int, float, short>*/, + 0/*mulMat<int, float, int>*/, + 0/*mulMat<int, float, float>*/, + 0/*mulMat<int, double, double>*/, + }, + { + 0 /*mulMat<float, float, unsigned char>*/, + 0 /*mulMat<float, float, signed char>*/, + 0 /*mulMat<float, float, unsigned short>*/, + 0 /*mulMat<float, float, short>*/, + 0 /*mulMat<float, float, int>*/, + mulMat<float, float, float>, + 0/*mulMat<float, double, double>*/, + }, + { + 0 /*mulMat<double, double, unsigned char>*/, + 0 /*mulMat<double, double, signed char>*/, + 0 /*mulMat<double, double, unsigned short>*/, + 0 /*mulMat<double, double, short>*/, + 0 /*mulMat<double, double, int>*/, + 0 /*mulMat<double, double, float>*/, + 0/*mulMat<double, double, double>*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -865,7 +1214,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub mulMat<unsigned char, float, short>, mulMat<unsigned char, float, int>, mulMat<unsigned char, float, float>, - mulMat<unsigned char, double, double> + mulMat<unsigned char, double, double>, }, { mulMat<signed char, float, unsigned char>, @@ -874,7 +1223,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub mulMat<signed char, float, short>, mulMat<signed char, float, int>, mulMat<signed char, float, float>, - mulMat<signed char, double, double> + mulMat<signed char, double, double>, }, { 0 /*mulMat<unsigned short, float, unsigned char>*/, @@ -883,7 +1232,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub mulMat<unsigned short, float, short>, mulMat<unsigned short, float, int>, mulMat<unsigned short, float, float>, - mulMat<unsigned short, double, double> + mulMat<unsigned short, double, double>, }, { 0 /*mulMat<short, float, unsigned char>*/, @@ -892,7 +1241,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub mulMat<short, float, short>, mulMat<short, float, int>, mulMat<short, float, float>, - mulMat<short, double, double> + mulMat<short, double, double>, }, { 0 /*mulMat<int, float, unsigned char>*/, @@ -901,7 +1250,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub 0 /*mulMat<int, float, short>*/, mulMat<int, float, int>, mulMat<int, float, float>, - mulMat<int, double, double> + mulMat<int, double, double>, }, { 0 /*mulMat<float, float, unsigned char>*/, @@ -910,7 +1259,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub 0 /*mulMat<float, float, short>*/, 0 /*mulMat<float, float, int>*/, mulMat<float, float, float>, - mulMat<float, double, double> + mulMat<float, double, double>, }, { 0 /*mulMat<double, double, unsigned char>*/, @@ -919,9 +1268,10 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub 0 /*mulMat<double, double, short>*/, 0 /*mulMat<double, double, int>*/, 0 /*mulMat<double, double, float>*/, - mulMat<double, double, double> + mulMat<double, double, double>, } }; +#endif if (dtype < 0) dtype = src1.depth(); @@ -965,6 +1315,75 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + mulScalar<unsigned char, float, unsigned char>, + 0/*mulScalar<unsigned char, float, signed char>*/, + 0/*mulScalar<unsigned char, float, unsigned short>*/, + 0/*mulScalar<unsigned char, float, short>*/, + 0/*mulScalar<unsigned char, float, int>*/, + 0/*mulScalar<unsigned char, float, float>*/, + 0/*mulScalar<unsigned char, double, double>*/, + }, + { + 0/*mulScalar<signed char, float, unsigned char>*/, + 0/*mulScalar<signed char, float, signed char>*/, + 0/*mulScalar<signed char, float, unsigned short>*/, + 0/*mulScalar<signed char, float, short>*/, + 0/*mulScalar<signed char, float, int>*/, + 0/*mulScalar<signed char, float, float>*/, + 0/*mulScalar<signed char, double, double>*/, + }, + { + 0 /*mulScalar<unsigned short, float, unsigned char>*/, + 0 /*mulScalar<unsigned short, float, signed char>*/, + 0/*mulScalar<unsigned short, float, unsigned short>*/, + 0/*mulScalar<unsigned short, float, short>*/, + 0/*mulScalar<unsigned short, float, int>*/, + 0/*mulScalar<unsigned short, float, float>*/, + 0/*mulScalar<unsigned short, double, double>*/, + }, + { + 0 /*mulScalar<short, float, unsigned char>*/, + 0 /*mulScalar<short, float, signed char>*/, + 0/*mulScalar<short, float, unsigned short>*/, + 0/*mulScalar<short, float, short>*/, + 0/*mulScalar<short, float, int>*/, + 0/*mulScalar<short, float, float>*/, + 0/*mulScalar<short, double, double>*/, + }, + { + 0 /*mulScalar<int, float, unsigned char>*/, + 0 /*mulScalar<int, float, signed char>*/, + 0 /*mulScalar<int, float, unsigned short>*/, + 0 /*mulScalar<int, float, short>*/, + 0/*mulScalar<int, float, int>*/, + 0/*mulScalar<int, float, float>*/, + 0/*mulScalar<int, double, double>*/, + }, + { + 0 /*mulScalar<float, float, unsigned char>*/, + 0 /*mulScalar<float, float, signed char>*/, + 0 /*mulScalar<float, float, unsigned short>*/, + 0 /*mulScalar<float, float, short>*/, + 0 /*mulScalar<float, float, int>*/, + mulScalar<float, float, float>, + 0/*mulScalar<float, double, double>*/, + }, + { + 0 /*mulScalar<double, double, unsigned char>*/, + 0 /*mulScalar<double, double, signed char>*/, + 0 /*mulScalar<double, double, unsigned short>*/, + 0 /*mulScalar<double, double, short>*/, + 0 /*mulScalar<double, double, int>*/, + 0 /*mulScalar<double, double, float>*/, + 0/*mulScalar<double, double, double>*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -974,7 +1393,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double mulScalar<unsigned char, float, short>, mulScalar<unsigned char, float, int>, mulScalar<unsigned char, float, float>, - mulScalar<unsigned char, double, double> + mulScalar<unsigned char, double, double>, }, { mulScalar<signed char, float, unsigned char>, @@ -983,7 +1402,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double mulScalar<signed char, float, short>, mulScalar<signed char, float, int>, mulScalar<signed char, float, float>, - mulScalar<signed char, double, double> + mulScalar<signed char, double, double>, }, { 0 /*mulScalar<unsigned short, float, unsigned char>*/, @@ -992,7 +1411,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double mulScalar<unsigned short, float, short>, mulScalar<unsigned short, float, int>, mulScalar<unsigned short, float, float>, - mulScalar<unsigned short, double, double> + mulScalar<unsigned short, double, double>, }, { 0 /*mulScalar<short, float, unsigned char>*/, @@ -1001,7 +1420,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double mulScalar<short, float, short>, mulScalar<short, float, int>, mulScalar<short, float, float>, - mulScalar<short, double, double> + mulScalar<short, double, double>, }, { 0 /*mulScalar<int, float, unsigned char>*/, @@ -1010,7 +1429,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double 0 /*mulScalar<int, float, short>*/, mulScalar<int, float, int>, mulScalar<int, float, float>, - mulScalar<int, double, double> + mulScalar<int, double, double>, }, { 0 /*mulScalar<float, float, unsigned char>*/, @@ -1019,7 +1438,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double 0 /*mulScalar<float, float, short>*/, 0 /*mulScalar<float, float, int>*/, mulScalar<float, float, float>, - mulScalar<float, double, double> + mulScalar<float, double, double>, }, { 0 /*mulScalar<double, double, unsigned char>*/, @@ -1028,9 +1447,10 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double 0 /*mulScalar<double, double, short>*/, 0 /*mulScalar<double, double, int>*/, 0 /*mulScalar<double, double, float>*/, - mulScalar<double, double, double> + mulScalar<double, double, double>, } }; +#endif typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = @@ -1121,6 +1541,75 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double else { typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + divMat<unsigned char, float, unsigned char>, + 0/*divMat<unsigned char, float, signed char>*/, + 0/*divMat<unsigned char, float, unsigned short>*/, + 0/*divMat<unsigned char, float, short>*/, + 0/*divMat<unsigned char, float, int>*/, + 0/*divMat<unsigned char, float, float>*/, + 0/*divMat<unsigned char, double, double>*/, + }, + { + 0/*divMat<signed char, float, unsigned char>*/, + 0/*divMat<signed char, float, signed char>*/, + 0/*divMat<signed char, float, unsigned short>*/, + 0/*divMat<signed char, float, short>*/, + 0/*divMat<signed char, float, int>*/, + 0/*divMat<signed char, float, float>*/, + 0/*divMat<signed char, double, double>*/, + }, + { + 0 /*divMat<unsigned short, float, unsigned char>*/, + 0 /*divMat<unsigned short, float, signed char>*/, + 0/*divMat<unsigned short, float, unsigned short>*/, + 0/*divMat<unsigned short, float, short>*/, + 0/*divMat<unsigned short, float, int>*/, + 0/*divMat<unsigned short, float, float>*/, + 0/*divMat<unsigned short, double, double>*/, + }, + { + 0 /*divMat<short, float, unsigned char>*/, + 0 /*divMat<short, float, signed char>*/, + 0/*divMat<short, float, unsigned short>*/, + 0/*divMat<short, float, short>*/, + 0/*divMat<short, float, int>*/, + 0/*divMat<short, float, float>*/, + 0/*divMat<short, double, double>*/, + }, + { + 0 /*divMat<int, float, unsigned char>*/, + 0 /*divMat<int, float, signed char>*/, + 0 /*divMat<int, float, unsigned short>*/, + 0 /*divMat<int, float, short>*/, + 0/*divMat<int, float, int>*/, + 0/*divMat<int, float, float>*/, + 0/*divMat<int, double, double>*/, + }, + { + 0 /*divMat<float, float, unsigned char>*/, + 0 /*divMat<float, float, signed char>*/, + 0 /*divMat<float, float, unsigned short>*/, + 0 /*divMat<float, float, short>*/, + 0 /*divMat<float, float, int>*/, + divMat<float, float, float>, + 0/*divMat<float, double, double>*/, + }, + { + 0 /*divMat<double, double, unsigned char>*/, + 0 /*divMat<double, double, signed char>*/, + 0 /*divMat<double, double, unsigned short>*/, + 0 /*divMat<double, double, short>*/, + 0 /*divMat<double, double, int>*/, + 0 /*divMat<double, double, float>*/, + 0/*divMat<double, double, double>*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -1130,7 +1619,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double divMat<unsigned char, float, short>, divMat<unsigned char, float, int>, divMat<unsigned char, float, float>, - divMat<unsigned char, double, double> + divMat<unsigned char, double, double>, }, { divMat<signed char, float, unsigned char>, @@ -1139,7 +1628,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double divMat<signed char, float, short>, divMat<signed char, float, int>, divMat<signed char, float, float>, - divMat<signed char, double, double> + divMat<signed char, double, double>, }, { 0 /*divMat<unsigned short, float, unsigned char>*/, @@ -1148,7 +1637,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double divMat<unsigned short, float, short>, divMat<unsigned short, float, int>, divMat<unsigned short, float, float>, - divMat<unsigned short, double, double> + divMat<unsigned short, double, double>, }, { 0 /*divMat<short, float, unsigned char>*/, @@ -1157,7 +1646,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double divMat<short, float, short>, divMat<short, float, int>, divMat<short, float, float>, - divMat<short, double, double> + divMat<short, double, double>, }, { 0 /*divMat<int, float, unsigned char>*/, @@ -1166,7 +1655,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double 0 /*divMat<int, float, short>*/, divMat<int, float, int>, divMat<int, float, float>, - divMat<int, double, double> + divMat<int, double, double>, }, { 0 /*divMat<float, float, unsigned char>*/, @@ -1175,7 +1664,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double 0 /*divMat<float, float, short>*/, 0 /*divMat<float, float, int>*/, divMat<float, float, float>, - divMat<float, double, double> + divMat<float, double, double>, }, { 0 /*divMat<double, double, unsigned char>*/, @@ -1184,9 +1673,10 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double 0 /*divMat<double, double, short>*/, 0 /*divMat<double, double, int>*/, 0 /*divMat<double, double, float>*/, - divMat<double, double, double> + divMat<double, double, double>, } }; +#endif if (dtype < 0) dtype = src1.depth(); @@ -1230,6 +1720,75 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + divScalar<unsigned char, float, unsigned char>, + 0/*divScalar<unsigned char, float, signed char>*/, + 0/*divScalar<unsigned char, float, unsigned short>*/, + 0/*divScalar<unsigned char, float, short>*/, + 0/*divScalar<unsigned char, float, int>*/, + 0/*divScalar<unsigned char, float, float>*/, + 0/*divScalar<unsigned char, double, double>*/, + }, + { + 0/*divScalar<signed char, float, unsigned char>*/, + 0/*divScalar<signed char, float, signed char>*/, + 0/*divScalar<signed char, float, unsigned short>*/, + 0/*divScalar<signed char, float, short>*/, + 0/*divScalar<signed char, float, int>*/, + 0/*divScalar<signed char, float, float>*/, + 0/*divScalar<signed char, double, double>*/, + }, + { + 0 /*divScalar<unsigned short, float, unsigned char>*/, + 0 /*divScalar<unsigned short, float, signed char>*/, + 0/*divScalar<unsigned short, float, unsigned short>*/, + 0/*divScalar<unsigned short, float, short>*/, + 0/*divScalar<unsigned short, float, int>*/, + 0/*divScalar<unsigned short, float, float>*/, + 0/*divScalar<unsigned short, double, double>*/, + }, + { + 0 /*divScalar<short, float, unsigned char>*/, + 0 /*divScalar<short, float, signed char>*/, + 0/*divScalar<short, float, unsigned short>*/, + 0/*divScalar<short, float, short>*/, + 0/*divScalar<short, float, int>*/, + 0/*divScalar<short, float, float>*/, + 0/*divScalar<short, double, double>*/, + }, + { + 0 /*divScalar<int, float, unsigned char>*/, + 0 /*divScalar<int, float, signed char>*/, + 0 /*divScalar<int, float, unsigned short>*/, + 0 /*divScalar<int, float, short>*/, + 0/*divScalar<int, float, int>*/, + 0/*divScalar<int, float, float>*/, + 0/*divScalar<int, double, double>*/, + }, + { + 0 /*divScalar<float, float, unsigned char>*/, + 0 /*divScalar<float, float, signed char>*/, + 0 /*divScalar<float, float, unsigned short>*/, + 0 /*divScalar<float, float, short>*/, + 0 /*divScalar<float, float, int>*/, + divScalar<float, float, float>, + 0/*divScalar<float, double, double>*/, + }, + { + 0 /*divScalar<double, double, unsigned char>*/, + 0 /*divScalar<double, double, signed char>*/, + 0 /*divScalar<double, double, unsigned short>*/, + 0 /*divScalar<double, double, short>*/, + 0 /*divScalar<double, double, int>*/, + 0 /*divScalar<double, double, float>*/, + 0/*divScalar<double, double, double>*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -1239,7 +1798,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc divScalar<unsigned char, float, short>, divScalar<unsigned char, float, int>, divScalar<unsigned char, float, float>, - divScalar<unsigned char, double, double> + divScalar<unsigned char, double, double>, }, { divScalar<signed char, float, unsigned char>, @@ -1248,7 +1807,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc divScalar<signed char, float, short>, divScalar<signed char, float, int>, divScalar<signed char, float, float>, - divScalar<signed char, double, double> + divScalar<signed char, double, double>, }, { 0 /*divScalar<unsigned short, float, unsigned char>*/, @@ -1257,7 +1816,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc divScalar<unsigned short, float, short>, divScalar<unsigned short, float, int>, divScalar<unsigned short, float, float>, - divScalar<unsigned short, double, double> + divScalar<unsigned short, double, double>, }, { 0 /*divScalar<short, float, unsigned char>*/, @@ -1266,7 +1825,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc divScalar<short, float, short>, divScalar<short, float, int>, divScalar<short, float, float>, - divScalar<short, double, double> + divScalar<short, double, double>, }, { 0 /*divScalar<int, float, unsigned char>*/, @@ -1275,7 +1834,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc 0 /*divScalar<int, float, short>*/, divScalar<int, float, int>, divScalar<int, float, float>, - divScalar<int, double, double> + divScalar<int, double, double>, }, { 0 /*divScalar<float, float, unsigned char>*/, @@ -1284,7 +1843,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc 0 /*divScalar<float, float, short>*/, 0 /*divScalar<float, float, int>*/, divScalar<float, float, float>, - divScalar<float, double, double> + divScalar<float, double, double>, }, { 0 /*divScalar<double, double, unsigned char>*/, @@ -1293,9 +1852,10 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc 0 /*divScalar<double, double, short>*/, 0 /*divScalar<double, double, int>*/, 0 /*divScalar<double, double, float>*/, - divScalar<double, double, double> + divScalar<double, double, double>, } }; +#endif typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = @@ -1359,6 +1919,75 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + divInv<unsigned char, float, unsigned char>, + 0/*divInv<unsigned char, float, signed char>*/, + 0/*divInv<unsigned char, float, unsigned short>*/, + 0/*divInv<unsigned char, float, short>*/, + 0/*divInv<unsigned char, float, int>*/, + 0/*divInv<unsigned char, float, float>*/, + 0/*divInv<unsigned char, double, double>*/, + }, + { + 0/*divInv<signed char, float, unsigned char>*/, + 0/*divInv<signed char, float, signed char>*/, + 0/*divInv<signed char, float, unsigned short>*/, + 0/*divInv<signed char, float, short>*/, + 0/*divInv<signed char, float, int>*/, + 0/*divInv<signed char, float, float>*/, + 0/*divInv<signed char, double, double>*/, + }, + { + 0 /*divInv<unsigned short, float, unsigned char>*/, + 0 /*divInv<unsigned short, float, signed char>*/, + 0/*divInv<unsigned short, float, unsigned short>*/, + 0/*divInv<unsigned short, float, short>*/, + 0/*divInv<unsigned short, float, int>*/, + 0/*divInv<unsigned short, float, float>*/, + 0/*divInv<unsigned short, double, double>*/, + }, + { + 0 /*divInv<short, float, unsigned char>*/, + 0 /*divInv<short, float, signed char>*/, + 0/*divInv<short, float, unsigned short>*/, + 0/*divInv<short, float, short>*/, + 0/*divInv<short, float, int>*/, + 0/*divInv<short, float, float>*/, + 0/*divInv<short, double, double>*/, + }, + { + 0 /*divInv<int, float, unsigned char>*/, + 0 /*divInv<int, float, signed char>*/, + 0 /*divInv<int, float, unsigned short>*/, + 0 /*divInv<int, float, short>*/, + 0/*divInv<int, float, int>*/, + 0/*divInv<int, float, float>*/, + 0/*divInv<int, double, double>*/, + }, + { + 0 /*divInv<float, float, unsigned char>*/, + 0 /*divInv<float, float, signed char>*/, + 0 /*divInv<float, float, unsigned short>*/, + 0 /*divInv<float, float, short>*/, + 0 /*divInv<float, float, int>*/, + divInv<float, float, float>, + 0/*divInv<float, double, double>*/, + }, + { + 0 /*divInv<double, double, unsigned char>*/, + 0 /*divInv<double, double, signed char>*/, + 0 /*divInv<double, double, unsigned short>*/, + 0 /*divInv<double, double, short>*/, + 0 /*divInv<double, double, int>*/, + 0 /*divInv<double, double, float>*/, + 0/*divInv<double, double, double>*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -1368,7 +1997,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St divInv<unsigned char, float, short>, divInv<unsigned char, float, int>, divInv<unsigned char, float, float>, - divInv<unsigned char, double, double> + divInv<unsigned char, double, double>, }, { divInv<signed char, float, unsigned char>, @@ -1377,7 +2006,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St divInv<signed char, float, short>, divInv<signed char, float, int>, divInv<signed char, float, float>, - divInv<signed char, double, double> + divInv<signed char, double, double>, }, { 0 /*divInv<unsigned short, float, unsigned char>*/, @@ -1386,7 +2015,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St divInv<unsigned short, float, short>, divInv<unsigned short, float, int>, divInv<unsigned short, float, float>, - divInv<unsigned short, double, double> + divInv<unsigned short, double, double>, }, { 0 /*divInv<short, float, unsigned char>*/, @@ -1395,7 +2024,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St divInv<short, float, short>, divInv<short, float, int>, divInv<short, float, float>, - divInv<short, double, double> + divInv<short, double, double>, }, { 0 /*divInv<int, float, unsigned char>*/, @@ -1404,7 +2033,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St 0 /*divInv<int, float, short>*/, divInv<int, float, int>, divInv<int, float, float>, - divInv<int, double, double> + divInv<int, double, double>, }, { 0 /*divInv<float, float, unsigned char>*/, @@ -1413,7 +2042,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St 0 /*divInv<float, float, short>*/, 0 /*divInv<float, float, int>*/, divInv<float, float, float>, - divInv<float, double, double> + divInv<float, double, double>, }, { 0 /*divInv<double, double, unsigned char>*/, @@ -1422,9 +2051,10 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St 0 /*divInv<double, double, short>*/, 0 /*divInv<double, double, int>*/, 0 /*divInv<double, double, float>*/, - divInv<double, double, double> + divInv<double, double, double>, } }; +#endif if (dtype < 0) dtype = src.depth(); @@ -1471,6 +2101,19 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + absDiffMat<unsigned char>, + 0/*absDiffMat<signed char>*/, + 0/*absDiffMat<unsigned short>*/, + 0/*absDiffMat<short>*/, + 0/*absDiffMat<int>*/, + absDiffMat<float>, + 0/*absDiffMat<double>*/, + }; +#else static const func_t funcs[] = { absDiffMat<unsigned char>, @@ -1479,8 +2122,9 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea absDiffMat<short>, absDiffMat<int>, absDiffMat<float>, - absDiffMat<double> + absDiffMat<double>, }; +#endif const int depth = src1.depth(); const int cn = src1.channels(); @@ -1556,6 +2200,19 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + absDiffScalar<unsigned char, float>, + 0/*absDiffScalar<signed char, float>*/, + 0/*absDiffScalar<unsigned short, float>*/, + 0/*absDiffScalar<short, float>*/, + 0/*absDiffScalar<int, float>*/, + absDiffScalar<float, float>, + 0/*absDiffScalar<double, double>*/, + }; +#else static const func_t funcs[] = { absDiffScalar<unsigned char, float>, @@ -1564,8 +2221,9 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea absDiffScalar<short, float>, absDiffScalar<int, float>, absDiffScalar<float, float>, - absDiffScalar<double, double> + absDiffScalar<double, double>, }; +#endif const int depth = src1.depth(); @@ -1578,9 +2236,13 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src1.size(), src1.type()); - funcs[depth](src1, src2.val[0], dst, StreamAccessor::getStream(stream)); + func(src1, src2.val[0], dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -1597,6 +2259,19 @@ void cv::gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*absMat<unsigned char>*/, + 0/*absMat<signed char>*/, + 0/*absMat<unsigned short>*/, + 0/*absMat<short>*/, + 0/*absMat<int>*/, + absMat<float>, + 0/*absMat<double>*/, + }; +#else static const func_t funcs[] = { absMat<unsigned char>, @@ -1605,8 +2280,9 @@ void cv::gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream) absMat<short>, absMat<int>, absMat<float>, - absMat<double> + absMat<double>, }; +#endif const int depth = src.depth(); @@ -1619,9 +2295,13 @@ void cv::gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -1638,6 +2318,19 @@ void cv::gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*sqrMat<unsigned char>*/, + 0/*sqrMat<signed char>*/, + 0/*sqrMat<unsigned short>*/, + 0/*sqrMat<short>*/, + 0/*sqrMat<int>*/, + sqrMat<float>, + 0/*sqrMat<double>*/, + }; +#else static const func_t funcs[] = { sqrMat<unsigned char>, @@ -1646,8 +2339,9 @@ void cv::gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream) sqrMat<short>, sqrMat<int>, sqrMat<float>, - sqrMat<double> + sqrMat<double>, }; +#endif const int depth = src.depth(); @@ -1660,9 +2354,13 @@ void cv::gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -1679,6 +2377,19 @@ void cv::gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*sqrtMat<unsigned char>*/, + 0/*sqrtMat<signed char>*/, + 0/*sqrtMat<unsigned short>*/, + 0/*sqrtMat<short>*/, + 0/*sqrtMat<int>*/, + sqrtMat<float>, + 0/*sqrtMat<double>*/, + }; +#else static const func_t funcs[] = { sqrtMat<unsigned char>, @@ -1687,8 +2398,9 @@ void cv::gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream) sqrtMat<short>, sqrtMat<int>, sqrtMat<float>, - sqrtMat<double> + sqrtMat<double>, }; +#endif const int depth = src.depth(); @@ -1701,9 +2413,13 @@ void cv::gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } //////////////////////////////////////////////////////////////////////// @@ -1720,6 +2436,19 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*logMat<unsigned char>*/, + 0/*logMat<signed char>*/, + 0/*logMat<unsigned short>*/, + 0/*logMat<short>*/, + 0/*logMat<int>*/, + logMat<float>, + 0/*logMat<double>*/, + }; +#else static const func_t funcs[] = { logMat<unsigned char>, @@ -1728,8 +2457,9 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream) logMat<short>, logMat<int>, logMat<float>, - logMat<double> + logMat<double>, }; +#endif const int depth = src.depth(); @@ -1742,9 +2472,13 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } //////////////////////////////////////////////////////////////////////// @@ -1761,6 +2495,19 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*expMat<unsigned char>*/, + 0/*expMat<signed char>*/, + 0/*expMat<unsigned short>*/, + 0/*expMat<short>*/, + 0/*expMat<int>*/, + expMat<float>, + 0/*expMat<double>*/, + }; +#else static const func_t funcs[] = { expMat<unsigned char>, @@ -1769,8 +2516,9 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream) expMat<short>, expMat<int>, expMat<float>, - expMat<double> + expMat<double>, }; +#endif const int depth = src.depth(); @@ -1783,9 +2531,13 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -1809,6 +2561,19 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][4] = + { + {cmpMatEq<unsigned char> , cmpMatNe<unsigned char> , cmpMatLt<unsigned char> , cmpMatLe<unsigned char> }, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {cmpMatEq<float> , cmpMatNe<float> , cmpMatLt<float> , cmpMatLe<float> }, + {0, 0, 0, 0}, + }; +#else static const func_t funcs[7][4] = { {cmpMatEq<unsigned char> , cmpMatNe<unsigned char> , cmpMatLt<unsigned char> , cmpMatLe<unsigned char> }, @@ -1819,6 +2584,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c {cmpMatEq<float> , cmpMatNe<float> , cmpMatLt<float> , cmpMatLe<float> }, {cmpMatEq<double> , cmpMatNe<double> , cmpMatLt<double> , cmpMatLe<double> } }; +#endif typedef void (*func_v4_t)(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream); static const func_v4_t funcs_v4[] = @@ -1839,10 +2605,6 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } - dst.create(src1.size(), CV_MAKE_TYPE(CV_8U, cn)); - - cudaStream_t stream = StreamAccessor::getStream(s); - static const int codes[] = { 0, 2, 3, 2, 3, 1 @@ -1857,6 +2619,15 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c }; const int code = codes[cmpop]; + + const func_t func = funcs[depth][code]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + dst.create(src1.size(), CV_MAKE_TYPE(CV_8U, cn)); + + cudaStream_t stream = StreamAccessor::getStream(s); + PtrStepSzb src1_(src1.rows, src1.cols * cn, psrc1[cmpop]->data, psrc1[cmpop]->step); PtrStepSzb src2_(src1.rows, src1.cols * cn, psrc2[cmpop]->data, psrc2[cmpop]->step); PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step); @@ -1882,8 +2653,6 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c } } - const func_t func = funcs[depth][code]; - func(src1_, src2_, dst_, stream); } @@ -1913,6 +2682,31 @@ void cv::gpu::compare(const GpuMat& src, Scalar sc, GpuMat& dst, int cmpop, Stre using namespace arithm; typedef void (*func_t)(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); + typedef void (*cast_func_t)(Scalar& sc); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][6] = + { + {cmpScalarEq<unsigned char> , cmpScalarGt<unsigned char> , cmpScalarGe<unsigned char> , cmpScalarLt<unsigned char> , cmpScalarLe<unsigned char> , cmpScalarNe<unsigned char> }, + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0}, + {cmpScalarEq<float> , cmpScalarGt<float> , cmpScalarGe<float> , cmpScalarLt<float> , cmpScalarLe<float> , cmpScalarNe<float> }, + {0, 0, 0, 0, 0, 0}, + }; + + static const cast_func_t cast_func[] = + { + castScalar<unsigned char>, + 0/*castScalar<signed char>*/, + 0/*castScalar<unsigned short>*/, + 0/*castScalar<short>*/, + 0/*castScalar<int>*/, + castScalar<float>, + 0/*castScalar<double>*/, + }; +#else static const func_t funcs[7][6] = { {cmpScalarEq<unsigned char> , cmpScalarGt<unsigned char> , cmpScalarGe<unsigned char> , cmpScalarLt<unsigned char> , cmpScalarLe<unsigned char> , cmpScalarNe<unsigned char> }, @@ -1924,11 +2718,11 @@ void cv::gpu::compare(const GpuMat& src, Scalar sc, GpuMat& dst, int cmpop, Stre {cmpScalarEq<double> , cmpScalarGt<double> , cmpScalarGe<double> , cmpScalarLt<double> , cmpScalarLe<double> , cmpScalarNe<double> } }; - typedef void (*cast_func_t)(Scalar& sc); static const cast_func_t cast_func[] = { castScalar<unsigned char>, castScalar<signed char>, castScalar<unsigned short>, castScalar<short>, castScalar<int>, castScalar<float>, castScalar<double> }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -1943,11 +2737,15 @@ void cv::gpu::compare(const GpuMat& src, Scalar sc, GpuMat& dst, int cmpop, Stre CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth][cmpop]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), CV_MAKE_TYPE(CV_8U, cn)); cast_func[depth](sc); - funcs[depth][cmpop](src, cn, sc.val, dst, StreamAccessor::getStream(stream)); + func(src, cn, sc.val, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -2391,14 +3189,56 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre using namespace arithm; typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[5][4] = + { + { + BitScalar<unsigned char, bitScalarAnd<unsigned char> >::call, + 0, + 0/*NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call*/, + 0/*NppBitwiseC<CV_8U , 4, nppiAndC_8u_C4R >::call*/, + }, + {0,0,0,0}, + { + 0/*BitScalar<unsigned short, bitScalarAnd<unsigned short> >::call*/, + 0, + 0/*NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call*/, + 0/*NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call*/, + }, + {0,0,0,0}, + { + 0/*BitScalar<int, bitScalarAnd<int> >::call*/, + 0, + 0/*NppBitwiseC<CV_32S, 3, nppiAndC_32s_C3R>::call*/, + 0/*NppBitwiseC<CV_32S, 4, nppiAndC_32s_C4R>::call*/, + } + }; +#else static const func_t funcs[5][4] = { - {BitScalar<unsigned char, bitScalarAnd<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call, BitScalar4< bitScalarAnd<unsigned int> >::call}, + { + BitScalar<unsigned char, bitScalarAnd<unsigned char> >::call, + 0, + NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call, + BitScalar4< bitScalarAnd<unsigned int> >::call + }, {0,0,0,0}, - {BitScalar<unsigned short, bitScalarAnd<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call}, + { + BitScalar<unsigned short, bitScalarAnd<unsigned short> >::call, + 0, + NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call, + NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call + }, {0,0,0,0}, - {BitScalar<int, bitScalarAnd<int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiAndC_32s_C4R>::call} + { + BitScalar<int, bitScalarAnd<int> >::call, + 0, + NppBitwiseC<CV_32S, 3, nppiAndC_32s_C3R>::call, + NppBitwiseC<CV_32S, 4, nppiAndC_32s_C4R>::call + } }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -2406,9 +3246,13 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32S ); CV_Assert( cn == 1 || cn == 3 || cn == 4 ); + const func_t func = funcs[depth][cn - 1]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth][cn - 1](src, sc, dst, StreamAccessor::getStream(stream)); + func(src, sc, dst, StreamAccessor::getStream(stream)); } void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream) @@ -2416,14 +3260,56 @@ void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Strea using namespace arithm; typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[5][4] = + { + { + BitScalar<unsigned char, bitScalarOr<unsigned char> >::call, + 0, + 0/*NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call*/, + 0/*NppBitwiseC<CV_8U , 4, nppiOrC_8u_C4R >::call*/, + }, + {0,0,0,0}, + { + 0/*BitScalar<unsigned short, bitScalarOr<unsigned short> >::call*/, + 0, + 0/*NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call*/, + 0/*NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call*/, + }, + {0,0,0,0}, + { + 0/*BitScalar<int, bitScalarOr<int> >::call*/, + 0, + 0/*NppBitwiseC<CV_32S, 3, nppiOrC_32s_C3R>::call*/, + 0/*NppBitwiseC<CV_32S, 4, nppiOrC_32s_C4R>::call*/, + } + }; +#else static const func_t funcs[5][4] = { - {BitScalar<unsigned char, bitScalarOr<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call, BitScalar4< bitScalarOr<unsigned int> >::call}, + { + BitScalar<unsigned char, bitScalarOr<unsigned char> >::call, + 0, + NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call, + BitScalar4< bitScalarOr<unsigned int> >::call + }, {0,0,0,0}, - {BitScalar<unsigned short, bitScalarOr<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call}, + { + BitScalar<unsigned short, bitScalarOr<unsigned short> >::call, + 0, + NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call, + NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call + }, {0,0,0,0}, - {BitScalar<int, bitScalarOr<int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiOrC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiOrC_32s_C4R>::call} + { + BitScalar<int, bitScalarOr<int> >::call, + 0, + NppBitwiseC<CV_32S, 3, nppiOrC_32s_C3R>::call, + NppBitwiseC<CV_32S, 4, nppiOrC_32s_C4R>::call + } }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -2431,9 +3317,13 @@ void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Strea CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32S ); CV_Assert( cn == 1 || cn == 3 || cn == 4 ); + const func_t func = funcs[depth][cn - 1]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth][cn - 1](src, sc, dst, StreamAccessor::getStream(stream)); + func(src, sc, dst, StreamAccessor::getStream(stream)); } void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream) @@ -2441,14 +3331,56 @@ void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre using namespace arithm; typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[5][4] = + { + { + BitScalar<unsigned char, bitScalarXor<unsigned char> >::call, + 0, + 0/*NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call*/, + 0/*NppBitwiseC<CV_8U , 4, nppiXorC_8u_C4R >::call*/, + }, + {0,0,0,0}, + { + 0/*BitScalar<unsigned short, bitScalarXor<unsigned short> >::call*/, + 0, + 0/*NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call*/, + 0/*NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call*/, + }, + {0,0,0,0}, + { + 0/*BitScalar<int, bitScalarXor<int> >::call*/, + 0, + 0/*NppBitwiseC<CV_32S, 3, nppiXorC_32s_C3R>::call*/, + 0/*NppBitwiseC<CV_32S, 4, nppiXorC_32s_C4R>::call*/, + } + }; +#else static const func_t funcs[5][4] = { - {BitScalar<unsigned char, bitScalarXor<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call, BitScalar4< bitScalarXor<unsigned int> >::call}, + { + BitScalar<unsigned char, bitScalarXor<unsigned char> >::call, + 0, + NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call, + BitScalar4< bitScalarXor<unsigned int> >::call + }, {0,0,0,0}, - {BitScalar<unsigned short, bitScalarXor<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call}, + { + BitScalar<unsigned short, bitScalarXor<unsigned short> >::call, + 0, + NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call, + NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call + }, {0,0,0,0}, - {BitScalar<int, bitScalarXor<int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiXorC_32s_C4R>::call} + { + BitScalar<int, bitScalarXor<int> >::call, + 0, + NppBitwiseC<CV_32S, 3, nppiXorC_32s_C3R>::call, + NppBitwiseC<CV_32S, 4, nppiXorC_32s_C4R>::call + } }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -2456,9 +3388,13 @@ void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32S ); CV_Assert( cn == 1 || cn == 3 || cn == 4 ); + const func_t func = funcs[depth][cn - 1]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth][cn - 1](src, sc, dst, StreamAccessor::getStream(stream)); + func(src, sc, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -2578,6 +3514,19 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + minMat<unsigned char>, + 0/*minMat<signed char>*/, + 0/*minMat<unsigned short>*/, + 0/*minMat<short>*/, + 0/*minMat<int>*/, + minMat<float>, + 0/*minMat<double>*/, + }; +#else static const func_t funcs[] = { minMat<unsigned char>, @@ -2586,8 +3535,9 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s minMat<short>, minMat<int>, minMat<float>, - minMat<double> + minMat<double>, }; +#endif const int depth = src1.depth(); const int cn = src1.channels(); @@ -2657,6 +3607,19 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + maxMat<unsigned char>, + 0/*maxMat<signed char>*/, + 0/*maxMat<unsigned short>*/, + 0/*maxMat<short>*/, + 0/*maxMat<int>*/, + maxMat<float>, + 0/*maxMat<double>*/, + }; +#else static const func_t funcs[] = { maxMat<unsigned char>, @@ -2665,8 +3628,9 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s maxMat<short>, maxMat<int>, maxMat<float>, - maxMat<double> + maxMat<double>, }; +#endif const int depth = src1.depth(); const int cn = src1.channels(); @@ -2744,6 +3708,31 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); + typedef double (*cast_func_t)(double sc); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + minScalar<unsigned char>, + 0/*minScalar<signed char>*/, + 0/*minScalar<unsigned short>*/, + 0/*minScalar<short>*/, + 0/*minScalar<int>*/, + minScalar<float>, + 0/*minScalar<double>*/, + }; + + static const cast_func_t cast_func[] = + { + castScalar<unsigned char>, + 0/*castScalar<signed char>*/, + 0/*castScalar<unsigned short>*/, + 0/*castScalar<short>*/, + 0/*castScalar<int>*/, + castScalar<float>, + 0/*castScalar<double>*/, + }; +#else static const func_t funcs[] = { minScalar<unsigned char>, @@ -2752,14 +3741,20 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream) minScalar<short>, minScalar<int>, minScalar<float>, - minScalar<double> + minScalar<double>, }; - typedef double (*cast_func_t)(double sc); static const cast_func_t cast_func[] = { - castScalar<unsigned char>, castScalar<signed char>, castScalar<unsigned short>, castScalar<short>, castScalar<int>, castScalar<float>, castScalar<double> + castScalar<unsigned char>, + castScalar<signed char>, + castScalar<unsigned short>, + castScalar<short>, + castScalar<int>, + castScalar<float>, + castScalar<double>, }; +#endif const int depth = src.depth(); @@ -2772,9 +3767,13 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsNotImplemented, "not available in tiny build"); + dst.create(src.size(), src.type()); - funcs[depth](src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); + func(src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); } void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream) @@ -2782,6 +3781,31 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); + typedef double (*cast_func_t)(double sc); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + maxScalar<unsigned char>, + 0/*maxScalar<signed char>*/, + 0/*maxScalar<unsigned short>*/, + 0/*maxScalar<short>*/, + 0/*maxScalar<int>*/, + maxScalar<float>, + 0/*maxScalar<double>*/, + }; + + static const cast_func_t cast_func[] = + { + castScalar<unsigned char>, + 0/*castScalar<signed char>*/, + 0/*castScalar<unsigned short>*/, + 0/*castScalar<short>*/, + 0/*castScalar<int>*/, + castScalar<float>, + 0/*castScalar<double>*/ + }; +#else static const func_t funcs[] = { maxScalar<unsigned char>, @@ -2790,14 +3814,20 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream) maxScalar<short>, maxScalar<int>, maxScalar<float>, - maxScalar<double> + maxScalar<double>, }; - typedef double (*cast_func_t)(double sc); static const cast_func_t cast_func[] = { - castScalar<unsigned char>, castScalar<signed char>, castScalar<unsigned short>, castScalar<short>, castScalar<int>, castScalar<float>, castScalar<double> + castScalar<unsigned char>, + castScalar<signed char>, + castScalar<unsigned short>, + castScalar<short>, + castScalar<int>, + castScalar<float>, + castScalar<double>, }; +#endif const int depth = src.depth(); @@ -2810,9 +3840,13 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsNotImplemented, "not available in tiny build"); + dst.create(src.size(), src.type()); - funcs[depth](src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); + func(src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); } //////////////////////////////////////////////////////////////////////// @@ -2858,6 +3892,18 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double else { typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + arithm::threshold<unsigned char>, + 0/*arithm::threshold<signed char>*/, + 0/*arithm::threshold<unsigned short>*/, + 0/*arithm::threshold<short>*/, + 0/*arithm::threshold<int>*/, + arithm::threshold<float>, + 0/*arithm::threshold<double>*/ + }; +#else static const func_t funcs[] = { arithm::threshold<unsigned char>, @@ -2868,6 +3914,11 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double arithm::threshold<float>, arithm::threshold<double> }; +#endif + + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsNotImplemented, "not available in tiny build"); if (depth != CV_32F && depth != CV_64F) { @@ -2875,7 +3926,7 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal = cvRound(maxVal); } - funcs[depth](src, dst, thresh, maxVal, type, stream); + func(src, dst, thresh, maxVal, type, stream); } return thresh; @@ -2892,6 +3943,18 @@ namespace arithm void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream) { typedef void (*func_t)(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*arithm::pow<unsigned char>*/, + 0/*arithm::pow<signed char>*/, + 0/*arithm::pow<unsigned short>*/, + 0/*arithm::pow<short>*/, + 0/*arithm::pow<int>*/, + arithm::pow<float>, + 0/*arithm::pow<double>*/, + }; +#else static const func_t funcs[] = { arithm::pow<unsigned char>, @@ -2902,6 +3965,7 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream) arithm::pow<float>, arithm::pow<double> }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -2914,12 +3978,16 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsNotImplemented, "not available in tiny build"); + dst.create(src.size(), src.type()); PtrStepSzb src_(src.rows, src.cols * cn, src.data, src.step); PtrStepSzb dst_(src.rows, src.cols * cn, dst.data, dst.step); - funcs[depth](src_, power, dst_, StreamAccessor::getStream(stream)); + func(src_, power, dst_, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////