From 9682d2afff6e0a2b61e032a28ad1412ac6ee54dd Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 4 Mar 2015 16:02:54 +0300 Subject: [PATCH] reduce element_operations instantiates for tiny build --- modules/gpu/src/cuda/element_operations.cu | 134 ++ modules/gpu/src/element_operations.cpp | 1284 ++++++++++++++++++-- 2 files changed, 1310 insertions(+), 108 deletions(-) diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index e277d829a1..1f94f6a5c3 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -234,6 +234,7 @@ namespace arithm } template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -278,7 +279,9 @@ namespace arithm //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -288,6 +291,7 @@ namespace arithm //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -329,6 +333,7 @@ namespace arithm } template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -373,7 +378,9 @@ namespace arithm //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -383,6 +390,7 @@ namespace arithm //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -469,6 +477,7 @@ namespace arithm } template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -513,7 +522,9 @@ namespace arithm //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -523,6 +534,7 @@ namespace arithm //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -542,6 +554,7 @@ namespace arithm } template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -586,7 +599,9 @@ namespace arithm //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -596,6 +611,7 @@ namespace arithm //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -700,6 +716,7 @@ namespace arithm } template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); @@ -744,7 +761,9 @@ namespace arithm //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#endif template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); @@ -754,6 +773,7 @@ namespace arithm //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -791,6 +811,7 @@ namespace arithm } template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -835,7 +856,9 @@ namespace arithm //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -845,6 +868,7 @@ namespace arithm //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -968,6 +992,7 @@ namespace arithm } template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); @@ -1012,7 +1037,9 @@ namespace arithm //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#endif template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); @@ -1022,6 +1049,7 @@ namespace arithm //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1037,6 +1065,7 @@ namespace arithm } template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -1081,7 +1110,9 @@ namespace arithm //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -1091,6 +1122,7 @@ namespace arithm //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1128,6 +1160,7 @@ namespace arithm } template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -1172,7 +1205,9 @@ namespace arithm //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -1182,6 +1217,7 @@ namespace arithm //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1278,12 +1314,16 @@ namespace arithm } template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1323,12 +1363,16 @@ namespace arithm } template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1349,13 +1393,17 @@ namespace arithm transform((PtrStepSz) src, (PtrStepSz) dst, abs_func(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1390,13 +1438,17 @@ namespace arithm transform((PtrStepSz) src, (PtrStepSz) dst, Sqr(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1417,13 +1469,17 @@ namespace arithm transform((PtrStepSz) src, (PtrStepSz) dst, sqrt_func(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1444,13 +1500,17 @@ namespace arithm transform((PtrStepSz) src, (PtrStepSz) dst, log_func(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1486,13 +1546,17 @@ namespace arithm transform((PtrStepSz) src, (PtrStepSz) dst, Exp(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////////////////// @@ -1620,36 +1684,52 @@ namespace arithm } template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////////////////// @@ -1824,52 +1904,76 @@ namespace arithm } template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////////////////// @@ -1981,19 +2085,25 @@ namespace arithm } template void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -2067,12 +2177,16 @@ namespace arithm } template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream) { @@ -2080,12 +2194,16 @@ namespace arithm } template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -2159,12 +2277,16 @@ namespace arithm } template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream) { @@ -2172,12 +2294,16 @@ namespace arithm } template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -2233,12 +2359,16 @@ namespace arithm } template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#endif template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -2312,13 +2442,17 @@ namespace arithm transform((PtrStepSz) src, (PtrStepSz) dst, PowOp(power), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); +#endif template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp index fa98520ff8..bd8ca81bf1 100644 --- a/modules/gpu/src/element_operations.cpp +++ b/modules/gpu/src/element_operations.cpp @@ -275,6 +275,75 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + addMat, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + }, + { + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + }, + { + 0 /*addMat*/, + 0 /*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + }, + { + 0 /*addMat*/, + 0 /*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + }, + { + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + }, + { + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + addMat, + 0/*addMat*/, + }, + { + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0/*addMat*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -284,7 +353,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu addMat, addMat, addMat, - addMat + addMat, }, { addMat, @@ -293,7 +362,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu addMat, addMat, addMat, - addMat + addMat, }, { 0 /*addMat*/, @@ -302,7 +371,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu addMat, addMat, addMat, - addMat + addMat, }, { 0 /*addMat*/, @@ -311,7 +380,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu addMat, addMat, addMat, - addMat + addMat, }, { 0 /*addMat*/, @@ -320,7 +389,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu 0 /*addMat*/, addMat, addMat, - addMat + addMat, }, { 0 /*addMat*/, @@ -329,7 +398,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu 0 /*addMat*/, 0 /*addMat*/, addMat, - addMat + addMat, }, { 0 /*addMat*/, @@ -338,9 +407,10 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu 0 /*addMat*/, 0 /*addMat*/, 0 /*addMat*/, - addMat + addMat, } }; +#endif if (dtype < 0) dtype = src1.depth(); @@ -421,6 +491,75 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + addScalar, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + }, + { + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + }, + { + 0 /*addScalar*/, + 0 /*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + }, + { + 0 /*addScalar*/, + 0 /*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + }, + { + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + }, + { + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + addScalar, + 0/*addScalar*/, + }, + { + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0/*addScalar*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -430,7 +569,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat addScalar, addScalar, addScalar, - addScalar + addScalar, }, { addScalar, @@ -439,7 +578,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat addScalar, addScalar, addScalar, - addScalar + addScalar, }, { 0 /*addScalar*/, @@ -448,7 +587,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat addScalar, addScalar, addScalar, - addScalar + addScalar, }, { 0 /*addScalar*/, @@ -457,7 +596,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat addScalar, addScalar, addScalar, - addScalar + addScalar, }, { 0 /*addScalar*/, @@ -466,7 +605,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat 0 /*addScalar*/, addScalar, addScalar, - addScalar + addScalar, }, { 0 /*addScalar*/, @@ -475,7 +614,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat 0 /*addScalar*/, 0 /*addScalar*/, addScalar, - addScalar + addScalar, }, { 0 /*addScalar*/, @@ -484,9 +623,10 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat 0 /*addScalar*/, 0 /*addScalar*/, 0 /*addScalar*/, - addScalar + addScalar, } }; +#endif typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = @@ -555,6 +695,75 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + subMat, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + }, + { + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + }, + { + 0 /*subMat*/, + 0 /*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + }, + { + 0 /*subMat*/, + 0 /*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + }, + { + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + }, + { + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + subMat, + 0/*subMat*/, + }, + { + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0/*subMat*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -564,7 +773,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons subMat, subMat, subMat, - subMat + subMat, }, { subMat, @@ -573,7 +782,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons subMat, subMat, subMat, - subMat + subMat, }, { 0 /*subMat*/, @@ -582,7 +791,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons subMat, subMat, subMat, - subMat + subMat, }, { 0 /*subMat*/, @@ -591,7 +800,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons subMat, subMat, subMat, - subMat + subMat, }, { 0 /*subMat*/, @@ -600,7 +809,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons 0 /*subMat*/, subMat, subMat, - subMat + subMat, }, { 0 /*subMat*/, @@ -609,7 +818,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons 0 /*subMat*/, 0 /*subMat*/, subMat, - subMat + subMat, }, { 0 /*subMat*/, @@ -618,9 +827,10 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons 0 /*subMat*/, 0 /*subMat*/, 0 /*subMat*/, - subMat + subMat, } }; +#endif if (dtype < 0) dtype = src1.depth(); @@ -701,6 +911,75 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + subScalar, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + }, + { + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + }, + { + 0 /*subScalar*/, + 0 /*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + }, + { + 0 /*subScalar*/, + 0 /*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + }, + { + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + }, + { + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + subScalar, + 0/*subScalar*/, + }, + { + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0/*subScalar*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -710,7 +989,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G subScalar, subScalar, subScalar, - subScalar + subScalar, }, { subScalar, @@ -719,7 +998,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G subScalar, subScalar, subScalar, - subScalar + subScalar, }, { 0 /*subScalar*/, @@ -728,7 +1007,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G subScalar, subScalar, subScalar, - subScalar + subScalar, }, { 0 /*subScalar*/, @@ -737,7 +1016,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G subScalar, subScalar, subScalar, - subScalar + subScalar, }, { 0 /*subScalar*/, @@ -746,7 +1025,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G 0 /*subScalar*/, subScalar, subScalar, - subScalar + subScalar, }, { 0 /*subScalar*/, @@ -755,7 +1034,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G 0 /*subScalar*/, 0 /*subScalar*/, subScalar, - subScalar + subScalar, }, { 0 /*subScalar*/, @@ -764,9 +1043,10 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G 0 /*subScalar*/, 0 /*subScalar*/, 0 /*subScalar*/, - subScalar + subScalar, } }; +#endif typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = @@ -856,6 +1136,75 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub else { typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + mulMat, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + }, + { + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + }, + { + 0 /*mulMat*/, + 0 /*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + }, + { + 0 /*mulMat*/, + 0 /*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + }, + { + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + }, + { + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + mulMat, + 0/*mulMat*/, + }, + { + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0/*mulMat*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -865,7 +1214,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub mulMat, mulMat, mulMat, - mulMat + mulMat, }, { mulMat, @@ -874,7 +1223,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub mulMat, mulMat, mulMat, - mulMat + mulMat, }, { 0 /*mulMat*/, @@ -883,7 +1232,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub mulMat, mulMat, mulMat, - mulMat + mulMat, }, { 0 /*mulMat*/, @@ -892,7 +1241,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub mulMat, mulMat, mulMat, - mulMat + mulMat, }, { 0 /*mulMat*/, @@ -901,7 +1250,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub 0 /*mulMat*/, mulMat, mulMat, - mulMat + mulMat, }, { 0 /*mulMat*/, @@ -910,7 +1259,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub 0 /*mulMat*/, 0 /*mulMat*/, mulMat, - mulMat + mulMat, }, { 0 /*mulMat*/, @@ -919,9 +1268,10 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub 0 /*mulMat*/, 0 /*mulMat*/, 0 /*mulMat*/, - mulMat + mulMat, } }; +#endif if (dtype < 0) dtype = src1.depth(); @@ -965,6 +1315,75 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + mulScalar, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + }, + { + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + }, + { + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + }, + { + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + }, + { + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + }, + { + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + mulScalar, + 0/*mulScalar*/, + }, + { + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0/*mulScalar*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -974,7 +1393,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double mulScalar, mulScalar, mulScalar, - mulScalar + mulScalar, }, { mulScalar, @@ -983,7 +1402,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double mulScalar, mulScalar, mulScalar, - mulScalar + mulScalar, }, { 0 /*mulScalar*/, @@ -992,7 +1411,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double mulScalar, mulScalar, mulScalar, - mulScalar + mulScalar, }, { 0 /*mulScalar*/, @@ -1001,7 +1420,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double mulScalar, mulScalar, mulScalar, - mulScalar + mulScalar, }, { 0 /*mulScalar*/, @@ -1010,7 +1429,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double 0 /*mulScalar*/, mulScalar, mulScalar, - mulScalar + mulScalar, }, { 0 /*mulScalar*/, @@ -1019,7 +1438,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double 0 /*mulScalar*/, 0 /*mulScalar*/, mulScalar, - mulScalar + mulScalar, }, { 0 /*mulScalar*/, @@ -1028,9 +1447,10 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double 0 /*mulScalar*/, 0 /*mulScalar*/, 0 /*mulScalar*/, - mulScalar + mulScalar, } }; +#endif typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = @@ -1121,6 +1541,75 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double else { typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + divMat, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + }, + { + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + }, + { + 0 /*divMat*/, + 0 /*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + }, + { + 0 /*divMat*/, + 0 /*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + }, + { + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + }, + { + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + divMat, + 0/*divMat*/, + }, + { + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0/*divMat*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -1130,7 +1619,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double divMat, divMat, divMat, - divMat + divMat, }, { divMat, @@ -1139,7 +1628,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double divMat, divMat, divMat, - divMat + divMat, }, { 0 /*divMat*/, @@ -1148,7 +1637,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double divMat, divMat, divMat, - divMat + divMat, }, { 0 /*divMat*/, @@ -1157,7 +1646,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double divMat, divMat, divMat, - divMat + divMat, }, { 0 /*divMat*/, @@ -1166,7 +1655,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double 0 /*divMat*/, divMat, divMat, - divMat + divMat, }, { 0 /*divMat*/, @@ -1175,7 +1664,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double 0 /*divMat*/, 0 /*divMat*/, divMat, - divMat + divMat, }, { 0 /*divMat*/, @@ -1184,9 +1673,10 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double 0 /*divMat*/, 0 /*divMat*/, 0 /*divMat*/, - divMat + divMat, } }; +#endif if (dtype < 0) dtype = src1.depth(); @@ -1230,6 +1720,75 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + divScalar, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + }, + { + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + }, + { + 0 /*divScalar*/, + 0 /*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + }, + { + 0 /*divScalar*/, + 0 /*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + }, + { + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + }, + { + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + divScalar, + 0/*divScalar*/, + }, + { + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0/*divScalar*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -1239,7 +1798,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc divScalar, divScalar, divScalar, - divScalar + divScalar, }, { divScalar, @@ -1248,7 +1807,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc divScalar, divScalar, divScalar, - divScalar + divScalar, }, { 0 /*divScalar*/, @@ -1257,7 +1816,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc divScalar, divScalar, divScalar, - divScalar + divScalar, }, { 0 /*divScalar*/, @@ -1266,7 +1825,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc divScalar, divScalar, divScalar, - divScalar + divScalar, }, { 0 /*divScalar*/, @@ -1275,7 +1834,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc 0 /*divScalar*/, divScalar, divScalar, - divScalar + divScalar, }, { 0 /*divScalar*/, @@ -1284,7 +1843,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc 0 /*divScalar*/, 0 /*divScalar*/, divScalar, - divScalar + divScalar, }, { 0 /*divScalar*/, @@ -1293,9 +1852,10 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc 0 /*divScalar*/, 0 /*divScalar*/, 0 /*divScalar*/, - divScalar + divScalar, } }; +#endif typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = @@ -1359,6 +1919,75 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + divInv, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + }, + { + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + }, + { + 0 /*divInv*/, + 0 /*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + }, + { + 0 /*divInv*/, + 0 /*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + }, + { + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + }, + { + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + divInv, + 0/*divInv*/, + }, + { + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0/*divInv*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -1368,7 +1997,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St divInv, divInv, divInv, - divInv + divInv, }, { divInv, @@ -1377,7 +2006,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St divInv, divInv, divInv, - divInv + divInv, }, { 0 /*divInv*/, @@ -1386,7 +2015,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St divInv, divInv, divInv, - divInv + divInv, }, { 0 /*divInv*/, @@ -1395,7 +2024,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St divInv, divInv, divInv, - divInv + divInv, }, { 0 /*divInv*/, @@ -1404,7 +2033,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St 0 /*divInv*/, divInv, divInv, - divInv + divInv, }, { 0 /*divInv*/, @@ -1413,7 +2042,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St 0 /*divInv*/, 0 /*divInv*/, divInv, - divInv + divInv, }, { 0 /*divInv*/, @@ -1422,9 +2051,10 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St 0 /*divInv*/, 0 /*divInv*/, 0 /*divInv*/, - divInv + divInv, } }; +#endif if (dtype < 0) dtype = src.depth(); @@ -1471,6 +2101,19 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + absDiffMat, + 0/*absDiffMat*/, + 0/*absDiffMat*/, + 0/*absDiffMat*/, + 0/*absDiffMat*/, + absDiffMat, + 0/*absDiffMat*/, + }; +#else static const func_t funcs[] = { absDiffMat, @@ -1479,8 +2122,9 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea absDiffMat, absDiffMat, absDiffMat, - absDiffMat + absDiffMat, }; +#endif const int depth = src1.depth(); const int cn = src1.channels(); @@ -1556,6 +2200,19 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + absDiffScalar, + 0/*absDiffScalar*/, + 0/*absDiffScalar*/, + 0/*absDiffScalar*/, + 0/*absDiffScalar*/, + absDiffScalar, + 0/*absDiffScalar*/, + }; +#else static const func_t funcs[] = { absDiffScalar, @@ -1564,8 +2221,9 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea absDiffScalar, absDiffScalar, absDiffScalar, - absDiffScalar + absDiffScalar, }; +#endif const int depth = src1.depth(); @@ -1578,9 +2236,13 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src1.size(), src1.type()); - funcs[depth](src1, src2.val[0], dst, StreamAccessor::getStream(stream)); + func(src1, src2.val[0], dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -1597,6 +2259,19 @@ void cv::gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*absMat*/, + 0/*absMat*/, + 0/*absMat*/, + 0/*absMat*/, + 0/*absMat*/, + absMat, + 0/*absMat*/, + }; +#else static const func_t funcs[] = { absMat, @@ -1605,8 +2280,9 @@ void cv::gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream) absMat, absMat, absMat, - absMat + absMat, }; +#endif const int depth = src.depth(); @@ -1619,9 +2295,13 @@ void cv::gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -1638,6 +2318,19 @@ void cv::gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*sqrMat*/, + 0/*sqrMat*/, + 0/*sqrMat*/, + 0/*sqrMat*/, + 0/*sqrMat*/, + sqrMat, + 0/*sqrMat*/, + }; +#else static const func_t funcs[] = { sqrMat, @@ -1646,8 +2339,9 @@ void cv::gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream) sqrMat, sqrMat, sqrMat, - sqrMat + sqrMat, }; +#endif const int depth = src.depth(); @@ -1660,9 +2354,13 @@ void cv::gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -1679,6 +2377,19 @@ void cv::gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*sqrtMat*/, + 0/*sqrtMat*/, + 0/*sqrtMat*/, + 0/*sqrtMat*/, + 0/*sqrtMat*/, + sqrtMat, + 0/*sqrtMat*/, + }; +#else static const func_t funcs[] = { sqrtMat, @@ -1687,8 +2398,9 @@ void cv::gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream) sqrtMat, sqrtMat, sqrtMat, - sqrtMat + sqrtMat, }; +#endif const int depth = src.depth(); @@ -1701,9 +2413,13 @@ void cv::gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } //////////////////////////////////////////////////////////////////////// @@ -1720,6 +2436,19 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*logMat*/, + 0/*logMat*/, + 0/*logMat*/, + 0/*logMat*/, + 0/*logMat*/, + logMat, + 0/*logMat*/, + }; +#else static const func_t funcs[] = { logMat, @@ -1728,8 +2457,9 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream) logMat, logMat, logMat, - logMat + logMat, }; +#endif const int depth = src.depth(); @@ -1742,9 +2472,13 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } //////////////////////////////////////////////////////////////////////// @@ -1761,6 +2495,19 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*expMat*/, + 0/*expMat*/, + 0/*expMat*/, + 0/*expMat*/, + 0/*expMat*/, + expMat, + 0/*expMat*/, + }; +#else static const func_t funcs[] = { expMat, @@ -1769,8 +2516,9 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream) expMat, expMat, expMat, - expMat + expMat, }; +#endif const int depth = src.depth(); @@ -1783,9 +2531,13 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -1809,6 +2561,19 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][4] = + { + {cmpMatEq , cmpMatNe , cmpMatLt , cmpMatLe }, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {cmpMatEq , cmpMatNe , cmpMatLt , cmpMatLe }, + {0, 0, 0, 0}, + }; +#else static const func_t funcs[7][4] = { {cmpMatEq , cmpMatNe , cmpMatLt , cmpMatLe }, @@ -1819,6 +2584,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c {cmpMatEq , cmpMatNe , cmpMatLt , cmpMatLe }, {cmpMatEq , cmpMatNe , cmpMatLt , cmpMatLe } }; +#endif typedef void (*func_v4_t)(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream); static const func_v4_t funcs_v4[] = @@ -1839,10 +2605,6 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } - dst.create(src1.size(), CV_MAKE_TYPE(CV_8U, cn)); - - cudaStream_t stream = StreamAccessor::getStream(s); - static const int codes[] = { 0, 2, 3, 2, 3, 1 @@ -1857,6 +2619,15 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c }; const int code = codes[cmpop]; + + const func_t func = funcs[depth][code]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + dst.create(src1.size(), CV_MAKE_TYPE(CV_8U, cn)); + + cudaStream_t stream = StreamAccessor::getStream(s); + PtrStepSzb src1_(src1.rows, src1.cols * cn, psrc1[cmpop]->data, psrc1[cmpop]->step); PtrStepSzb src2_(src1.rows, src1.cols * cn, psrc2[cmpop]->data, psrc2[cmpop]->step); PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step); @@ -1882,8 +2653,6 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c } } - const func_t func = funcs[depth][code]; - func(src1_, src2_, dst_, stream); } @@ -1913,6 +2682,31 @@ void cv::gpu::compare(const GpuMat& src, Scalar sc, GpuMat& dst, int cmpop, Stre using namespace arithm; typedef void (*func_t)(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); + typedef void (*cast_func_t)(Scalar& sc); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][6] = + { + {cmpScalarEq , cmpScalarGt , cmpScalarGe , cmpScalarLt , cmpScalarLe , cmpScalarNe }, + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0}, + {cmpScalarEq , cmpScalarGt , cmpScalarGe , cmpScalarLt , cmpScalarLe , cmpScalarNe }, + {0, 0, 0, 0, 0, 0}, + }; + + static const cast_func_t cast_func[] = + { + castScalar, + 0/*castScalar*/, + 0/*castScalar*/, + 0/*castScalar*/, + 0/*castScalar*/, + castScalar, + 0/*castScalar*/, + }; +#else static const func_t funcs[7][6] = { {cmpScalarEq , cmpScalarGt , cmpScalarGe , cmpScalarLt , cmpScalarLe , cmpScalarNe }, @@ -1924,11 +2718,11 @@ void cv::gpu::compare(const GpuMat& src, Scalar sc, GpuMat& dst, int cmpop, Stre {cmpScalarEq , cmpScalarGt , cmpScalarGe , cmpScalarLt , cmpScalarLe , cmpScalarNe } }; - typedef void (*cast_func_t)(Scalar& sc); static const cast_func_t cast_func[] = { castScalar, castScalar, castScalar, castScalar, castScalar, castScalar, castScalar }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -1943,11 +2737,15 @@ void cv::gpu::compare(const GpuMat& src, Scalar sc, GpuMat& dst, int cmpop, Stre CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth][cmpop]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), CV_MAKE_TYPE(CV_8U, cn)); cast_func[depth](sc); - funcs[depth][cmpop](src, cn, sc.val, dst, StreamAccessor::getStream(stream)); + func(src, cn, sc.val, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -2391,14 +3189,56 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre using namespace arithm; typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[5][4] = + { + { + BitScalar >::call, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + }, + {0,0,0,0}, + { + 0/*BitScalar >::call*/, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + }, + {0,0,0,0}, + { + 0/*BitScalar >::call*/, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + } + }; +#else static const func_t funcs[5][4] = { - {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarAnd >::call}, + { + BitScalar >::call, + 0, + NppBitwiseC::call, + BitScalar4< bitScalarAnd >::call + }, {0,0,0,0}, - {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, + { + BitScalar >::call, + 0, + NppBitwiseC::call, + NppBitwiseC::call + }, {0,0,0,0}, - {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + { + BitScalar >::call, + 0, + NppBitwiseC::call, + NppBitwiseC::call + } }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -2406,9 +3246,13 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32S ); CV_Assert( cn == 1 || cn == 3 || cn == 4 ); + const func_t func = funcs[depth][cn - 1]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth][cn - 1](src, sc, dst, StreamAccessor::getStream(stream)); + func(src, sc, dst, StreamAccessor::getStream(stream)); } void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream) @@ -2416,14 +3260,56 @@ void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Strea using namespace arithm; typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[5][4] = + { + { + BitScalar >::call, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + }, + {0,0,0,0}, + { + 0/*BitScalar >::call*/, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + }, + {0,0,0,0}, + { + 0/*BitScalar >::call*/, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + } + }; +#else static const func_t funcs[5][4] = { - {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarOr >::call}, + { + BitScalar >::call, + 0, + NppBitwiseC::call, + BitScalar4< bitScalarOr >::call + }, {0,0,0,0}, - {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, + { + BitScalar >::call, + 0, + NppBitwiseC::call, + NppBitwiseC::call + }, {0,0,0,0}, - {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + { + BitScalar >::call, + 0, + NppBitwiseC::call, + NppBitwiseC::call + } }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -2431,9 +3317,13 @@ void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Strea CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32S ); CV_Assert( cn == 1 || cn == 3 || cn == 4 ); + const func_t func = funcs[depth][cn - 1]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth][cn - 1](src, sc, dst, StreamAccessor::getStream(stream)); + func(src, sc, dst, StreamAccessor::getStream(stream)); } void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream) @@ -2441,14 +3331,56 @@ void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre using namespace arithm; typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[5][4] = + { + { + BitScalar >::call, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + }, + {0,0,0,0}, + { + 0/*BitScalar >::call*/, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + }, + {0,0,0,0}, + { + 0/*BitScalar >::call*/, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + } + }; +#else static const func_t funcs[5][4] = { - {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarXor >::call}, + { + BitScalar >::call, + 0, + NppBitwiseC::call, + BitScalar4< bitScalarXor >::call + }, {0,0,0,0}, - {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, + { + BitScalar >::call, + 0, + NppBitwiseC::call, + NppBitwiseC::call + }, {0,0,0,0}, - {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + { + BitScalar >::call, + 0, + NppBitwiseC::call, + NppBitwiseC::call + } }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -2456,9 +3388,13 @@ void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32S ); CV_Assert( cn == 1 || cn == 3 || cn == 4 ); + const func_t func = funcs[depth][cn - 1]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth][cn - 1](src, sc, dst, StreamAccessor::getStream(stream)); + func(src, sc, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -2578,6 +3514,19 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + minMat, + 0/*minMat*/, + 0/*minMat*/, + 0/*minMat*/, + 0/*minMat*/, + minMat, + 0/*minMat*/, + }; +#else static const func_t funcs[] = { minMat, @@ -2586,8 +3535,9 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s minMat, minMat, minMat, - minMat + minMat, }; +#endif const int depth = src1.depth(); const int cn = src1.channels(); @@ -2657,6 +3607,19 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + maxMat, + 0/*maxMat*/, + 0/*maxMat*/, + 0/*maxMat*/, + 0/*maxMat*/, + maxMat, + 0/*maxMat*/, + }; +#else static const func_t funcs[] = { maxMat, @@ -2665,8 +3628,9 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s maxMat, maxMat, maxMat, - maxMat + maxMat, }; +#endif const int depth = src1.depth(); const int cn = src1.channels(); @@ -2744,6 +3708,31 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); + typedef double (*cast_func_t)(double sc); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + minScalar, + 0/*minScalar*/, + 0/*minScalar*/, + 0/*minScalar*/, + 0/*minScalar*/, + minScalar, + 0/*minScalar*/, + }; + + static const cast_func_t cast_func[] = + { + castScalar, + 0/*castScalar*/, + 0/*castScalar*/, + 0/*castScalar*/, + 0/*castScalar*/, + castScalar, + 0/*castScalar*/, + }; +#else static const func_t funcs[] = { minScalar, @@ -2752,14 +3741,20 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream) minScalar, minScalar, minScalar, - minScalar + minScalar, }; - typedef double (*cast_func_t)(double sc); static const cast_func_t cast_func[] = { - castScalar, castScalar, castScalar, castScalar, castScalar, castScalar, castScalar + castScalar, + castScalar, + castScalar, + castScalar, + castScalar, + castScalar, + castScalar, }; +#endif const int depth = src.depth(); @@ -2772,9 +3767,13 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsNotImplemented, "not available in tiny build"); + dst.create(src.size(), src.type()); - funcs[depth](src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); + func(src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); } void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream) @@ -2782,6 +3781,31 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); + typedef double (*cast_func_t)(double sc); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + maxScalar, + 0/*maxScalar*/, + 0/*maxScalar*/, + 0/*maxScalar*/, + 0/*maxScalar*/, + maxScalar, + 0/*maxScalar*/, + }; + + static const cast_func_t cast_func[] = + { + castScalar, + 0/*castScalar*/, + 0/*castScalar*/, + 0/*castScalar*/, + 0/*castScalar*/, + castScalar, + 0/*castScalar*/ + }; +#else static const func_t funcs[] = { maxScalar, @@ -2790,14 +3814,20 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream) maxScalar, maxScalar, maxScalar, - maxScalar + maxScalar, }; - typedef double (*cast_func_t)(double sc); static const cast_func_t cast_func[] = { - castScalar, castScalar, castScalar, castScalar, castScalar, castScalar, castScalar + castScalar, + castScalar, + castScalar, + castScalar, + castScalar, + castScalar, + castScalar, }; +#endif const int depth = src.depth(); @@ -2810,9 +3840,13 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsNotImplemented, "not available in tiny build"); + dst.create(src.size(), src.type()); - funcs[depth](src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); + func(src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); } //////////////////////////////////////////////////////////////////////// @@ -2858,6 +3892,18 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double else { typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + arithm::threshold, + 0/*arithm::threshold*/, + 0/*arithm::threshold*/, + 0/*arithm::threshold*/, + 0/*arithm::threshold*/, + arithm::threshold, + 0/*arithm::threshold*/ + }; +#else static const func_t funcs[] = { arithm::threshold, @@ -2868,6 +3914,11 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double arithm::threshold, arithm::threshold }; +#endif + + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsNotImplemented, "not available in tiny build"); if (depth != CV_32F && depth != CV_64F) { @@ -2875,7 +3926,7 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal = cvRound(maxVal); } - funcs[depth](src, dst, thresh, maxVal, type, stream); + func(src, dst, thresh, maxVal, type, stream); } return thresh; @@ -2892,6 +3943,18 @@ namespace arithm void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream) { typedef void (*func_t)(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*arithm::pow*/, + 0/*arithm::pow*/, + 0/*arithm::pow*/, + 0/*arithm::pow*/, + 0/*arithm::pow*/, + arithm::pow, + 0/*arithm::pow*/, + }; +#else static const func_t funcs[] = { arithm::pow, @@ -2902,6 +3965,7 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream) arithm::pow, arithm::pow }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -2914,12 +3978,16 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsNotImplemented, "not available in tiny build"); + dst.create(src.size(), src.type()); PtrStepSzb src_(src.rows, src.cols * cn, src.data, src.step); PtrStepSzb dst_(src.rows, src.cols * cn, dst.data, dst.step); - funcs[depth](src_, power, dst_, StreamAccessor::getStream(stream)); + func(src_, power, dst_, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////