diff --git a/modules/gpu/src/arithm.cpp b/modules/gpu/src/arithm.cpp index 7e0aaab680..573f7c574a 100644 --- a/modules/gpu/src/arithm.cpp +++ b/modules/gpu/src/arithm.cpp @@ -319,40 +319,14 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& stream) void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s) { - class LevelsInit - { - public: - Npp32s pLevels[256]; - const Npp32s* pLevels3[3]; - int nValues3[3]; - -#if (CUDA_VERSION > 4020) - GpuMat d_pLevels; -#endif - - LevelsInit() - { - nValues3[0] = nValues3[1] = nValues3[2] = 256; - for (int i = 0; i < 256; ++i) - pLevels[i] = i; - + const int cn = src.channels(); -#if (CUDA_VERSION <= 4020) - pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels; -#else - d_pLevels.upload(Mat(1, 256, CV_32S, pLevels)); - pLevels3[0] = pLevels3[1] = pLevels3[2] = d_pLevels.ptr(); -#endif - } - }; - static LevelsInit lvls; + CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 ); + CV_Assert( lut.depth() == CV_8U ); + CV_Assert( lut.channels() == 1 || lut.channels() == cn ); + CV_Assert( lut.rows * lut.cols == 256 && lut.isContinuous() ); - int cn = src.channels(); - - CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3); - CV_Assert(lut.depth() == CV_8U && (lut.channels() == 1 || lut.channels() == cn) && lut.rows * lut.cols == 256 && lut.isContinuous()); - - dst.create(src.size(), CV_MAKETYPE(lut.depth(), cn)); + dst.create(src.size(), CV_MAKE_TYPE(lut.depth(), cn)); NppiSize sz; sz.height = src.rows; @@ -361,19 +335,34 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s) Mat nppLut; lut.convertTo(nppLut, CV_32S); - cudaStream_t stream = StreamAccessor::getStream(s); + int nValues3[] = {256, 256, 256}; + + Npp32s pLevels[256]; + for (int i = 0; i < 256; ++i) + pLevels[i] = i; + + const Npp32s* pLevels3[3]; +#if (CUDA_VERSION <= 4020) + pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels; +#else + GpuMat d_pLevels; + d_pLevels.upload(Mat(1, 256, CV_32S, pLevels)); + pLevels3[0] = pLevels3[1] = pLevels3[2] = d_pLevels.ptr(); +#endif + + cudaStream_t stream = StreamAccessor::getStream(s); NppStreamHandler h(stream); if (src.type() == CV_8UC1) { #if (CUDA_VERSION <= 4020) nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, nppLut.ptr(), lvls.pLevels, 256) ); + dst.ptr(), static_cast(dst.step), sz, nppLut.ptr(), pLevels, 256) ); #else GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data)); nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, d_nppLut.ptr(), lvls.d_pLevels.ptr(), 256) ); + dst.ptr(), static_cast(dst.step), sz, d_nppLut.ptr(), d_pLevels.ptr(), 256) ); #endif } else @@ -410,7 +399,7 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s) } nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, pValues3, lvls.pLevels3, lvls.nValues3) ); + dst.ptr(), static_cast(dst.step), sz, pValues3, pLevels3, nValues3) ); } if (stream == 0)