added gpu::LUT for CV_8UC3 type, added gpu::cvtColor for BGR2BGR5x5, minor fix in tests.

pull/13383/head
Vladislav Vinogradov 15 years ago
parent 1b8c00000c
commit e1e5047b6e
  1. 53
      modules/gpu/src/arithm.cpp
  2. 456
      modules/gpu/src/cuda/color.cu
  3. 63
      modules/gpu/src/imgproc_gpu.cpp
  4. 1124
      tests/gpu/src/arithm.cpp
  5. 12
      tests/gpu/src/gputest_main.cpp
  6. 1085
      tests/gpu/src/imgproc_gpu.cpp

@ -266,13 +266,13 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
sz.height = src1.rows;
int funcIdx = normType >> 1;
Scalar retVal;
double retVal;
nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), src1.step,
src2.ptr<Npp8u>(), src2.step,
sz, retVal.val) );
sz, &retVal) );
return retVal[0];
return retVal;
}
////////////////////////////////////////////////////////////////////////
@ -307,10 +307,7 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode)
Scalar cv::gpu::sum(const GpuMat& src)
{
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
NppiSize sz;
sz.width = src.cols;
@ -324,7 +321,7 @@ Scalar cv::gpu::sum(const GpuMat& src)
GpuMat buf(1, bufsz, CV_32S);
Scalar res;
nppSafeCall( nppiSum_8u_C1R(src.ptr<Npp8u>(), src.step, sz, buf.ptr<Npp32s>(), res.val) );
nppSafeCall( nppiSum_8u_C1R(src.ptr<Npp8u>(), src.step, sz, buf.ptr<Npp32s>(), res.val) );
return res;
}
else
@ -336,8 +333,6 @@ Scalar cv::gpu::sum(const GpuMat& src)
nppSafeCall( nppiSum_8u_C4R(src.ptr<Npp8u>(), src.step, sz, buf.ptr<Npp32s>(), res.val) );
return res;
}
}
////////////////////////////////////////////////////////////////////////
@ -371,28 +366,54 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
{
public:
Npp32s pLevels[256];
const Npp32s* pLevels3[3];
int nValues3[3];
LevelsInit()
{
{
nValues3[0] = nValues3[1] = nValues3[2] = 256;
for (int i = 0; i < 256; ++i)
pLevels[i] = i;
pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels;
}
};
static LevelsInit lvls;
int cn = src.channels();
CV_Assert(src.type() == CV_8UC1);
CV_Assert(lut.depth() == CV_32SC1 && lut.rows * lut.cols == 256 && lut.isContinuous());
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);
CV_Assert(lut.depth() == CV_8U && (lut.channels() == 1 || lut.channels() == cn) && lut.rows * lut.cols == 256 && lut.isContinuous());
dst.create(src.size(), src.type());
dst.create(src.size(), CV_MAKETYPE(lut.depth(), cn));
NppiSize sz;
sz.height = src.rows;
sz.width = src.cols;
Mat nppLut;
lut.convertTo(nppLut, CV_32S);
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
lut.ptr<Npp32s>(), lvls.pLevels, 256) );
if (src.type() == CV_8UC1)
{
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
}
else
{
Mat nppLut3[3];
const Npp32s* pValues3[3];
if (nppLut.channels() == 1)
pValues3[0] = pValues3[1] = pValues3[2] = nppLut.ptr<Npp32s>();
else
{
cv::split(nppLut, nppLut3);
pValues3[0] = nppLut3[0].ptr<Npp32s>();
pValues3[1] = nppLut3[1].ptr<Npp32s>();
pValues3[2] = nppLut3[2].ptr<Npp32s>();
}
nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
pValues3, lvls.pLevels3, lvls.nValues3) );
}
}
#endif /* !defined (HAVE_CUDA) */

@ -65,7 +65,7 @@ namespace imgproc
template<> struct TypeVec<float, 3> { typedef float3 vec_t; };
template<> struct TypeVec<float, 4> { typedef float4 vec_t; };
template<typename _Tp> struct ColorChannel {};
template<typename T> struct ColorChannel {};
template<> struct ColorChannel<uchar>
{
@ -86,7 +86,17 @@ namespace imgproc
typedef float worktype_f;
static __device__ float max() { return 1.f; }
static __device__ float half() { return 0.5f; }
};
};
template <typename T>
__device__ void assignAlpha(typename TypeVec<T, 3>::vec_t& vec, T val)
{
}
template <typename T>
__device__ void assignAlpha(typename TypeVec<T, 4>::vec_t& vec, T val)
{
vec.w = val;
}
}
//////////////////////////////////////// SwapChannels /////////////////////////////////////
@ -96,7 +106,7 @@ namespace imgproc
__constant__ int ccoeffs[4];
template <int CN, typename T>
__global__ void swapChannels(const T* src_, size_t src_step, T* dst_, size_t dst_step, int rows, int cols)
__global__ void swapChannels(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
{
typedef typename TypeVec<T, CN>::vec_t vec_t;
@ -121,8 +131,8 @@ namespace imgproc
namespace cv { namespace gpu { namespace improc
{
template <typename T>
void swapChannels_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, int cn, const int* coeffs, cudaStream_t stream)
template <typename T, int CN>
void swapChannels_caller(const DevMem2D& src, const DevMem2D& dst, const int* coeffs, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
@ -130,39 +140,38 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
cudaSafeCall( cudaMemcpyToSymbol(imgproc::ccoeffs, coeffs, cn * sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc::ccoeffs, coeffs, CN * sizeof(int)) );
switch (cn)
{
case 3:
imgproc::swapChannels<3><<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), dst.ptr, dst.step / sizeof(T), src.rows, src.cols);
break;
case 4:
imgproc::swapChannels<4><<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), dst.ptr, dst.step / sizeof(T), src.rows, src.cols);
break;
default:
cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
break;
}
imgproc::swapChannels<CN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
void swapChannels_gpu(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream)
void swapChannels_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream)
{
swapChannels_caller(src, dst, cn, coeffs, stream);
typedef void (*swapChannels_caller_t)(const DevMem2D& src, const DevMem2D& dst, const int* coeffs, cudaStream_t stream);
static const swapChannels_caller_t swapChannels_callers[] = {swapChannels_caller<uchar, 3>, swapChannels_caller<uchar, 4>};
swapChannels_callers[cn - 3](src, dst, coeffs, stream);
}
void swapChannels_gpu(const DevMem2D_<unsigned short>& src, const DevMem2D_<unsigned short>& dst, int cn, const int* coeffs, cudaStream_t stream)
void swapChannels_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream)
{
swapChannels_caller(src, dst, cn, coeffs, stream);
typedef void (*swapChannels_caller_t)(const DevMem2D& src, const DevMem2D& dst, const int* coeffs, cudaStream_t stream);
static const swapChannels_caller_t swapChannels_callers[] = {swapChannels_caller<unsigned short, 3>, swapChannels_caller<unsigned short, 4>};
swapChannels_callers[cn - 3](src, dst, coeffs, stream);
}
void swapChannels_gpu(const DevMem2Df& src, const DevMem2Df& dst, int cn, const int* coeffs, cudaStream_t stream)
void swapChannels_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream)
{
swapChannels_caller(src, dst, cn, coeffs, stream);
}
typedef void (*swapChannels_caller_t)(const DevMem2D& src, const DevMem2D& dst, const int* coeffs, cudaStream_t stream);
static const swapChannels_caller_t swapChannels_callers[] = {swapChannels_caller<float, 3>, swapChannels_caller<float, 4>};
swapChannels_callers[cn - 3](src, dst, coeffs, stream);
}
}}}
////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
@ -170,7 +179,7 @@ namespace cv { namespace gpu { namespace improc
namespace imgproc
{
template <int SRCCN, int DSTCN, typename T>
__global__ void RGB2RGB(const T* src_, size_t src_step, T* dst_, size_t dst_step, int rows, int cols, int bidx)
__global__ void RGB2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)
{
typedef typename TypeVec<T, SRCCN>::vec_t src_t;
typedef typename TypeVec<T, DSTCN>::vec_t dst_t;
@ -186,8 +195,7 @@ namespace imgproc
dst.x = ((const T*)(&src))[bidx];
dst.y = src.y;
dst.z = ((const T*)(&src))[bidx ^ 2];
if (DSTCN == 4)
((T*)(&dst))[3] = ColorChannel<T>::max();
assignAlpha(dst, ColorChannel<T>::max());
*(dst_t*)(dst_ + y * dst_step + x * DSTCN) = dst;
}
@ -196,8 +204,8 @@ namespace imgproc
namespace cv { namespace gpu { namespace improc
{
template <typename T>
void RGB2RGB_caller(const DevMem2D_<T>& src, int srccn, const DevMem2D_<T>& dst, int dstcn, int bidx, cudaStream_t stream)
template <typename T, int SRCCN, int DSTCN>
void RGB2RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
@ -205,171 +213,248 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
switch (dstcn)
{
case 3:
switch (srccn)
{
case 3:
{
int coeffs[] = {2, 1, 0};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::ccoeffs, coeffs, 3 * sizeof(int)) );
imgproc::swapChannels<3><<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), dst.ptr, dst.step / sizeof(T), src.rows, src.cols);
break;
}
case 4:
imgproc::RGB2RGB<4, 3><<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), dst.ptr, dst.step / sizeof(T),
src.rows, src.cols, bidx);
break;
default:
cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
break;
}
break;
case 4:
switch (srccn)
{
case 3:
imgproc::RGB2RGB<3, 4><<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), dst.ptr, dst.step / sizeof(T),
src.rows, src.cols, bidx);
break;
case 4:
{
int coeffs[] = {2, 1, 0, 3};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::ccoeffs, coeffs, 4 * sizeof(int)) );
imgproc::swapChannels<4><<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), dst.ptr, dst.step / sizeof(T), src.rows, src.cols);
break;
}
default:
cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
break;
}
break;
default:
cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
break;
}
imgproc::RGB2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
void RGB2RGB_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream)
void RGB2RGB_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream)
{
RGB2RGB_caller(src, srccn, dst, dstcn, bidx, stream);
typedef void (*RGB2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
static const RGB2RGB_caller_t RGB2RGB_callers[2][2] =
{
{RGB2RGB_caller<uchar, 3, 3>, RGB2RGB_caller<uchar, 3, 4>},
{RGB2RGB_caller<uchar, 4, 3>, RGB2RGB_caller<uchar, 4, 4>}
};
RGB2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
}
void RGB2RGB_gpu(const DevMem2D_<unsigned short>& src, int srccn, const DevMem2D_<unsigned short>& dst, int dstcn, int bidx, cudaStream_t stream)
void RGB2RGB_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream)
{
RGB2RGB_caller(src, srccn, dst, dstcn, bidx, stream);
typedef void (*RGB2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
static const RGB2RGB_caller_t RGB2RGB_callers[2][2] =
{
{RGB2RGB_caller<unsigned short, 3, 3>, RGB2RGB_caller<unsigned short, 3, 4>},
{RGB2RGB_caller<unsigned short, 4, 3>, RGB2RGB_caller<unsigned short, 4, 4>}
};
RGB2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
}
void RGB2RGB_gpu(const DevMem2Df& src, int srccn, const DevMem2Df& dst, int dstcn, int bidx, cudaStream_t stream)
void RGB2RGB_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream)
{
RGB2RGB_caller(src, srccn, dst, dstcn, bidx, stream);
typedef void (*RGB2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
static const RGB2RGB_caller_t RGB2RGB_callers[2][2] =
{
{RGB2RGB_caller<float, 3, 3>, RGB2RGB_caller<float, 3, 4>},
{RGB2RGB_caller<float, 4, 3>, RGB2RGB_caller<float, 4, 4>}
};
RGB2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
}
}}}
/////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////
//namespace imgproc
//{
// struct RGB5x52RGB
// {
// typedef uchar channel_type;
//
// RGB5x52RGB(int _dstcn, int _blueIdx, int _greenBits)
// : dstcn(_dstcn), blueIdx(_blueIdx), greenBits(_greenBits) {}
//
// void operator()(const uchar* src, uchar* dst, int n) const
// {
// int dcn = dstcn, bidx = blueIdx;
// if( greenBits == 6 )
// for( int i = 0; i < n; i++, dst += dcn )
// {
// unsigned t = ((const unsigned short*)src)[i];
// dst[bidx] = (uchar)(t << 3);
// dst[1] = (uchar)((t >> 3) & ~3);
// dst[bidx ^ 2] = (uchar)((t >> 8) & ~7);
// if( dcn == 4 )
// dst[3] = 255;
// }
// else
// for( int i = 0; i < n; i++, dst += dcn )
// {
// unsigned t = ((const unsigned short*)src)[i];
// dst[bidx] = (uchar)(t << 3);
// dst[1] = (uchar)((t >> 2) & ~7);
// dst[bidx ^ 2] = (uchar)((t >> 7) & ~7);
// if( dcn == 4 )
// dst[3] = t & 0x8000 ? 255 : 0;
// }
// }
//
// int dstcn, blueIdx, greenBits;
// };
//
//
// struct RGB2RGB5x5
// {
// typedef uchar channel_type;
//
// RGB2RGB5x5(int _srccn, int _blueIdx, int _greenBits)
// : srccn(_srccn), blueIdx(_blueIdx), greenBits(_greenBits) {}
//
// void operator()(const uchar* src, uchar* dst, int n) const
// {
// int scn = srccn, bidx = blueIdx;
// if( greenBits == 6 )
// for( int i = 0; i < n; i++, src += scn )
// {
// ((unsigned short*)dst)[i] = (unsigned short)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8));
// }
// else if( scn == 3 )
// for( int i = 0; i < n; i++, src += 3 )
// {
// ((unsigned short*)dst)[i] = (unsigned short)((src[bidx] >> 3)|((src[1]&~7) << 2)|((src[bidx^2]&~7) << 7));
// }
// else
// for( int i = 0; i < n; i++, src += 4 )
// {
// ((unsigned short*)dst)[i] = (unsigned short)((src[bidx] >> 3)|((src[1]&~7) << 2)|
// ((src[bidx^2]&~7) << 7)|(src[3] ? 0x8000 : 0));
// }
// }
//
// int srccn, blueIdx, greenBits;
// };
//}
//
//namespace cv { namespace gpu { namespace impl
//{
//}}}
///////////////////////////////// Grayscale to Color ////////////////////////////////
namespace imgproc
{
template <typename T>
__global__ void Gray2RGB_3(const T* src_, size_t src_step, T* dst_, size_t dst_step, int rows, int cols)
template <int GREEN_BITS, int DSTCN> struct RGB5x52RGBConverter {};
template <int DSTCN> struct RGB5x52RGBConverter<5, DSTCN>
{
typedef typename TypeVec<uchar, DSTCN>::vec_t dst_t;
static __device__ dst_t cvt(unsigned int src, int bidx)
{
dst_t dst;
((uchar*)(&dst))[bidx] = (uchar)(src << 3);
dst.y = (uchar)((src >> 2) & ~7);
((uchar*)(&dst))[bidx ^ 2] = (uchar)((src >> 7) & ~7);
assignAlpha(dst, (uchar)(src & 0x8000 ? 255 : 0));
return dst;
}
};
template <int DSTCN> struct RGB5x52RGBConverter<6, DSTCN>
{
typedef typename TypeVec<uchar, DSTCN>::vec_t dst_t;
static __device__ dst_t cvt(unsigned int src, int bidx)
{
dst_t dst;
((uchar*)(&dst))[bidx] = (uchar)(src << 3);
dst.y = (uchar)((src >> 3) & ~3);
((uchar*)(&dst))[bidx ^ 2] = (uchar)((src >> 8) & ~7);
assignAlpha(dst, (uchar)(255));
return dst;
}
};
template <int GREEN_BITS, int DSTCN>
__global__ void RGB5x52RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)
{
typedef typename TypeVec<uchar, DSTCN>::vec_t dst_t;
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (y < rows && x < cols)
{
T src = src_[y * src_step + x];
T* dst = dst_ + y * dst_step + x * 3;
dst[0] = src;
dst[1] = src;
dst[2] = src;
unsigned int src = *(const unsigned short*)(src_ + y * src_step + (x << 1));
*(dst_t*)(dst_ + y * dst_step + x * DSTCN) = RGB5x52RGBConverter<GREEN_BITS, DSTCN>::cvt(src, bidx);
}
}
template <typename T>
__global__ void Gray2RGB_4(const T* src_, size_t src_step, T* dst_, size_t dst_step, int rows, int cols)
/*struct RGB5x52RGB
{
typedef uchar channel_type;
RGB5x52RGB(int _dstcn, int _blueIdx, int _greenBits)
: dstcn(_dstcn), blueIdx(_blueIdx), greenBits(_greenBits) {}
void operator()(const uchar* src, uchar* dst, int n) const
{
int dcn = dstcn, bidx = blueIdx;
if( greenBits == 6 )
for( int i = 0; i < n; i++, dst += dcn )
{
unsigned t = ((const unsigned short*)src)[i];
dst[bidx] = (uchar)(t << 3);
dst[1] = (uchar)((t >> 3) & ~3);
dst[bidx ^ 2] = (uchar)((t >> 8) & ~7);
if( dcn == 4 )
dst[3] = 255;
}
else
for( int i = 0; i < n; i++, dst += dcn )
{
unsigned t = ((const unsigned short*)src)[i];
dst[bidx] = (uchar)(t << 3);
dst[1] = (uchar)((t >> 2) & ~7);
dst[bidx ^ 2] = (uchar)((t >> 7) & ~7);
if( dcn == 4 )
dst[3] = t & 0x8000 ? 255 : 0;
}
}
int dstcn, blueIdx, greenBits;
};*/
template <int SRCCN, int GREEN_BITS> struct RGB2RGB5x5Converter {};
template<int SRCCN> struct RGB2RGB5x5Converter<SRCCN, 6>
{
static __device__ unsigned short cvt(const uchar* src_ptr, int bidx)
{
return (unsigned short)((src_ptr[bidx] >> 3) | ((src_ptr[1] & ~3) << 3) | ((src_ptr[bidx^2] & ~7) << 8));
}
};
template<> struct RGB2RGB5x5Converter<3, 5>
{
static __device__ unsigned short cvt(const uchar* src_ptr, int bidx)
{
return (unsigned short)((src_ptr[bidx] >> 3) | ((src_ptr[1] & ~7) << 2) | ((src_ptr[bidx^2] & ~7) << 7));
}
};
template<> struct RGB2RGB5x5Converter<4, 5>
{
typedef typename TypeVec<T, 4>::vec_t vec4_t;
static __device__ unsigned short cvt(const uchar* src_ptr, int bidx)
{
return (unsigned short)((src_ptr[bidx] >> 3) | ((src_ptr[1] & ~7) << 2) | ((src_ptr[bidx^2] & ~7) << 7)|(src_ptr[3] ? 0x8000 : 0));
}
};
template<int SRCCN, int GREEN_BITS>
__global__ void RGB2RGB5x5(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)
{
typedef typename TypeVec<uchar, SRCCN>::vec_t src_t;
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (y < rows && x < cols)
{
src_t src = *(src_t*)(src_ + y * src_step + x * SRCCN);
*(unsigned short*)(dst_ + y * dst_step + (x << 1)) = RGB2RGB5x5Converter<SRCCN, GREEN_BITS>::cvt((const uchar*)(&src), bidx);
}
}
}
namespace cv { namespace gpu { namespace improc
{
template <int GREEN_BITS, int DSTCN>
void RGB5x52RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
imgproc::RGB5x52RGB<GREEN_BITS, DSTCN><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
void RGB5x52RGB_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream)
{
typedef void (*RGB5x52RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
static const RGB5x52RGB_caller_t RGB5x52RGB_callers[2][2] =
{
{RGB5x52RGB_caller<5, 3>, RGB5x52RGB_caller<5, 4>},
{RGB5x52RGB_caller<6, 3>, RGB5x52RGB_caller<6, 4>}
};
RGB5x52RGB_callers[green_bits - 5][dstcn - 5](src, dst, bidx, stream);
}
template <int SRCCN, int GREEN_BITS>
void RGB2RGB5x5_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
imgproc::RGB2RGB5x5<SRCCN, GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
void RGB2RGB5x5_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int green_bits, int bidx, cudaStream_t stream)
{
typedef void (*RGB2RGB5x5_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
static const RGB2RGB5x5_caller_t RGB2RGB5x5_callers[2][2] =
{
{RGB2RGB5x5_caller<3, 5>, RGB2RGB5x5_caller<3, 6>},
{RGB2RGB5x5_caller<4, 5>, RGB2RGB5x5_caller<4, 6>}
};
RGB2RGB5x5_callers[srccn - 3][green_bits - 5](src, dst, bidx, stream);
}
}}}
///////////////////////////////// Grayscale to Color ////////////////////////////////
namespace imgproc
{
template <int DSTCN, typename T>
__global__ void Gray2RGB(const T* src_, size_t src_step, T* dst_, size_t dst_step, int rows, int cols)
{
typedef typename TypeVec<T, DSTCN>::vec_t dst_t;
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
@ -377,12 +462,12 @@ namespace imgproc
if (y < rows && x < cols)
{
T src = src_[y * src_step + x];
vec4_t dst;
dst_t dst;
dst.x = src;
dst.y = src;
dst.z = src;
dst.w = ColorChannel<T>::max();
*(vec4_t*)(dst_ + y * dst_step + (x << 2)) = dst;
assignAlpha(dst, ColorChannel<T>::max());
*(dst_t*)(dst_ + y * dst_step + x * DSTCN) = dst;
}
}
@ -412,8 +497,8 @@ namespace imgproc
namespace cv { namespace gpu { namespace improc
{
template <typename T>
void Gray2RGB_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, int dstcn, cudaStream_t stream)
template <typename T, int DSTCN>
void Gray2RGB_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
@ -421,18 +506,8 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
switch (dstcn)
{
case 3:
imgproc::Gray2RGB_3<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), dst.ptr, dst.step / sizeof(T), src.rows, src.cols);
break;
case 4:
imgproc::Gray2RGB_4<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), dst.ptr, dst.step / sizeof(T), src.rows, src.cols);
break;
default:
cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
break;
}
imgproc::Gray2RGB<DSTCN><<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T),
dst.ptr, dst.step / sizeof(T), src.rows, src.cols);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
@ -440,17 +515,26 @@ namespace cv { namespace gpu { namespace improc
void Gray2RGB_gpu(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream)
{
Gray2RGB_caller(src, dst, dstcn, stream);
typedef void (*Gray2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
static const Gray2RGB_caller_t Gray2RGB_callers[] = {Gray2RGB_caller<uchar, 3>, Gray2RGB_caller<uchar, 4>};
Gray2RGB_callers[dstcn - 3](src, dst, stream);
}
void Gray2RGB_gpu(const DevMem2D_<unsigned short>& src, const DevMem2D_<unsigned short>& dst, int dstcn, cudaStream_t stream)
{
Gray2RGB_caller(src, dst, dstcn, stream);
typedef void (*Gray2RGB_caller_t)(const DevMem2D_<unsigned short>& src, const DevMem2D_<unsigned short>& dst, cudaStream_t stream);
static const Gray2RGB_caller_t Gray2RGB_callers[] = {Gray2RGB_caller<unsigned short, 3>, Gray2RGB_caller<unsigned short, 4>};
Gray2RGB_callers[dstcn - 3](src, dst, stream);
}
void Gray2RGB_gpu(const DevMem2Df& src, const DevMem2Df& dst, int dstcn, cudaStream_t stream)
{
Gray2RGB_caller(src, dst, dstcn, stream);
typedef void (*Gray2RGB_caller_t)(const DevMem2Df& src, const DevMem2Df& dst, cudaStream_t stream);
static const Gray2RGB_caller_t Gray2RGB_callers[] = {Gray2RGB_caller<float, 3>, Gray2RGB_caller<float, 4>};
Gray2RGB_callers[dstcn - 3](src, dst, stream);
}
}}}

@ -81,13 +81,16 @@ namespace cv { namespace gpu
void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);
void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);
void swapChannels_gpu(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream);
void swapChannels_gpu(const DevMem2D_<ushort>& src, const DevMem2D_<ushort>& dst, int cn, const int* coeffs, cudaStream_t stream);
void swapChannels_gpu(const DevMem2Df& src, const DevMem2Df& dst, int cn, const int* coeffs, cudaStream_t stream);
void swapChannels_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream);
void swapChannels_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream);
void swapChannels_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream);
void RGB2RGB_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);
void RGB2RGB_gpu(const DevMem2D_<ushort>& src, int srccn, const DevMem2D_<ushort>& dst, int dstcn, int bidx, cudaStream_t stream);
void RGB2RGB_gpu(const DevMem2Df& src, int srccn, const DevMem2Df& dst, int dstcn, int bidx, cudaStream_t stream);
void RGB2RGB_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);
void RGB2RGB_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);
void RGB2RGB_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);
void RGB5x52RGB_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);
void RGB2RGB5x5_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int green_bits, int bidx, cudaStream_t stream);
void Gray2RGB_gpu(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream);
void Gray2RGB_gpu(const DevMem2D_<ushort>& src, const DevMem2D_<ushort>& dst, int dstcn, cudaStream_t stream);
@ -245,38 +248,36 @@ namespace
out.create(sz, CV_MAKETYPE(depth, dcn));
if( depth == CV_8U )
improc::RGB2RGB_gpu((DevMem2D)src, scn, (DevMem2D)out, dcn, bidx, stream);
improc::RGB2RGB_gpu_8u(src, scn, out, dcn, bidx, stream);
else if( depth == CV_16U )
improc::RGB2RGB_gpu((DevMem2D_<unsigned short>)src, scn, (DevMem2D_<unsigned short>)out, dcn, bidx, stream);
improc::RGB2RGB_gpu_16u(src, scn, out, dcn, bidx, stream);
else
improc::RGB2RGB_gpu((DevMem2Df)src, scn, (DevMem2Df)out, dcn, bidx, stream);
improc::RGB2RGB_gpu_32f(src, scn, out, dcn, bidx, stream);
break;
//case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555:
//case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555:
// CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
// dst.create(sz, CV_8UC2);
//
// CvtColorLoop(src, dst, RGB2RGB5x5(scn,
// code == CV_BGR2BGR565 || code == CV_BGR2BGR555 ||
// code == CV_BGRA2BGR565 || code == CV_BGRA2BGR555 ? 0 : 2,
// code == CV_BGR2BGR565 || code == CV_RGB2BGR565 ||
// code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5 // green bits
// ));
// break;
case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555:
case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555:
CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
out.create(sz, CV_8UC2);
improc::RGB2RGB5x5_gpu(src, scn, out, code == CV_BGR2BGR565 || code == CV_RGB2BGR565 ||
code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5,
code == CV_BGR2BGR565 || code == CV_BGR2BGR555 ||
code == CV_BGRA2BGR565 || code == CV_BGRA2BGR555 ? 0 : 2,
stream);
break;
//case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:
//case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA:
// if(dcn <= 0) dcn = 3;
// CV_Assert( (dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U );
// dst.create(sz, CV_MAKETYPE(depth, dcn));
//
// CvtColorLoop(src, dst, RGB5x52RGB(dcn,
// code == CV_BGR5652BGR || code == CV_BGR5552BGR ||
// code == CV_BGR5652BGRA || code == CV_BGR5552BGRA ? 0 : 2, // blue idx
// code == CV_BGR5652BGR || code == CV_BGR5652RGB ||
// code == CV_BGR5652BGRA || code == CV_BGR5652RGBA ? 6 : 5 // green bits
// ));
// out.create(sz, CV_MAKETYPE(depth, dcn));
// improc::RGB5x52RGB_gpu(src, code == CV_BGR2BGR565 || code == CV_RGB2BGR565 ||
// code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5, out, dcn,
// code == CV_BGR2BGR565 || code == CV_BGR2BGR555 ||
// code == CV_BGRA2BGR565 || code == CV_BGRA2BGR555 ? 0 : 2,
// stream);
// break;
case CV_BGR2GRAY: case CV_BGRA2GRAY: case CV_RGB2GRAY: case CV_RGBA2GRAY:
@ -329,7 +330,7 @@ namespace
nppSafeCall( nppiRGBToYCbCr_8u_C3R(src.ptr<Npp8u>(), src.step, out.ptr<Npp8u>(), out.step, nppsz) );
{
static int coeffs[] = {0, 2, 1};
improc::swapChannels_gpu((DevMem2D)out, (DevMem2D)out, 3, coeffs, 0);
improc::swapChannels_gpu_8u(out, out, 3, coeffs, 0);
}
break;
@ -341,7 +342,7 @@ namespace
{
static int coeffs[] = {0, 2, 1};
GpuMat src1(src.size(), src.type());
improc::swapChannels_gpu((DevMem2D)src, (DevMem2D)src1, 3, coeffs, 0);
improc::swapChannels_gpu_8u(src, src1, 3, coeffs, 0);
nppSafeCall( nppiYCbCrToRGB_8u_C3R(src1.ptr<Npp8u>(), src1.step, out.ptr<Npp8u>(), out.step, nppsz) );
}
break;

File diff suppressed because it is too large Load Diff

@ -46,6 +46,18 @@ CvTS test_system;
const char* blacklist[] =
{
"GPU-NppImageSum",
"GPU-MatOperatorAsyncCall",
//"GPU-NppErode",
//"GPU-NppDilate",
//"GPU-NppMorphologyEx",
//"GPU-NppImageDivide",
//"GPU-NppImageMeanStdDev",
//"GPU-NppImageMinNax",
//"GPU-NppImageResize",
//"GPU-NppImageWarpAffine",
//"GPU-NppImageWarpPerspective",
//"GPU-NppImageIntegral",
//"GPU-NppImageBlur",
0
};

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save