|
|
|
@ -65,7 +65,7 @@ namespace imgproc |
|
|
|
|
static __device__ unsigned char half() { return (unsigned char)(max()/2 + 1); } |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
template<> struct ColorChannel<ushort> |
|
|
|
|
template<> struct ColorChannel<unsigned short> |
|
|
|
|
{ |
|
|
|
|
typedef float worktype_f; |
|
|
|
|
typedef ushort3 vec3_t; |
|
|
|
@ -226,7 +226,7 @@ namespace cv { namespace gpu { namespace impl |
|
|
|
|
RGB2RGB_caller(src, srccn, dst, dstcn, bidx, stream); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void RGB2RGB_gpu(const DevMem2D_<ushort>& src, int srccn, const DevMem2D_<ushort>& dst, int dstcn, int bidx, cudaStream_t stream) |
|
|
|
|
void RGB2RGB_gpu(const DevMem2D_<unsigned short>& src, int srccn, const DevMem2D_<unsigned short>& dst, int dstcn, int bidx, cudaStream_t stream) |
|
|
|
|
{ |
|
|
|
|
RGB2RGB_caller(src, srccn, dst, dstcn, bidx, stream); |
|
|
|
|
} |
|
|
|
@ -254,7 +254,7 @@ namespace cv { namespace gpu { namespace impl |
|
|
|
|
// if( greenBits == 6 ) |
|
|
|
|
// for( int i = 0; i < n; i++, dst += dcn ) |
|
|
|
|
// { |
|
|
|
|
// unsigned t = ((const ushort*)src)[i]; |
|
|
|
|
// unsigned t = ((const unsigned short*)src)[i]; |
|
|
|
|
// dst[bidx] = (uchar)(t << 3); |
|
|
|
|
// dst[1] = (uchar)((t >> 3) & ~3); |
|
|
|
|
// dst[bidx ^ 2] = (uchar)((t >> 8) & ~7); |
|
|
|
@ -264,7 +264,7 @@ namespace cv { namespace gpu { namespace impl |
|
|
|
|
// else |
|
|
|
|
// for( int i = 0; i < n; i++, dst += dcn ) |
|
|
|
|
// { |
|
|
|
|
// unsigned t = ((const ushort*)src)[i]; |
|
|
|
|
// unsigned t = ((const unsigned short*)src)[i]; |
|
|
|
|
// dst[bidx] = (uchar)(t << 3); |
|
|
|
|
// dst[1] = (uchar)((t >> 2) & ~7); |
|
|
|
|
// dst[bidx ^ 2] = (uchar)((t >> 7) & ~7); |
|
|
|
@ -290,17 +290,17 @@ namespace cv { namespace gpu { namespace impl |
|
|
|
|
// if( greenBits == 6 ) |
|
|
|
|
// for( int i = 0; i < n; i++, src += scn ) |
|
|
|
|
// { |
|
|
|
|
// ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8)); |
|
|
|
|
// ((unsigned short*)dst)[i] = (unsigned short)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8)); |
|
|
|
|
// } |
|
|
|
|
// else if( scn == 3 ) |
|
|
|
|
// for( int i = 0; i < n; i++, src += 3 ) |
|
|
|
|
// { |
|
|
|
|
// ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)|((src[bidx^2]&~7) << 7)); |
|
|
|
|
// ((unsigned short*)dst)[i] = (unsigned short)((src[bidx] >> 3)|((src[1]&~7) << 2)|((src[bidx^2]&~7) << 7)); |
|
|
|
|
// } |
|
|
|
|
// else |
|
|
|
|
// for( int i = 0; i < n; i++, src += 4 ) |
|
|
|
|
// { |
|
|
|
|
// ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)| |
|
|
|
|
// ((unsigned short*)dst)[i] = (unsigned short)((src[bidx] >> 3)|((src[1]&~7) << 2)| |
|
|
|
|
// ((src[bidx^2]&~7) << 7)|(src[3] ? 0x8000 : 0)); |
|
|
|
|
// } |
|
|
|
|
// } |
|
|
|
@ -364,13 +364,13 @@ namespace imgproc |
|
|
|
|
// for( int i = 0; i < n; i++ ) |
|
|
|
|
// { |
|
|
|
|
// int t = src[i]; |
|
|
|
|
// ((ushort*)dst)[i] = (ushort)((t >> 3)|((t & ~3) << 3)|((t & ~7) << 8)); |
|
|
|
|
// ((unsigned short*)dst)[i] = (unsigned short)((t >> 3)|((t & ~3) << 3)|((t & ~7) << 8)); |
|
|
|
|
// } |
|
|
|
|
// else |
|
|
|
|
// for( int i = 0; i < n; i++ ) |
|
|
|
|
// { |
|
|
|
|
// int t = src[i] >> 3; |
|
|
|
|
// ((ushort*)dst)[i] = (ushort)(t|(t << 5)|(t << 10)); |
|
|
|
|
// ((unsigned short*)dst)[i] = (unsigned short)(t|(t << 5)|(t << 10)); |
|
|
|
|
// } |
|
|
|
|
// } |
|
|
|
|
// int greenBits; |
|
|
|
@ -410,7 +410,7 @@ namespace cv { namespace gpu { namespace impl |
|
|
|
|
Gray2RGB_caller(src, dst, dstcn, stream); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void Gray2RGB_gpu(const DevMem2D_<ushort>& src, const DevMem2D_<ushort>& dst, int dstcn, cudaStream_t stream) |
|
|
|
|
void Gray2RGB_gpu(const DevMem2D_<unsigned short>& src, const DevMem2D_<unsigned short>& dst, int dstcn, cudaStream_t stream) |
|
|
|
|
{ |
|
|
|
|
Gray2RGB_caller(src, dst, dstcn, stream); |
|
|
|
|
} |
|
|
|
@ -449,7 +449,7 @@ namespace imgproc |
|
|
|
|
// if( greenBits == 6 ) |
|
|
|
|
// for( int i = 0; i < n; i++ ) |
|
|
|
|
// { |
|
|
|
|
// int t = ((ushort*)src)[i]; |
|
|
|
|
// int t = ((unsigned short*)src)[i]; |
|
|
|
|
// dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y + |
|
|
|
|
// ((t >> 3) & 0xfc)*G2Y + |
|
|
|
|
// ((t >> 8) & 0xf8)*R2Y, yuv_shift); |
|
|
|
@ -457,7 +457,7 @@ namespace imgproc |
|
|
|
|
// else |
|
|
|
|
// for( int i = 0; i < n; i++ ) |
|
|
|
|
// { |
|
|
|
|
// int t = ((ushort*)src)[i]; |
|
|
|
|
// int t = ((unsigned short*)src)[i]; |
|
|
|
|
// dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y + |
|
|
|
|
// ((t >> 2) & 0xf8)*G2Y + |
|
|
|
|
// ((t >> 7) & 0xf8)*R2Y, yuv_shift); |
|
|
|
@ -501,7 +501,7 @@ namespace imgproc |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__global__ void RGB2Gray_3(const ushort* src_, size_t src_step, ushort* dst_, size_t dst_step, int rows, int cols, int bidx) |
|
|
|
|
__global__ void RGB2Gray_3(const unsigned short* src_, size_t src_step, unsigned short* dst_, size_t dst_step, int rows, int cols, int bidx) |
|
|
|
|
{ |
|
|
|
|
const int cr = 4899; |
|
|
|
|
const int cg = 9617; |
|
|
|
@ -513,16 +513,16 @@ namespace imgproc |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
{ |
|
|
|
|
const ushort* src = src_ + y * src_step + x * 3; |
|
|
|
|
const unsigned short* src = src_ + y * src_step + x * 3; |
|
|
|
|
|
|
|
|
|
ushort t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2]; |
|
|
|
|
unsigned short t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2]; |
|
|
|
|
|
|
|
|
|
ushort2 dst; |
|
|
|
|
dst.x = (ushort)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift); |
|
|
|
|
dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift); |
|
|
|
|
|
|
|
|
|
src += 3; |
|
|
|
|
t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2]; |
|
|
|
|
dst.y = (ushort)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift); |
|
|
|
|
dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift); |
|
|
|
|
|
|
|
|
|
*(ushort2*)(dst_ + y * dst_step + x) = dst; |
|
|
|
|
} |
|
|
|
@ -581,7 +581,7 @@ namespace imgproc |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__global__ void RGB2Gray_4(const ushort* src_, size_t src_step, ushort* dst_, size_t dst_step, int rows, int cols, int bidx) |
|
|
|
|
__global__ void RGB2Gray_4(const unsigned short* src_, size_t src_step, unsigned short* dst_, size_t dst_step, int rows, int cols, int bidx) |
|
|
|
|
{ |
|
|
|
|
const int cr = 4899; |
|
|
|
|
const int cg = 9617; |
|
|
|
@ -595,14 +595,14 @@ namespace imgproc |
|
|
|
|
{ |
|
|
|
|
ushort4 src = *(ushort4*)(src_ + y * src_step + (x << 2)); |
|
|
|
|
|
|
|
|
|
ushort t0 = ((ushort*)(&src))[bidx], t1 = src.y, t2 = ((ushort*)(&src))[bidx ^ 2]; |
|
|
|
|
unsigned short t0 = ((unsigned short*)(&src))[bidx], t1 = src.y, t2 = ((unsigned short*)(&src))[bidx ^ 2]; |
|
|
|
|
|
|
|
|
|
ushort2 dst; |
|
|
|
|
dst.x = (ushort)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift); |
|
|
|
|
dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift); |
|
|
|
|
|
|
|
|
|
src = *(ushort4*)(src_ + y * src_step + (x << 2) + 4); |
|
|
|
|
t0 = ((ushort*)(&src))[bidx], t1 = src.y, t2 = ((ushort*)(&src))[bidx ^ 2]; |
|
|
|
|
dst.y = (ushort)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift); |
|
|
|
|
t0 = ((unsigned short*)(&src))[bidx], t1 = src.y, t2 = ((unsigned short*)(&src))[bidx ^ 2]; |
|
|
|
|
dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift); |
|
|
|
|
|
|
|
|
|
*(ushort2*)(dst_ + y * dst_step + x) = dst; |
|
|
|
|
} |
|
|
|
@ -654,7 +654,7 @@ namespace cv { namespace gpu { namespace impl |
|
|
|
|
cudaSafeCall( cudaThreadSynchronize() ); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void RGB2Gray_gpu(const DevMem2D_<ushort>& src, int srccn, const DevMem2D_<ushort>& dst, int bidx, cudaStream_t stream) |
|
|
|
|
void RGB2Gray_gpu(const DevMem2D_<unsigned short>& src, int srccn, const DevMem2D_<unsigned short>& dst, int bidx, cudaStream_t stream) |
|
|
|
|
{ |
|
|
|
|
dim3 threads(32, 8, 1); |
|
|
|
|
dim3 grid(1, 1, 1); |
|
|
|
@ -665,10 +665,10 @@ namespace cv { namespace gpu { namespace impl |
|
|
|
|
switch (srccn) |
|
|
|
|
{ |
|
|
|
|
case 3: |
|
|
|
|
imgproc::RGB2Gray_3<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(ushort), dst.ptr, dst.step / sizeof(ushort), src.rows, src.cols, bidx); |
|
|
|
|
imgproc::RGB2Gray_3<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(unsigned short), dst.ptr, dst.step / sizeof(unsigned short), src.rows, src.cols, bidx); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
imgproc::RGB2Gray_4<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(ushort), dst.ptr, dst.step / sizeof(ushort), src.rows, src.cols, bidx); |
|
|
|
|
imgproc::RGB2Gray_4<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(unsigned short), dst.ptr, dst.step / sizeof(unsigned short), src.rows, src.cols, bidx); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
cv::gpu::error("Unsupported channels count", __FILE__, __LINE__); |
|
|
|
@ -1472,13 +1472,13 @@ namespace cv { namespace gpu { namespace impl |
|
|
|
|
//static float sRGBGammaTab[GAMMA_TAB_SIZE*4], sRGBInvGammaTab[GAMMA_TAB_SIZE*4]; |
|
|
|
|
//static const float GammaTabScale = (float)GAMMA_TAB_SIZE; |
|
|
|
|
// |
|
|
|
|
//static ushort sRGBGammaTab_b[256], linearGammaTab_b[256]; |
|
|
|
|
//static unsigned short sRGBGammaTab_b[256], linearGammaTab_b[256]; |
|
|
|
|
//#undef lab_shift |
|
|
|
|
//#define lab_shift xyz_shift |
|
|
|
|
//#define gamma_shift 3 |
|
|
|
|
//#define lab_shift2 (lab_shift + gamma_shift) |
|
|
|
|
//#define LAB_CBRT_TAB_SIZE_B (256*3/2*(1<<gamma_shift)) |
|
|
|
|
//static ushort LabCbrtTab_b[LAB_CBRT_TAB_SIZE_B]; |
|
|
|
|
//static unsigned short LabCbrtTab_b[LAB_CBRT_TAB_SIZE_B]; |
|
|
|
|
// |
|
|
|
|
//static void initLabTabs() |
|
|
|
|
//{ |
|
|
|
@ -1507,14 +1507,14 @@ namespace cv { namespace gpu { namespace impl |
|
|
|
|
// for(i = 0; i < 256; i++) |
|
|
|
|
// { |
|
|
|
|
// float x = i*(1.f/255.f); |
|
|
|
|
// sRGBGammaTab_b[i] = saturate_cast<ushort>(255.f*(1 << gamma_shift)*(x <= 0.04045f ? x*(1.f/12.92f) : (float)pow((double)(x + 0.055)*(1./1.055), 2.4))); |
|
|
|
|
// linearGammaTab_b[i] = (ushort)(i*(1 << gamma_shift)); |
|
|
|
|
// sRGBGammaTab_b[i] = saturate_cast<unsigned short>(255.f*(1 << gamma_shift)*(x <= 0.04045f ? x*(1.f/12.92f) : (float)pow((double)(x + 0.055)*(1./1.055), 2.4))); |
|
|
|
|
// linearGammaTab_b[i] = (unsigned short)(i*(1 << gamma_shift)); |
|
|
|
|
// } |
|
|
|
|
// |
|
|
|
|
// for(i = 0; i < LAB_CBRT_TAB_SIZE_B; i++) |
|
|
|
|
// { |
|
|
|
|
// float x = i*(1.f/(255.f*(1 << gamma_shift))); |
|
|
|
|
// LabCbrtTab_b[i] = saturate_cast<ushort>((1 << lab_shift2)*(x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x))); |
|
|
|
|
// LabCbrtTab_b[i] = saturate_cast<unsigned short>((1 << lab_shift2)*(x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x))); |
|
|
|
|
// } |
|
|
|
|
// initialized = true; |
|
|
|
|
// } |
|
|
|
@ -1554,7 +1554,7 @@ namespace cv { namespace gpu { namespace impl |
|
|
|
|
// { |
|
|
|
|
// const int Lscale = (116*255+50)/100; |
|
|
|
|
// const int Lshift = -((16*255*(1 << lab_shift2) + 50)/100); |
|
|
|
|
// const ushort* tab = srgb ? sRGBGammaTab_b : linearGammaTab_b; |
|
|
|
|
// const unsigned short* tab = srgb ? sRGBGammaTab_b : linearGammaTab_b; |
|
|
|
|
// int i, scn = srccn; |
|
|
|
|
// int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], |
|
|
|
|
// C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], |
|
|
|
@ -2103,8 +2103,8 @@ namespace cv { namespace gpu { namespace impl |
|
|
|
|
// const int brows = 3, bcn = 7; |
|
|
|
|
// int N = size.width, N2 = N*2, N3 = N*3, N4 = N*4, N5 = N*5, N6 = N*6, N7 = N*7; |
|
|
|
|
// int i, bufstep = N7*bcn; |
|
|
|
|
// cv::AutoBuffer<ushort> _buf(bufstep*brows); |
|
|
|
|
// ushort* buf = (ushort*)_buf; |
|
|
|
|
// cv::AutoBuffer<unsigned short> _buf(bufstep*brows); |
|
|
|
|
// unsigned short* buf = (unsigned short*)_buf; |
|
|
|
|
// |
|
|
|
|
// bayer += bstep*2; |
|
|
|
|
// |
|
|
|
@ -2120,7 +2120,7 @@ namespace cv { namespace gpu { namespace impl |
|
|
|
|
// |
|
|
|
|
// for( int dy = (y == 2 ? -1 : 1); dy <= 1; dy++ ) |
|
|
|
|
// { |
|
|
|
|
// ushort* brow = buf + ((y + dy - 1)%brows)*bufstep + 1; |
|
|
|
|
// unsigned short* brow = buf + ((y + dy - 1)%brows)*bufstep + 1; |
|
|
|
|
// srow = bayer + (y+dy)*bstep + 1; |
|
|
|
|
// |
|
|
|
|
// for( i = 0; i < bcn; i++ ) |
|
|
|
@ -2179,25 +2179,25 @@ namespace cv { namespace gpu { namespace impl |
|
|
|
|
// |
|
|
|
|
// for( ; i < N-1; i++, srow++, brow++ ) |
|
|
|
|
// { |
|
|
|
|
// brow[0] = (ushort)(std::abs(srow[-1-bstep] - srow[-1+bstep]) + |
|
|
|
|
// brow[0] = (unsigned short)(std::abs(srow[-1-bstep] - srow[-1+bstep]) + |
|
|
|
|
// std::abs(srow[-bstep] - srow[+bstep])*2 + |
|
|
|
|
// std::abs(srow[1-bstep] - srow[1+bstep])); |
|
|
|
|
// brow[N] = (ushort)(std::abs(srow[-1-bstep] - srow[1-bstep]) + |
|
|
|
|
// brow[N] = (unsigned short)(std::abs(srow[-1-bstep] - srow[1-bstep]) + |
|
|
|
|
// std::abs(srow[-1] - srow[1])*2 + |
|
|
|
|
// std::abs(srow[-1+bstep] - srow[1+bstep])); |
|
|
|
|
// brow[N2] = (ushort)(std::abs(srow[+1-bstep] - srow[-1+bstep])*2); |
|
|
|
|
// brow[N3] = (ushort)(std::abs(srow[-1-bstep] - srow[1+bstep])*2); |
|
|
|
|
// brow[N4] = (ushort)(brow[N2] + std::abs(srow[-bstep] - srow[-1]) + |
|
|
|
|
// brow[N2] = (unsigned short)(std::abs(srow[+1-bstep] - srow[-1+bstep])*2); |
|
|
|
|
// brow[N3] = (unsigned short)(std::abs(srow[-1-bstep] - srow[1+bstep])*2); |
|
|
|
|
// brow[N4] = (unsigned short)(brow[N2] + std::abs(srow[-bstep] - srow[-1]) + |
|
|
|
|
// std::abs(srow[+bstep] - srow[1])); |
|
|
|
|
// brow[N5] = (ushort)(brow[N3] + std::abs(srow[-bstep] - srow[1]) + |
|
|
|
|
// brow[N5] = (unsigned short)(brow[N3] + std::abs(srow[-bstep] - srow[1]) + |
|
|
|
|
// std::abs(srow[+bstep] - srow[-1])); |
|
|
|
|
// brow[N6] = (ushort)((srow[-bstep] + srow[-1] + srow[1] + srow[+bstep])>>1); |
|
|
|
|
// brow[N6] = (unsigned short)((srow[-bstep] + srow[-1] + srow[1] + srow[+bstep])>>1); |
|
|
|
|
// } |
|
|
|
|
// } |
|
|
|
|
// |
|
|
|
|
// const ushort* brow0 = buf + ((y - 2) % brows)*bufstep + 2; |
|
|
|
|
// const ushort* brow1 = buf + ((y - 1) % brows)*bufstep + 2; |
|
|
|
|
// const ushort* brow2 = buf + (y % brows)*bufstep + 2; |
|
|
|
|
// const unsigned short* brow0 = buf + ((y - 2) % brows)*bufstep + 2; |
|
|
|
|
// const unsigned short* brow1 = buf + ((y - 1) % brows)*bufstep + 2; |
|
|
|
|
// const unsigned short* brow2 = buf + (y % brows)*bufstep + 2; |
|
|
|
|
// static const float scale[] = { 0.f, 0.5f, 0.25f, 0.1666666666667f, 0.125f, 0.1f, 0.08333333333f, 0.0714286f, 0.0625f }; |
|
|
|
|
// srow = bayer + y*bstep + 2; |
|
|
|
|
// bool greenCell = greenCell0; |
|
|
|
@ -2380,8 +2380,7 @@ namespace cv { namespace gpu { namespace impl |
|
|
|
|
// z = _mm_setzero_si128(); |
|
|
|
|
// __m128 _0_5 = _mm_set1_ps(0.5f); |
|
|
|
|
// |
|
|
|
|
// #define _mm_merge_epi16(a, b) \ |
|
|
|
|
// _mm_or_si128(_mm_and_si128(a, emask), _mm_and_si128(b, omask)) |
|
|
|
|
// #define _mm_merge_epi16(a, b) _mm_or_si128(_mm_and_si128(a, emask), _mm_and_si128(b, omask)) |
|
|
|
|
// #define _mm_cvtloepi16_ps(a) _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(a,a), 16)) |
|
|
|
|
// #define _mm_cvthiepi16_ps(a) _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(a,a), 16)) |
|
|
|
|
// |
|
|
|
|