From 5aa9ac9a7788de69726bf696cc6e3ffd14f7d895 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Mon, 6 Oct 2014 19:21:57 +0400 Subject: [PATCH] Added OCL code for YUV422 -> RGB[A]|BGR[A] color conversion --- modules/imgproc/src/color.cpp | 32 ++++++-- modules/imgproc/src/opencl/cvtcolor.cl | 105 +++++++++++++++++------- modules/imgproc/test/ocl/test_color.cpp | 43 ++++++++++ 3 files changed, 146 insertions(+), 34 deletions(-) diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 6cca73652f..f363189579 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -4848,7 +4848,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) bool ok = false; UMat src = _src.getUMat(), dst; Size sz = src.size(), dstSz = sz; - int scn = src.channels(), depth = src.depth(), bidx, uidx; + int scn = src.channels(), depth = src.depth(), bidx, uidx, yidx; int dims = 2, stripeSize = 1; ocl::Kernel k; @@ -4967,14 +4967,14 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); dcn = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2RGBA_NV12 || code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2RGBA_NV21 ? 4 : 3; - bidx = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2BGR_NV12 || + bidx = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2BGR_NV12 || code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2BGR_NV21 ? 0 : 2; uidx = code == COLOR_YUV2RGBA_NV21 || code == COLOR_YUV2RGB_NV21 || code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2BGR_NV21 ? 1 : 0; dstSz = Size(sz.width, sz.height * 2 / 3); globalsize[0] = dstSz.width / 2; globalsize[1] = (dstSz.height/2 + pxPerWIy - 1) / pxPerWIy; - k.create("YUV2RGB_NV", ocl::imgproc::cvtcolor_oclsrc, + k.create("YUV2RGB_NVx", ocl::imgproc::cvtcolor_oclsrc, opts + format("-D dcn=%d -D bidx=%d -D uidx=%d", dcn, bidx, uidx)); break; } @@ -4985,7 +4985,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); dcn = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2RGBA_YV12 || code == COLOR_YUV2BGRA_IYUV || code == COLOR_YUV2RGBA_IYUV ? 4 : 3; - bidx = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2BGR_YV12 || + bidx = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2BGR_YV12 || code == COLOR_YUV2BGRA_IYUV || code == COLOR_YUV2BGR_IYUV ? 0 : 2; uidx = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2BGR_YV12 || code == COLOR_YUV2RGBA_YV12 || code == COLOR_YUV2RGB_YV12 ? 1 : 0; @@ -5015,7 +5015,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) case COLOR_RGB2YUV_IYUV: case COLOR_BGR2YUV_IYUV: case COLOR_RGBA2YUV_IYUV: case COLOR_BGRA2YUV_IYUV: { if (dcn <= 0) dcn = 1; - bidx = code == COLOR_BGRA2YUV_YV12 || code == COLOR_BGR2YUV_YV12 || + bidx = code == COLOR_BGRA2YUV_YV12 || code == COLOR_BGR2YUV_YV12 || code == COLOR_BGRA2YUV_IYUV || code == COLOR_BGR2YUV_IYUV ? 0 : 2; uidx = code == COLOR_RGBA2YUV_YV12 || code == COLOR_RGB2YUV_YV12 || code == COLOR_BGRA2YUV_YV12 || code == COLOR_BGR2YUV_YV12 ? 1 : 0; @@ -5030,6 +5030,28 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) opts + format("-D dcn=%d -D bidx=%d -D uidx=%d", dcn, bidx, uidx)); break; } + case COLOR_YUV2RGB_UYVY: case COLOR_YUV2BGR_UYVY: case COLOR_YUV2RGBA_UYVY: case COLOR_YUV2BGRA_UYVY: + case COLOR_YUV2RGB_YUY2: case COLOR_YUV2BGR_YUY2: case COLOR_YUV2RGB_YVYU: case COLOR_YUV2BGR_YVYU: + case COLOR_YUV2RGBA_YUY2: case COLOR_YUV2BGRA_YUY2: case COLOR_YUV2RGBA_YVYU: case COLOR_YUV2BGRA_YVYU: + { + if (dcn <= 0) + dcn = (code==COLOR_YUV2RGBA_UYVY || code==COLOR_YUV2BGRA_UYVY || code==COLOR_YUV2RGBA_YUY2 || + code==COLOR_YUV2BGRA_YUY2 || code==COLOR_YUV2RGBA_YVYU || code==COLOR_YUV2BGRA_YVYU) ? 4 : 3; + + bidx = (code==COLOR_YUV2BGR_UYVY || code==COLOR_YUV2BGRA_UYVY || code==COLOR_YUV2BGRA_YUY2 || + code==COLOR_YUV2BGR_YUY2 || code==COLOR_YUV2BGRA_YVYU || code==COLOR_YUV2BGR_YVYU) ? 0 : 2; + yidx = (code==COLOR_YUV2RGB_UYVY || code==COLOR_YUV2RGBA_UYVY || code==COLOR_YUV2BGR_UYVY || code==COLOR_YUV2BGRA_UYVY) ? 1 : 0; + uidx = (code==COLOR_YUV2RGB_YVYU || code==COLOR_YUV2RGBA_YVYU || + code==COLOR_YUV2BGR_YVYU || code==COLOR_YUV2BGRA_YVYU) ? 2 : 0; + uidx = 1 - yidx + uidx; + + CV_Assert( dcn == 3 || dcn == 4 ); + CV_Assert( scn == 2 && depth == CV_8U ); + + k.create("YUV2RGB_422", ocl::imgproc::cvtcolor_oclsrc, + opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d", dcn, bidx, uidx, yidx)); + break; + } case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb: { diff --git a/modules/imgproc/src/opencl/cvtcolor.cl b/modules/imgproc/src/opencl/cvtcolor.cl index f57c0c068c..cf7c06ee68 100644 --- a/modules/imgproc/src/opencl/cvtcolor.cl +++ b/modules/imgproc/src/opencl/cvtcolor.cl @@ -300,12 +300,12 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int src_step, int src_offset __constant int ITUR_BT_601_CY = 1220542; __constant int ITUR_BT_601_CUB = 2116026; -__constant int ITUR_BT_601_CUG = 409993; -__constant int ITUR_BT_601_CVG = 852492; +__constant int ITUR_BT_601_CUG = -409993; +__constant int ITUR_BT_601_CVG = -852492; __constant int ITUR_BT_601_CVR = 1673527; __constant int ITUR_BT_601_SHIFT = 20; -__kernel void YUV2RGB_NV(__global const uchar* srcptr, int src_step, int src_offset, +__kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_offset, __global uchar* dstptr, int dst_step, int dt_offset, int rows, int cols) { @@ -329,41 +329,41 @@ __kernel void YUV2RGB_NV(__global const uchar* srcptr, int src_step, int src_off int Y3 = ysrc[src_step]; int Y4 = ysrc[src_step + 1]; - int U = ((int)usrc[uidx]) - 128; - int V = ((int)usrc[1-uidx]) - 128; + int U = ((int)usrc[uidx]) - HALF_MAX; + int V = ((int)usrc[1-uidx]) - HALF_MAX; - int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * V; - int guv = (1 << (ITUR_BT_601_SHIFT - 1)) - ITUR_BT_601_CVG * V - ITUR_BT_601_CUG * U; - int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * U; + int ruv = mad24(ITUR_BT_601_CVR, V, (1 << (ITUR_BT_601_SHIFT - 1))); + int guv = mad24(ITUR_BT_601_CVG, V, mad24(ITUR_BT_601_CUG, U, (1 << (ITUR_BT_601_SHIFT - 1)))); + int buv = mad24(ITUR_BT_601_CUB, U, (1 << (ITUR_BT_601_SHIFT - 1))); - Y1 = max(0, Y1 - 16) * ITUR_BT_601_CY; - dst1[2 - bidx] = convert_uchar_sat((Y1 + ruv) >> ITUR_BT_601_SHIFT); + Y1 = mul24(max(0, Y1 - 16), ITUR_BT_601_CY); + dst1[2 - bidx] = convert_uchar_sat((Y1 + ruv) >> ITUR_BT_601_SHIFT); dst1[1] = convert_uchar_sat((Y1 + guv) >> ITUR_BT_601_SHIFT); - dst1[bidx] = convert_uchar_sat((Y1 + buv) >> ITUR_BT_601_SHIFT); + dst1[bidx] = convert_uchar_sat((Y1 + buv) >> ITUR_BT_601_SHIFT); #if dcn == 4 dst1[3] = 255; #endif - Y2 = max(0, Y2 - 16) * ITUR_BT_601_CY; + Y2 = mul24(max(0, Y2 - 16), ITUR_BT_601_CY); dst1[dcn + 2 - bidx] = convert_uchar_sat((Y2 + ruv) >> ITUR_BT_601_SHIFT); dst1[dcn + 1] = convert_uchar_sat((Y2 + guv) >> ITUR_BT_601_SHIFT); - dst1[dcn + bidx] = convert_uchar_sat((Y2 + buv) >> ITUR_BT_601_SHIFT); + dst1[dcn + bidx] = convert_uchar_sat((Y2 + buv) >> ITUR_BT_601_SHIFT); #if dcn == 4 dst1[7] = 255; #endif - Y3 = max(0, Y3 - 16) * ITUR_BT_601_CY; - dst2[2 - bidx] = convert_uchar_sat((Y3 + ruv) >> ITUR_BT_601_SHIFT); + Y3 = mul24(max(0, Y3 - 16), ITUR_BT_601_CY); + dst2[2 - bidx] = convert_uchar_sat((Y3 + ruv) >> ITUR_BT_601_SHIFT); dst2[1] = convert_uchar_sat((Y3 + guv) >> ITUR_BT_601_SHIFT); - dst2[bidx] = convert_uchar_sat((Y3 + buv) >> ITUR_BT_601_SHIFT); + dst2[bidx] = convert_uchar_sat((Y3 + buv) >> ITUR_BT_601_SHIFT); #if dcn == 4 dst2[3] = 255; #endif - Y4 = max(0, Y4 - 16) * ITUR_BT_601_CY; + Y4 = mul24(max(0, Y4 - 16), ITUR_BT_601_CY); dst2[dcn + 2 - bidx] = convert_uchar_sat((Y4 + ruv) >> ITUR_BT_601_SHIFT); dst2[dcn + 1] = convert_uchar_sat((Y4 + guv) >> ITUR_BT_601_SHIFT); - dst2[dcn + bidx] = convert_uchar_sat((Y4 + buv) >> ITUR_BT_601_SHIFT); + dst2[dcn + bidx] = convert_uchar_sat((Y4 + buv) >> ITUR_BT_601_SHIFT); #if dcn == 4 dst2[7] = 255; #endif @@ -399,21 +399,21 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int #ifdef SRC_CONT __global const uchar* uvsrc = srcptr + mad24(rows, src_step, src_offset); int u_ind = mad24(y, cols >> 1, x); - int uv[2] = { ((int)uvsrc[u_ind]) - 128, ((int)uvsrc[u_ind + ((rows * cols) >> 2)]) - 128 }; + int uv[2] = { ((int)uvsrc[u_ind]) - HALF_MAX, ((int)uvsrc[u_ind + ((rows * cols) >> 2)]) - HALF_MAX }; #else int vsteps[2] = { cols >> 1, src_step - (cols >> 1)}; __global const uchar* usrc = srcptr + mad24(rows + (y>>1), src_step, src_offset + (y%2)*(cols >> 1) + x); __global const uchar* vsrc = usrc + mad24(rows >> 2, src_step, rows % 4 ? vsteps[y%2] : 0); - int uv[2] = { ((int)usrc[0]) - 128, ((int)vsrc[0]) - 128 }; + int uv[2] = { ((int)usrc[0]) - HALF_MAX, ((int)vsrc[0]) - HALF_MAX }; #endif - int u = uv[uidx]; - int v = uv[1-uidx]; + int U = uv[uidx]; + int V = uv[1-uidx]; - int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v; - int guv = (1 << (ITUR_BT_601_SHIFT - 1)) - ITUR_BT_601_CVG * v - ITUR_BT_601_CUG * u; - int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u; + int ruv = mad24(ITUR_BT_601_CVR, V, (1 << (ITUR_BT_601_SHIFT - 1))); + int guv = mad24(ITUR_BT_601_CVG, V, mad24(ITUR_BT_601_CUG, U, (1 << (ITUR_BT_601_SHIFT - 1)))); + int buv = mad24(ITUR_BT_601_CUB, U, (1 << (ITUR_BT_601_SHIFT - 1))); - Y1 = max(0, Y1 - 16) * ITUR_BT_601_CY; + Y1 = mul24(max(0, Y1 - 16), ITUR_BT_601_CY); dst1[2 - bidx] = convert_uchar_sat((Y1 + ruv) >> ITUR_BT_601_SHIFT); dst1[1] = convert_uchar_sat((Y1 + guv) >> ITUR_BT_601_SHIFT); dst1[bidx] = convert_uchar_sat((Y1 + buv) >> ITUR_BT_601_SHIFT); @@ -421,7 +421,7 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int dst1[3] = 255; #endif - Y2 = max(0, Y2 - 16) * ITUR_BT_601_CY; + Y2 = mul24(max(0, Y2 - 16), ITUR_BT_601_CY); dst1[dcn + 2 - bidx] = convert_uchar_sat((Y2 + ruv) >> ITUR_BT_601_SHIFT); dst1[dcn + 1] = convert_uchar_sat((Y2 + guv) >> ITUR_BT_601_SHIFT); dst1[dcn + bidx] = convert_uchar_sat((Y2 + buv) >> ITUR_BT_601_SHIFT); @@ -429,7 +429,7 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int dst1[7] = 255; #endif - Y3 = max(0, Y3 - 16) * ITUR_BT_601_CY; + Y3 = mul24(max(0, Y3 - 16), ITUR_BT_601_CY); dst2[2 - bidx] = convert_uchar_sat((Y3 + ruv) >> ITUR_BT_601_SHIFT); dst2[1] = convert_uchar_sat((Y3 + guv) >> ITUR_BT_601_SHIFT); dst2[bidx] = convert_uchar_sat((Y3 + buv) >> ITUR_BT_601_SHIFT); @@ -437,7 +437,7 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int dst2[3] = 255; #endif - Y4 = max(0, Y4 - 16) * ITUR_BT_601_CY; + Y4 = mul24(max(0, Y4 - 16), ITUR_BT_601_CY); dst2[dcn + 2 - bidx] = convert_uchar_sat((Y4 + ruv) >> ITUR_BT_601_SHIFT); dst2[dcn + 1] = convert_uchar_sat((Y4 + guv) >> ITUR_BT_601_SHIFT); dst2[dcn + bidx] = convert_uchar_sat((Y4 + buv) >> ITUR_BT_601_SHIFT); @@ -517,6 +517,53 @@ __kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int } } +__kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_offset, + __global uchar* dstptr, int dst_step, int dst_offset, + int rows, int cols) +{ + int x = get_global_id(0); + int y = get_global_id(1) * PIX_PER_WI_Y; + + if (x < cols / 2) + { + __global const uchar* src = srcptr + mad24(y, src_step, (x << 2) + src_offset); + __global uchar* dst = dstptr + mad24(y, dst_step, mad24(x << 1, dcn, dst_offset)); + + #pragma unroll + for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) + { + if (y < rows ) + { + int U = ((int) src[uidx]) - HALF_MAX; + int V = ((int) src[(2 + uidx) % 4]) - HALF_MAX; + + int ruv = mad24(ITUR_BT_601_CVR, V, (1 << (ITUR_BT_601_SHIFT - 1))); + int guv = mad24(ITUR_BT_601_CVG, V, mad24(ITUR_BT_601_CUG, U, (1 << (ITUR_BT_601_SHIFT - 1)))); + int buv = mad24(ITUR_BT_601_CUB, U, (1 << (ITUR_BT_601_SHIFT - 1))); + + int y00 = max(0, ((int) src[yidx]) - 16) * ITUR_BT_601_CY; + dst[2 - bidx] = convert_uchar_sat((y00 + ruv) >> ITUR_BT_601_SHIFT); + dst[1] = convert_uchar_sat((y00 + guv) >> ITUR_BT_601_SHIFT); + dst[bidx] = convert_uchar_sat((y00 + buv) >> ITUR_BT_601_SHIFT); +#if dcn == 4 + dst[3] = 255; +#endif + + int y01 = max(0, ((int) src[yidx + 2]) - 16) * ITUR_BT_601_CY; + dst[dcn + 2 - bidx] = convert_uchar_sat((y01 + ruv) >> ITUR_BT_601_SHIFT); + dst[dcn + 1] = convert_uchar_sat((y01 + guv) >> ITUR_BT_601_SHIFT); + dst[dcn + bidx] = convert_uchar_sat((y01 + buv) >> ITUR_BT_601_SHIFT); +#if dcn == 4 + dst[7] = 255; +#endif + } + ++y; + src += src_step; + dst += dst_step; + } + } +} + ///////////////////////////////////// RGB <-> YCrCb ////////////////////////////////////// __constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f}; diff --git a/modules/imgproc/test/ocl/test_color.cpp b/modules/imgproc/test/ocl/test_color.cpp index 1484db9720..89affcfac7 100644 --- a/modules/imgproc/test/ocl/test_color.cpp +++ b/modules/imgproc/test/ocl/test_color.cpp @@ -396,6 +396,44 @@ OCL_TEST_P(CvtColor_RGB2YUV_420, BGRA2YUV_IYUV) { performTest(4, 1, CVTCODE(BGRA OCL_TEST_P(CvtColor_RGB2YUV_420, RGB2YUV_IYUV) { performTest(3, 1, CVTCODE(RGB2YUV_IYUV)); } OCL_TEST_P(CvtColor_RGB2YUV_420, BGR2YUV_IYUV) { performTest(3, 1, CVTCODE(BGR2YUV_IYUV)); } +// YUV422 -> RGBA + +struct CvtColor_YUV2RGB_422 : + public CvtColor +{ + void generateTestData(int channelsIn, int channelsOut) + { + const int srcType = CV_MAKE_TYPE(depth, channelsIn); + const int dstType = CV_MAKE_TYPE(depth, channelsOut); + + Size roiSize = randomSize(1, MAX_VALUE); + roiSize.width *= 2; + + Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(src, src_roi, roiSize, srcBorder, srcType, 2, 100); + + Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(dst, dst_roi, roiSize, dstBorder, dstType, 5, 16); + + UMAT_UPLOAD_INPUT_PARAMETER(src); + UMAT_UPLOAD_OUTPUT_PARAMETER(dst); + } +}; + +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2RGB_UYVY) { performTest(2, 3, CVTCODE(YUV2RGB_UYVY)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2BGR_UYVY) { performTest(2, 3, CVTCODE(YUV2BGR_UYVY)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2RGBA_UYVY) { performTest(2, 4, CVTCODE(YUV2RGBA_UYVY)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2BGRA_UYVY) { performTest(2, 4, CVTCODE(YUV2BGRA_UYVY)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2RGB_YUY2) { performTest(2, 3, CVTCODE(YUV2RGB_YUY2)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2BGR_YUY2) { performTest(2, 3, CVTCODE(YUV2BGR_YUY2)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2RGBA_YUY2) { performTest(2, 4, CVTCODE(YUV2RGBA_YUY2)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2BGRA_YUY2) { performTest(2, 4, CVTCODE(YUV2BGRA_YUY2)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2RGB_YVYU) { performTest(2, 3, CVTCODE(YUV2RGB_YVYU)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2BGR_YVYU) { performTest(2, 3, CVTCODE(YUV2BGR_YVYU)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2RGBA_YVYU) { performTest(2, 4, CVTCODE(YUV2RGBA_YVYU)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2BGRA_YVYU) { performTest(2, 4, CVTCODE(YUV2BGRA_YVYU)); } + + OCL_INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor8u, testing::Combine(testing::Values(MatDepth(CV_8U)), Bool())); @@ -417,6 +455,11 @@ OCL_INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor_RGB2YUV_420, testing::Values(MatDepth(CV_8U)), Bool())); +OCL_INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor_YUV2RGB_422, + testing::Combine( + testing::Values(MatDepth(CV_8U)), + Bool())); + } } // namespace cvtest::ocl #endif