From 85b60ee3cb109fc35a5f5f9b1c7bd74f792ff93a Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Wed, 1 Oct 2014 18:08:07 +0400 Subject: [PATCH 1/8] Added support for YUV2RGB[A]_NV21 and YUV2BGR[A]_NV21 conversion --- modules/imgproc/src/color.cpp | 18 +++++++++++------- modules/imgproc/src/opencl/cvtcolor.cl | 10 +++++++--- modules/imgproc/test/ocl/test_color.cpp | 6 +++++- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 21b0b25f33..c7743cb93f 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -4848,7 +4848,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) bool ok = false; UMat src = _src.getUMat(), dst; Size sz = src.size(), dstSz = sz; - int scn = src.channels(), depth = src.depth(), bidx; + int scn = src.channels(), depth = src.depth(), bidx, uidx; int dims = 2, stripeSize = 1; ocl::Kernel k; @@ -4960,17 +4960,21 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) opts + format("-D dcn=%d -D bidx=%d", dcn, bidx)); break; } - case COLOR_YUV2RGB_NV12: case COLOR_YUV2BGR_NV12: - case COLOR_YUV2RGBA_NV12: case COLOR_YUV2BGRA_NV12: + case COLOR_YUV2RGB_NV12: case COLOR_YUV2BGR_NV12: case COLOR_YUV2RGB_NV21: case COLOR_YUV2BGR_NV21: + case COLOR_YUV2RGBA_NV12: case COLOR_YUV2BGRA_NV12: case COLOR_YUV2RGBA_NV21: case COLOR_YUV2BGRA_NV21: { CV_Assert( scn == 1 ); CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); - dcn = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2RGBA_NV12 ? 4 : 3; - bidx = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2BGR_NV12 ? 0 : 2; + dcn = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2RGBA_NV12 || + code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2RGBA_NV21 ? 4 : 3; + bidx = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2BGR_NV12 || + code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2BGR_NV21 ? 0 : 2; + uidx = code == COLOR_YUV2RGBA_NV21 || code == COLOR_YUV2RGB_NV21 || + code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2BGR_NV21 ? 1 : 0; dstSz = Size(sz.width, sz.height * 2 / 3); - k.create("YUV2RGB_NV12", ocl::imgproc::cvtcolor_oclsrc, - opts + format("-D dcn=%d -D bidx=%d", dcn, bidx)); + k.create("YUV2RGB_NVx", ocl::imgproc::cvtcolor_oclsrc, + opts + format("-D dcn=%d -D bidx=%d -D uidx=%d", dcn, bidx, uidx)); break; } case COLOR_BGR2YCrCb: diff --git a/modules/imgproc/src/opencl/cvtcolor.cl b/modules/imgproc/src/opencl/cvtcolor.cl index 365f470c11..b647f80045 100644 --- a/modules/imgproc/src/opencl/cvtcolor.cl +++ b/modules/imgproc/src/opencl/cvtcolor.cl @@ -111,6 +111,10 @@ enum #define B_COMP w #endif +#ifndef uidx +#define uidx 0 +#endif + #define __CAT(x, y) x##y #define CAT(x, y) __CAT(x, y) @@ -297,7 +301,7 @@ __constant int ITUR_BT_601_CVG = 852492; __constant int ITUR_BT_601_CVR = 1673527; __constant int ITUR_BT_601_SHIFT = 20; -__kernel void YUV2RGB_NV12(__global const uchar* srcptr, int src_step, int src_offset, +__kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_offset, __global uchar* dstptr, int dst_step, int dt_offset, int rows, int cols) { @@ -321,8 +325,8 @@ __kernel void YUV2RGB_NV12(__global const uchar* srcptr, int src_step, int src_o int Y3 = ysrc[src_step]; int Y4 = ysrc[src_step + 1]; - int U = usrc[0] - 128; - int V = usrc[1] - 128; + int U = usrc[uidx] - 128; + int V = usrc[1 - uidx] - 128; int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * V; int guv = (1 << (ITUR_BT_601_SHIFT - 1)) - ITUR_BT_601_CVG * V - ITUR_BT_601_CUG * U; diff --git a/modules/imgproc/test/ocl/test_color.cpp b/modules/imgproc/test/ocl/test_color.cpp index 818d6a85ab..2130eca20e 100644 --- a/modules/imgproc/test/ocl/test_color.cpp +++ b/modules/imgproc/test/ocl/test_color.cpp @@ -320,7 +320,7 @@ OCL_TEST_P(CvtColor8u32f, Luv2RGBA) { performTest(3, 4, CVTCODE(Luv2RGB), depth OCL_TEST_P(CvtColor8u32f, Luv2LBGRA) { performTest(3, 4, CVTCODE(Luv2LBGR), depth == CV_8U ? 1 : 1e-5); } OCL_TEST_P(CvtColor8u32f, Luv2LRGBA) { performTest(3, 4, CVTCODE(Luv2LRGB), depth == CV_8U ? 1 : 1e-5); } -// YUV -> RGBA_NV12 +// YUV -> RGBA_NVx struct CvtColor_YUV420 : public CvtColor @@ -348,6 +348,10 @@ OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_NV12) { performTest(1, 4, COLOR_YUV2RGBA_NV OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV12) { performTest(1, 4, COLOR_YUV2BGRA_NV12); } OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV12) { performTest(1, 3, COLOR_YUV2RGB_NV12); } OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV12) { performTest(1, 3, COLOR_YUV2BGR_NV12); } +OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_NV21) { performTest(1, 4, COLOR_YUV2RGBA_NV21); } +OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV21) { performTest(1, 4, COLOR_YUV2BGRA_NV21); } +OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV21) { performTest(1, 3, COLOR_YUV2RGB_NV21); } +OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV21) { performTest(1, 3, COLOR_YUV2BGR_NV21); } OCL_INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor8u, From 1cc17a7186017b0e6178f0a78ba6c7da820edfe8 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Thu, 2 Oct 2014 18:10:14 +0400 Subject: [PATCH 2/8] Added OCL code for YUV2BGR_YV12 and YUV2BGR_IYUV color conversions --- modules/imgproc/src/color.cpp | 22 ++++++- modules/imgproc/src/opencl/cvtcolor.cl | 85 +++++++++++++++++++++++-- modules/imgproc/test/ocl/test_color.cpp | 10 ++- 3 files changed, 111 insertions(+), 6 deletions(-) diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index c7743cb93f..324b824ae0 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -4973,10 +4973,30 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2BGR_NV21 ? 1 : 0; dstSz = Size(sz.width, sz.height * 2 / 3); - k.create("YUV2RGB_NVx", ocl::imgproc::cvtcolor_oclsrc, + globalsize[0] = dstSz.width / 2; globalsize[1] = (dstSz.height/2 + pxPerWIy - 1) / pxPerWIy; + k.create("YUV2RGB_NV", ocl::imgproc::cvtcolor_oclsrc, opts + format("-D dcn=%d -D bidx=%d -D uidx=%d", dcn, bidx, uidx)); break; } + case COLOR_YUV2BGR_YV12: case COLOR_YUV2RGB_YV12: case COLOR_YUV2BGRA_YV12: case COLOR_YUV2RGBA_YV12: + case COLOR_YUV2BGR_IYUV: case COLOR_YUV2RGB_IYUV: case COLOR_YUV2BGRA_IYUV: case COLOR_YUV2RGBA_IYUV: + { + CV_Assert( scn == 1 ); + CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); + dcn = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2RGBA_YV12 || + code == COLOR_YUV2BGRA_IYUV || code == COLOR_YUV2RGBA_IYUV ? 4 : 3; + bidx = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2BGR_YV12 || + code == COLOR_YUV2BGRA_IYUV || code == COLOR_YUV2BGR_IYUV ? 0 : 2; + uidx = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2BGR_YV12 || + code == COLOR_YUV2RGBA_YV12 || code == COLOR_YUV2RGB_YV12 ? 1 : 0; + + dstSz = Size(sz.width, sz.height * 2 / 3); + globalsize[0] = dstSz.width / 2; globalsize[1] = (dstSz.height/2 + pxPerWIy - 1) / pxPerWIy; + k.create("YUV2RGB_YV12_IYUV", ocl::imgproc::cvtcolor_oclsrc, + opts + format("-D dcn=%d -D bidx=%d -D uidx=%d%s", dcn, bidx, uidx, + src.isContinuous() ? " -D SRC_CONT" : "")); + break; + } case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb: { diff --git a/modules/imgproc/src/opencl/cvtcolor.cl b/modules/imgproc/src/opencl/cvtcolor.cl index b647f80045..4ac516ae8c 100644 --- a/modules/imgproc/src/opencl/cvtcolor.cl +++ b/modules/imgproc/src/opencl/cvtcolor.cl @@ -301,7 +301,7 @@ __constant int ITUR_BT_601_CVG = 852492; __constant int ITUR_BT_601_CVR = 1673527; __constant int ITUR_BT_601_SHIFT = 20; -__kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_offset, +__kernel void YUV2RGB_NV(__global const uchar* srcptr, int src_step, int src_offset, __global uchar* dstptr, int dst_step, int dt_offset, int rows, int cols) { @@ -318,15 +318,15 @@ __kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_of __global const uchar* ysrc = srcptr + mad24(y << 1, src_step, (x << 1) + src_offset); __global const uchar* usrc = srcptr + mad24(rows + y, src_step, (x << 1) + src_offset); __global uchar* dst1 = dstptr + mad24(y << 1, dst_step, x * (dcn<<1) + dt_offset); - __global uchar* dst2 = dstptr + mad24((y << 1) + 1, dst_step, x * (dcn<<1) + dt_offset); + __global uchar* dst2 = dst1 + dst_step; int Y1 = ysrc[0]; int Y2 = ysrc[1]; int Y3 = ysrc[src_step]; int Y4 = ysrc[src_step + 1]; - int U = usrc[uidx] - 128; - int V = usrc[1 - uidx] - 128; + int U = ((int)usrc[uidx]) - 128; + int V = ((int)usrc[1-uidx]) - 128; int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * V; int guv = (1 << (ITUR_BT_601_SHIFT - 1)) - ITUR_BT_601_CVG * V - ITUR_BT_601_CUG * U; @@ -369,6 +369,83 @@ __kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_of } } +__kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int src_offset, + __global uchar* dstptr, int dst_step, int dt_offset, + int rows, int cols) +{ + int x = get_global_id(0); + int y = get_global_id(1) * PIX_PER_WI_Y; + + if (x < cols / 2) + { + #pragma unroll + for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) + { + if (y < rows / 2 ) + { + __global const uchar* ysrc = srcptr + mad24(y << 1, src_step, (x << 1) + src_offset); + __global uchar* dst1 = dstptr + mad24(y << 1, dst_step, x * (dcn<<1) + dt_offset); + __global uchar* dst2 = dst1 + dst_step; + + int Y1 = ysrc[0]; + int Y2 = ysrc[1]; + int Y3 = ysrc[src_step]; + int Y4 = ysrc[src_step + 1]; + +#ifdef SRC_CONT + __global const uchar* uvsrc = srcptr + mad24(rows, src_step, src_offset); + int u_ind = mad24(y, cols >> 1, x); + int uv[2] = { ((int)uvsrc[u_ind]) - 128, ((int)uvsrc[u_ind + ((rows * cols) >> 2)]) - 128 }; +#else + int vsteps[2] = { cols >> 1, src_step - (cols >> 1)}; + __global const uchar* usrc = srcptr + mad24(rows + (y>>1), src_step, src_offset + (y%2)*(cols >> 1) + x); + __global const uchar* vsrc = usrc + mad24(rows >> 2, src_step, rows % 4 ? vsteps[y%2] : 0); + int uv[2] = { ((int)usrc[0]) - 128, ((int)vsrc[0]) - 128 }; +#endif + int u = uv[uidx]; + int v = uv[1-uidx]; + + int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v; + int guv = (1 << (ITUR_BT_601_SHIFT - 1)) - ITUR_BT_601_CVG * v - ITUR_BT_601_CUG * u; + int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u; + + Y1 = max(0, Y1 - 16) * ITUR_BT_601_CY; + dst1[2 - bidx] = convert_uchar_sat((Y1 + ruv) >> ITUR_BT_601_SHIFT); + dst1[1] = convert_uchar_sat((Y1 + guv) >> ITUR_BT_601_SHIFT); + dst1[bidx] = convert_uchar_sat((Y1 + buv) >> ITUR_BT_601_SHIFT); +#if dcn == 4 + dst1[3] = 255; +#endif + + Y2 = max(0, Y2 - 16) * ITUR_BT_601_CY; + dst1[dcn + 2 - bidx] = convert_uchar_sat((Y2 + ruv) >> ITUR_BT_601_SHIFT); + dst1[dcn + 1] = convert_uchar_sat((Y2 + guv) >> ITUR_BT_601_SHIFT); + dst1[dcn + bidx] = convert_uchar_sat((Y2 + buv) >> ITUR_BT_601_SHIFT); +#if dcn == 4 + dst1[7] = 255; +#endif + + Y3 = max(0, Y3 - 16) * ITUR_BT_601_CY; + dst2[2 - bidx] = convert_uchar_sat((Y3 + ruv) >> ITUR_BT_601_SHIFT); + dst2[1] = convert_uchar_sat((Y3 + guv) >> ITUR_BT_601_SHIFT); + dst2[bidx] = convert_uchar_sat((Y3 + buv) >> ITUR_BT_601_SHIFT); +#if dcn == 4 + dst2[3] = 255; +#endif + + Y4 = max(0, Y4 - 16) * ITUR_BT_601_CY; + dst2[dcn + 2 - bidx] = convert_uchar_sat((Y4 + ruv) >> ITUR_BT_601_SHIFT); + dst2[dcn + 1] = convert_uchar_sat((Y4 + guv) >> ITUR_BT_601_SHIFT); + dst2[dcn + bidx] = convert_uchar_sat((Y4 + buv) >> ITUR_BT_601_SHIFT); +#if dcn == 4 + dst2[7] = 255; +#endif + } + ++y; + } + } +} + ///////////////////////////////////// RGB <-> YCrCb ////////////////////////////////////// __constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f}; diff --git a/modules/imgproc/test/ocl/test_color.cpp b/modules/imgproc/test/ocl/test_color.cpp index 2130eca20e..2055326a90 100644 --- a/modules/imgproc/test/ocl/test_color.cpp +++ b/modules/imgproc/test/ocl/test_color.cpp @@ -320,7 +320,7 @@ OCL_TEST_P(CvtColor8u32f, Luv2RGBA) { performTest(3, 4, CVTCODE(Luv2RGB), depth OCL_TEST_P(CvtColor8u32f, Luv2LBGRA) { performTest(3, 4, CVTCODE(Luv2LBGR), depth == CV_8U ? 1 : 1e-5); } OCL_TEST_P(CvtColor8u32f, Luv2LRGBA) { performTest(3, 4, CVTCODE(Luv2LRGB), depth == CV_8U ? 1 : 1e-5); } -// YUV -> RGBA_NVx +// YUV420 -> RGBA struct CvtColor_YUV420 : public CvtColor @@ -352,6 +352,14 @@ OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_NV21) { performTest(1, 4, COLOR_YUV2RGBA_NV OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV21) { performTest(1, 4, COLOR_YUV2BGRA_NV21); } OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV21) { performTest(1, 3, COLOR_YUV2RGB_NV21); } OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV21) { performTest(1, 3, COLOR_YUV2BGR_NV21); } +OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_YV12) { performTest(1, 4, COLOR_YUV2RGBA_YV12); } +OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_YV12) { performTest(1, 4, COLOR_YUV2BGRA_YV12); } +OCL_TEST_P(CvtColor_YUV420, YUV2RGB_YV12) { performTest(1, 3, COLOR_YUV2RGB_YV12); } +OCL_TEST_P(CvtColor_YUV420, YUV2BGR_YV12) { performTest(1, 3, COLOR_YUV2BGR_YV12); } +OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_IYUV) { performTest(1, 4, COLOR_YUV2RGBA_IYUV); } +OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_IYUV) { performTest(1, 4, COLOR_YUV2BGRA_IYUV); } +OCL_TEST_P(CvtColor_YUV420, YUV2RGB_IYUV) { performTest(1, 3, COLOR_YUV2RGB_IYUV); } +OCL_TEST_P(CvtColor_YUV420, YUV2BGR_IYUV) { performTest(1, 3, COLOR_YUV2BGR_IYUV); } OCL_INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor8u, From 8c91604f5acd60b62e8e7d64f30ff58dc0c582e2 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Fri, 3 Oct 2014 12:29:15 +0400 Subject: [PATCH 3/8] Added OCL code for YUV2GRAY_420 color conversion --- modules/imgproc/src/color.cpp | 14 +++++++++++ modules/imgproc/test/ocl/test_color.cpp | 33 +++++++++++++------------ 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 324b824ae0..a6cebe6bc0 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -4997,6 +4997,20 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) src.isContinuous() ? " -D SRC_CONT" : "")); break; } + case COLOR_YUV2GRAY_420: + { + if (dcn <= 0) dcn = 1; + + CV_Assert( dcn == 1 ); + CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); + + dstSz = Size(sz.width, sz.height * 2 / 3); + _dst.create(dstSz, CV_MAKETYPE(depth, dcn)); + dst = _dst.getUMat(); + + src.rowRange(0, dstSz.height).copyTo(dst); + return true; + } case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb: { diff --git a/modules/imgproc/test/ocl/test_color.cpp b/modules/imgproc/test/ocl/test_color.cpp index 2055326a90..43343469c5 100644 --- a/modules/imgproc/test/ocl/test_color.cpp +++ b/modules/imgproc/test/ocl/test_color.cpp @@ -344,22 +344,23 @@ struct CvtColor_YUV420 : } }; -OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_NV12) { performTest(1, 4, COLOR_YUV2RGBA_NV12); } -OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV12) { performTest(1, 4, COLOR_YUV2BGRA_NV12); } -OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV12) { performTest(1, 3, COLOR_YUV2RGB_NV12); } -OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV12) { performTest(1, 3, COLOR_YUV2BGR_NV12); } -OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_NV21) { performTest(1, 4, COLOR_YUV2RGBA_NV21); } -OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV21) { performTest(1, 4, COLOR_YUV2BGRA_NV21); } -OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV21) { performTest(1, 3, COLOR_YUV2RGB_NV21); } -OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV21) { performTest(1, 3, COLOR_YUV2BGR_NV21); } -OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_YV12) { performTest(1, 4, COLOR_YUV2RGBA_YV12); } -OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_YV12) { performTest(1, 4, COLOR_YUV2BGRA_YV12); } -OCL_TEST_P(CvtColor_YUV420, YUV2RGB_YV12) { performTest(1, 3, COLOR_YUV2RGB_YV12); } -OCL_TEST_P(CvtColor_YUV420, YUV2BGR_YV12) { performTest(1, 3, COLOR_YUV2BGR_YV12); } -OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_IYUV) { performTest(1, 4, COLOR_YUV2RGBA_IYUV); } -OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_IYUV) { performTest(1, 4, COLOR_YUV2BGRA_IYUV); } -OCL_TEST_P(CvtColor_YUV420, YUV2RGB_IYUV) { performTest(1, 3, COLOR_YUV2RGB_IYUV); } -OCL_TEST_P(CvtColor_YUV420, YUV2BGR_IYUV) { performTest(1, 3, COLOR_YUV2BGR_IYUV); } +OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_NV12) { performTest(1, 4, CVTCODE(YUV2RGBA_NV12)); } +OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV12) { performTest(1, 4, CVTCODE(YUV2BGRA_NV12)); } +OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV12) { performTest(1, 3, CVTCODE(YUV2RGB_NV12)); } +OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV12) { performTest(1, 3, CVTCODE(YUV2BGR_NV12)); } +OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_NV21) { performTest(1, 4, CVTCODE(YUV2RGBA_NV21)); } +OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV21) { performTest(1, 4, CVTCODE(YUV2BGRA_NV21)); } +OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV21) { performTest(1, 3, CVTCODE(YUV2RGB_NV21)); } +OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV21) { performTest(1, 3, CVTCODE(YUV2BGR_NV21)); } +OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_YV12) { performTest(1, 4, CVTCODE(YUV2RGBA_YV12)); } +OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_YV12) { performTest(1, 4, CVTCODE(YUV2BGRA_YV12)); } +OCL_TEST_P(CvtColor_YUV420, YUV2RGB_YV12) { performTest(1, 3, CVTCODE(YUV2RGB_YV12)); } +OCL_TEST_P(CvtColor_YUV420, YUV2BGR_YV12) { performTest(1, 3, CVTCODE(YUV2BGR_YV12)); } +OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_IYUV) { performTest(1, 4, CVTCODE(YUV2RGBA_IYUV)); } +OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_IYUV) { performTest(1, 4, CVTCODE(YUV2BGRA_IYUV)); } +OCL_TEST_P(CvtColor_YUV420, YUV2RGB_IYUV) { performTest(1, 3, CVTCODE(YUV2RGB_IYUV)); } +OCL_TEST_P(CvtColor_YUV420, YUV2BGR_IYUV) { performTest(1, 3, CVTCODE(YUV2BGR_IYUV)); } +OCL_TEST_P(CvtColor_YUV420, YUV2GRAY_420) { performTest(1, 1, CVTCODE(YUV2GRAY_420)); } OCL_INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor8u, From c8707b891b1cd7ca9f717ce95598cdab26865aa3 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Mon, 6 Oct 2014 19:19:44 +0400 Subject: [PATCH 4/8] Added OCL code for RGB[A]|BGR[A] -> YUV_[YV12|IYUV] color conversion --- modules/imgproc/src/color.cpp | 19 +++++++ modules/imgproc/src/opencl/cvtcolor.cl | 73 +++++++++++++++++++++++- modules/imgproc/test/ocl/test_color.cpp | 76 ++++++++++++++++++------- 3 files changed, 148 insertions(+), 20 deletions(-) diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index a6cebe6bc0..6cca73652f 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -5011,6 +5011,25 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) src.rowRange(0, dstSz.height).copyTo(dst); return true; } + case COLOR_RGB2YUV_YV12: case COLOR_BGR2YUV_YV12: case COLOR_RGBA2YUV_YV12: case COLOR_BGRA2YUV_YV12: + case COLOR_RGB2YUV_IYUV: case COLOR_BGR2YUV_IYUV: case COLOR_RGBA2YUV_IYUV: case COLOR_BGRA2YUV_IYUV: + { + if (dcn <= 0) dcn = 1; + bidx = code == COLOR_BGRA2YUV_YV12 || code == COLOR_BGR2YUV_YV12 || + code == COLOR_BGRA2YUV_IYUV || code == COLOR_BGR2YUV_IYUV ? 0 : 2; + uidx = code == COLOR_RGBA2YUV_YV12 || code == COLOR_RGB2YUV_YV12 || + code == COLOR_BGRA2YUV_YV12 || code == COLOR_BGR2YUV_YV12 ? 1 : 0; + + CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U ); + CV_Assert( dcn == 1 ); + CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0 ); + + dstSz = Size(sz.width, sz.height / 2 * 3); + globalsize[0] = dstSz.width / 2; globalsize[1] = (dstSz.height/3 + pxPerWIy - 1) / pxPerWIy; + k.create("RGB2YUV_YV12_IYUV", ocl::imgproc::cvtcolor_oclsrc, + opts + format("-D dcn=%d -D bidx=%d -D uidx=%d", dcn, bidx, uidx)); + break; + } case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb: { diff --git a/modules/imgproc/src/opencl/cvtcolor.cl b/modules/imgproc/src/opencl/cvtcolor.cl index 4ac516ae8c..f57c0c068c 100644 --- a/modules/imgproc/src/opencl/cvtcolor.cl +++ b/modules/imgproc/src/opencl/cvtcolor.cl @@ -115,6 +115,10 @@ enum #define uidx 0 #endif +#ifndef yidx +#define yidx 0 +#endif + #define __CAT(x, y) x##y #define CAT(x, y) __CAT(x, y) @@ -317,7 +321,7 @@ __kernel void YUV2RGB_NV(__global const uchar* srcptr, int src_step, int src_off { __global const uchar* ysrc = srcptr + mad24(y << 1, src_step, (x << 1) + src_offset); __global const uchar* usrc = srcptr + mad24(rows + y, src_step, (x << 1) + src_offset); - __global uchar* dst1 = dstptr + mad24(y << 1, dst_step, x * (dcn<<1) + dt_offset); + __global uchar* dst1 = dstptr + mad24(y << 1, dst_step, mad24(x, dcn<<1, dt_offset)); __global uchar* dst2 = dst1 + dst_step; int Y1 = ysrc[0]; @@ -446,6 +450,73 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int } } +__constant int ITUR_BT_601_CRY = 269484; +__constant int ITUR_BT_601_CGY = 528482; +__constant int ITUR_BT_601_CBY = 102760; +__constant int ITUR_BT_601_CRU = -155188; +__constant int ITUR_BT_601_CGU = -305135; +__constant int ITUR_BT_601_CBU = 460324; +__constant int ITUR_BT_601_CGV = -385875; +__constant int ITUR_BT_601_CBV = -74448; +__constant int YSHIFT = 17301504; +__constant int UVSHIFT = 134742016; + +__kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int src_offset, + __global uchar* dstptr, int dst_step, int dst_offset, + int rows, int cols) +{ + int x = get_global_id(0); + int y = get_global_id(1) * PIX_PER_WI_Y; + + if (x < cols/2) + { + int src_index = mad24(y << 1, src_step, mad24(x << 1, scn, src_offset)); + int ydst_index = mad24(y << 1, dst_step, (x << 1) + dst_offset); + int y_rows = rows / 3 * 2; + int vsteps[2] = { cols >> 1, dst_step - (cols >> 1)}; + + #pragma unroll + for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) + { + if (y < rows / 3) + { + __global const uchar* src1 = srcptr + src_index; + __global const uchar* src2 = src1 + src_step; + __global uchar* ydst1 = dstptr + ydst_index; + __global uchar* ydst2 = ydst1 + dst_step; + + __global uchar* udst = dstptr + mad24(y_rows + (y>>1), dst_step, dst_offset + (y%2)*(cols >> 1) + x); + __global uchar* vdst = udst + mad24(y_rows >> 2, dst_step, y_rows % 4 ? vsteps[y%2] : 0); + + int4 src_pix1 = convert_int4(vload4(0, src1)); + int4 src_pix2 = convert_int4(vload4(0, src1+scn)); + int4 src_pix3 = convert_int4(vload4(0, src2)); + int4 src_pix4 = convert_int4(vload4(0, src2+scn)); + + int y00 = mad24(ITUR_BT_601_CRY, src_pix1.R_COMP, mad24(ITUR_BT_601_CGY, src_pix1.G_COMP, mad24(ITUR_BT_601_CBY, src_pix1.B_COMP, YSHIFT))); + int y01 = mad24(ITUR_BT_601_CRY, src_pix2.R_COMP, mad24(ITUR_BT_601_CGY, src_pix2.G_COMP, mad24(ITUR_BT_601_CBY, src_pix2.B_COMP, YSHIFT))); + int y10 = mad24(ITUR_BT_601_CRY, src_pix3.R_COMP, mad24(ITUR_BT_601_CGY, src_pix3.G_COMP, mad24(ITUR_BT_601_CBY, src_pix3.B_COMP, YSHIFT))); + int y11 = mad24(ITUR_BT_601_CRY, src_pix4.R_COMP, mad24(ITUR_BT_601_CGY, src_pix4.G_COMP, mad24(ITUR_BT_601_CBY, src_pix4.B_COMP, YSHIFT))); + + ydst1[0] = convert_uchar_sat(y00 >> ITUR_BT_601_SHIFT); + ydst1[1] = convert_uchar_sat(y01 >> ITUR_BT_601_SHIFT); + ydst2[0] = convert_uchar_sat(y10 >> ITUR_BT_601_SHIFT); + ydst2[1] = convert_uchar_sat(y11 >> ITUR_BT_601_SHIFT); + + int uv[2] = { mad24(ITUR_BT_601_CRU, src_pix1.R_COMP, mad24(ITUR_BT_601_CGU, src_pix1.G_COMP, mad24(ITUR_BT_601_CBU, src_pix1.B_COMP, UVSHIFT))), + mad24(ITUR_BT_601_CBU, src_pix1.R_COMP, mad24(ITUR_BT_601_CGV, src_pix1.G_COMP, mad24(ITUR_BT_601_CBV, src_pix1.B_COMP, UVSHIFT))) }; + + udst[0] = convert_uchar_sat(uv[uidx] >> ITUR_BT_601_SHIFT); + vdst[0] = convert_uchar_sat(uv[1-uidx] >> ITUR_BT_601_SHIFT); + + ++y; + src_index += 2*src_step; + ydst_index += 2*dst_step; + } + } + } +} + ///////////////////////////////////// RGB <-> YCrCb ////////////////////////////////////// __constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f}; diff --git a/modules/imgproc/test/ocl/test_color.cpp b/modules/imgproc/test/ocl/test_color.cpp index 43343469c5..1484db9720 100644 --- a/modules/imgproc/test/ocl/test_color.cpp +++ b/modules/imgproc/test/ocl/test_color.cpp @@ -322,7 +322,7 @@ OCL_TEST_P(CvtColor8u32f, Luv2LRGBA) { performTest(3, 4, CVTCODE(Luv2LRGB), dept // YUV420 -> RGBA -struct CvtColor_YUV420 : +struct CvtColor_YUV2RGB_420 : public CvtColor { void generateTestData(int channelsIn, int channelsOut) @@ -344,24 +344,57 @@ struct CvtColor_YUV420 : } }; -OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_NV12) { performTest(1, 4, CVTCODE(YUV2RGBA_NV12)); } -OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV12) { performTest(1, 4, CVTCODE(YUV2BGRA_NV12)); } -OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV12) { performTest(1, 3, CVTCODE(YUV2RGB_NV12)); } -OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV12) { performTest(1, 3, CVTCODE(YUV2BGR_NV12)); } -OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_NV21) { performTest(1, 4, CVTCODE(YUV2RGBA_NV21)); } -OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV21) { performTest(1, 4, CVTCODE(YUV2BGRA_NV21)); } -OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV21) { performTest(1, 3, CVTCODE(YUV2RGB_NV21)); } -OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV21) { performTest(1, 3, CVTCODE(YUV2BGR_NV21)); } -OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_YV12) { performTest(1, 4, CVTCODE(YUV2RGBA_YV12)); } -OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_YV12) { performTest(1, 4, CVTCODE(YUV2BGRA_YV12)); } -OCL_TEST_P(CvtColor_YUV420, YUV2RGB_YV12) { performTest(1, 3, CVTCODE(YUV2RGB_YV12)); } -OCL_TEST_P(CvtColor_YUV420, YUV2BGR_YV12) { performTest(1, 3, CVTCODE(YUV2BGR_YV12)); } -OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_IYUV) { performTest(1, 4, CVTCODE(YUV2RGBA_IYUV)); } -OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_IYUV) { performTest(1, 4, CVTCODE(YUV2BGRA_IYUV)); } -OCL_TEST_P(CvtColor_YUV420, YUV2RGB_IYUV) { performTest(1, 3, CVTCODE(YUV2RGB_IYUV)); } -OCL_TEST_P(CvtColor_YUV420, YUV2BGR_IYUV) { performTest(1, 3, CVTCODE(YUV2BGR_IYUV)); } -OCL_TEST_P(CvtColor_YUV420, YUV2GRAY_420) { performTest(1, 1, CVTCODE(YUV2GRAY_420)); } +OCL_TEST_P(CvtColor_YUV2RGB_420, YUV2RGBA_NV12) { performTest(1, 4, CVTCODE(YUV2RGBA_NV12)); } +OCL_TEST_P(CvtColor_YUV2RGB_420, YUV2BGRA_NV12) { performTest(1, 4, CVTCODE(YUV2BGRA_NV12)); } +OCL_TEST_P(CvtColor_YUV2RGB_420, YUV2RGB_NV12) { performTest(1, 3, CVTCODE(YUV2RGB_NV12)); } +OCL_TEST_P(CvtColor_YUV2RGB_420, YUV2BGR_NV12) { performTest(1, 3, CVTCODE(YUV2BGR_NV12)); } +OCL_TEST_P(CvtColor_YUV2RGB_420, YUV2RGBA_NV21) { performTest(1, 4, CVTCODE(YUV2RGBA_NV21)); } +OCL_TEST_P(CvtColor_YUV2RGB_420, YUV2BGRA_NV21) { performTest(1, 4, CVTCODE(YUV2BGRA_NV21)); } +OCL_TEST_P(CvtColor_YUV2RGB_420, YUV2RGB_NV21) { performTest(1, 3, CVTCODE(YUV2RGB_NV21)); } +OCL_TEST_P(CvtColor_YUV2RGB_420, YUV2BGR_NV21) { performTest(1, 3, CVTCODE(YUV2BGR_NV21)); } +OCL_TEST_P(CvtColor_YUV2RGB_420, YUV2RGBA_YV12) { performTest(1, 4, CVTCODE(YUV2RGBA_YV12)); } +OCL_TEST_P(CvtColor_YUV2RGB_420, YUV2BGRA_YV12) { performTest(1, 4, CVTCODE(YUV2BGRA_YV12)); } +OCL_TEST_P(CvtColor_YUV2RGB_420, YUV2RGB_YV12) { performTest(1, 3, CVTCODE(YUV2RGB_YV12)); } +OCL_TEST_P(CvtColor_YUV2RGB_420, YUV2BGR_YV12) { performTest(1, 3, CVTCODE(YUV2BGR_YV12)); } +OCL_TEST_P(CvtColor_YUV2RGB_420, YUV2RGBA_IYUV) { performTest(1, 4, CVTCODE(YUV2RGBA_IYUV)); } +OCL_TEST_P(CvtColor_YUV2RGB_420, YUV2BGRA_IYUV) { performTest(1, 4, CVTCODE(YUV2BGRA_IYUV)); } +OCL_TEST_P(CvtColor_YUV2RGB_420, YUV2RGB_IYUV) { performTest(1, 3, CVTCODE(YUV2RGB_IYUV)); } +OCL_TEST_P(CvtColor_YUV2RGB_420, YUV2BGR_IYUV) { performTest(1, 3, CVTCODE(YUV2BGR_IYUV)); } +OCL_TEST_P(CvtColor_YUV2RGB_420, YUV2GRAY_420) { performTest(1, 1, CVTCODE(YUV2GRAY_420)); } + +// RGBA -> YUV420 + +struct CvtColor_RGB2YUV_420 : + public CvtColor +{ + void generateTestData(int channelsIn, int channelsOut) + { + const int srcType = CV_MAKE_TYPE(depth, channelsIn); + const int dstType = CV_MAKE_TYPE(depth, channelsOut); + Size srcRoiSize = randomSize(1, MAX_VALUE); + srcRoiSize.width *= 2; + srcRoiSize.height *= 2; + Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(src, src_roi, srcRoiSize, srcBorder, srcType, 2, 100); + + Size dstRoiSize(srcRoiSize.width, srcRoiSize.height / 2 * 3); + Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(dst, dst_roi, dstRoiSize, dstBorder, dstType, 5, 16); + + UMAT_UPLOAD_INPUT_PARAMETER(src); + UMAT_UPLOAD_OUTPUT_PARAMETER(dst); + } +}; + +OCL_TEST_P(CvtColor_RGB2YUV_420, RGBA2YUV_YV12) { performTest(4, 1, CVTCODE(RGBA2YUV_YV12)); } +OCL_TEST_P(CvtColor_RGB2YUV_420, BGRA2YUV_YV12) { performTest(4, 1, CVTCODE(BGRA2YUV_YV12)); } +OCL_TEST_P(CvtColor_RGB2YUV_420, RGB2YUV_YV12) { performTest(3, 1, CVTCODE(RGB2YUV_YV12)); } +OCL_TEST_P(CvtColor_RGB2YUV_420, BGR2YUV_YV12) { performTest(3, 1, CVTCODE(BGR2YUV_YV12)); } +OCL_TEST_P(CvtColor_RGB2YUV_420, RGBA2YUV_IYUV) { performTest(4, 1, CVTCODE(RGBA2YUV_IYUV)); } +OCL_TEST_P(CvtColor_RGB2YUV_420, BGRA2YUV_IYUV) { performTest(4, 1, CVTCODE(BGRA2YUV_IYUV)); } +OCL_TEST_P(CvtColor_RGB2YUV_420, RGB2YUV_IYUV) { performTest(3, 1, CVTCODE(RGB2YUV_IYUV)); } +OCL_TEST_P(CvtColor_RGB2YUV_420, BGR2YUV_IYUV) { performTest(3, 1, CVTCODE(BGR2YUV_IYUV)); } OCL_INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor8u, testing::Combine(testing::Values(MatDepth(CV_8U)), Bool())); @@ -374,7 +407,12 @@ OCL_INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)), Bool())); -OCL_INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor_YUV420, +OCL_INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor_YUV2RGB_420, + testing::Combine( + testing::Values(MatDepth(CV_8U)), + Bool())); + +OCL_INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor_RGB2YUV_420, testing::Combine( testing::Values(MatDepth(CV_8U)), Bool())); From 5aa9ac9a7788de69726bf696cc6e3ffd14f7d895 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Mon, 6 Oct 2014 19:21:57 +0400 Subject: [PATCH 5/8] Added OCL code for YUV422 -> RGB[A]|BGR[A] color conversion --- modules/imgproc/src/color.cpp | 32 ++++++-- modules/imgproc/src/opencl/cvtcolor.cl | 105 +++++++++++++++++------- modules/imgproc/test/ocl/test_color.cpp | 43 ++++++++++ 3 files changed, 146 insertions(+), 34 deletions(-) diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 6cca73652f..f363189579 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -4848,7 +4848,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) bool ok = false; UMat src = _src.getUMat(), dst; Size sz = src.size(), dstSz = sz; - int scn = src.channels(), depth = src.depth(), bidx, uidx; + int scn = src.channels(), depth = src.depth(), bidx, uidx, yidx; int dims = 2, stripeSize = 1; ocl::Kernel k; @@ -4967,14 +4967,14 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); dcn = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2RGBA_NV12 || code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2RGBA_NV21 ? 4 : 3; - bidx = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2BGR_NV12 || + bidx = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2BGR_NV12 || code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2BGR_NV21 ? 0 : 2; uidx = code == COLOR_YUV2RGBA_NV21 || code == COLOR_YUV2RGB_NV21 || code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2BGR_NV21 ? 1 : 0; dstSz = Size(sz.width, sz.height * 2 / 3); globalsize[0] = dstSz.width / 2; globalsize[1] = (dstSz.height/2 + pxPerWIy - 1) / pxPerWIy; - k.create("YUV2RGB_NV", ocl::imgproc::cvtcolor_oclsrc, + k.create("YUV2RGB_NVx", ocl::imgproc::cvtcolor_oclsrc, opts + format("-D dcn=%d -D bidx=%d -D uidx=%d", dcn, bidx, uidx)); break; } @@ -4985,7 +4985,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); dcn = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2RGBA_YV12 || code == COLOR_YUV2BGRA_IYUV || code == COLOR_YUV2RGBA_IYUV ? 4 : 3; - bidx = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2BGR_YV12 || + bidx = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2BGR_YV12 || code == COLOR_YUV2BGRA_IYUV || code == COLOR_YUV2BGR_IYUV ? 0 : 2; uidx = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2BGR_YV12 || code == COLOR_YUV2RGBA_YV12 || code == COLOR_YUV2RGB_YV12 ? 1 : 0; @@ -5015,7 +5015,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) case COLOR_RGB2YUV_IYUV: case COLOR_BGR2YUV_IYUV: case COLOR_RGBA2YUV_IYUV: case COLOR_BGRA2YUV_IYUV: { if (dcn <= 0) dcn = 1; - bidx = code == COLOR_BGRA2YUV_YV12 || code == COLOR_BGR2YUV_YV12 || + bidx = code == COLOR_BGRA2YUV_YV12 || code == COLOR_BGR2YUV_YV12 || code == COLOR_BGRA2YUV_IYUV || code == COLOR_BGR2YUV_IYUV ? 0 : 2; uidx = code == COLOR_RGBA2YUV_YV12 || code == COLOR_RGB2YUV_YV12 || code == COLOR_BGRA2YUV_YV12 || code == COLOR_BGR2YUV_YV12 ? 1 : 0; @@ -5030,6 +5030,28 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) opts + format("-D dcn=%d -D bidx=%d -D uidx=%d", dcn, bidx, uidx)); break; } + case COLOR_YUV2RGB_UYVY: case COLOR_YUV2BGR_UYVY: case COLOR_YUV2RGBA_UYVY: case COLOR_YUV2BGRA_UYVY: + case COLOR_YUV2RGB_YUY2: case COLOR_YUV2BGR_YUY2: case COLOR_YUV2RGB_YVYU: case COLOR_YUV2BGR_YVYU: + case COLOR_YUV2RGBA_YUY2: case COLOR_YUV2BGRA_YUY2: case COLOR_YUV2RGBA_YVYU: case COLOR_YUV2BGRA_YVYU: + { + if (dcn <= 0) + dcn = (code==COLOR_YUV2RGBA_UYVY || code==COLOR_YUV2BGRA_UYVY || code==COLOR_YUV2RGBA_YUY2 || + code==COLOR_YUV2BGRA_YUY2 || code==COLOR_YUV2RGBA_YVYU || code==COLOR_YUV2BGRA_YVYU) ? 4 : 3; + + bidx = (code==COLOR_YUV2BGR_UYVY || code==COLOR_YUV2BGRA_UYVY || code==COLOR_YUV2BGRA_YUY2 || + code==COLOR_YUV2BGR_YUY2 || code==COLOR_YUV2BGRA_YVYU || code==COLOR_YUV2BGR_YVYU) ? 0 : 2; + yidx = (code==COLOR_YUV2RGB_UYVY || code==COLOR_YUV2RGBA_UYVY || code==COLOR_YUV2BGR_UYVY || code==COLOR_YUV2BGRA_UYVY) ? 1 : 0; + uidx = (code==COLOR_YUV2RGB_YVYU || code==COLOR_YUV2RGBA_YVYU || + code==COLOR_YUV2BGR_YVYU || code==COLOR_YUV2BGRA_YVYU) ? 2 : 0; + uidx = 1 - yidx + uidx; + + CV_Assert( dcn == 3 || dcn == 4 ); + CV_Assert( scn == 2 && depth == CV_8U ); + + k.create("YUV2RGB_422", ocl::imgproc::cvtcolor_oclsrc, + opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d", dcn, bidx, uidx, yidx)); + break; + } case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb: { diff --git a/modules/imgproc/src/opencl/cvtcolor.cl b/modules/imgproc/src/opencl/cvtcolor.cl index f57c0c068c..cf7c06ee68 100644 --- a/modules/imgproc/src/opencl/cvtcolor.cl +++ b/modules/imgproc/src/opencl/cvtcolor.cl @@ -300,12 +300,12 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int src_step, int src_offset __constant int ITUR_BT_601_CY = 1220542; __constant int ITUR_BT_601_CUB = 2116026; -__constant int ITUR_BT_601_CUG = 409993; -__constant int ITUR_BT_601_CVG = 852492; +__constant int ITUR_BT_601_CUG = -409993; +__constant int ITUR_BT_601_CVG = -852492; __constant int ITUR_BT_601_CVR = 1673527; __constant int ITUR_BT_601_SHIFT = 20; -__kernel void YUV2RGB_NV(__global const uchar* srcptr, int src_step, int src_offset, +__kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_offset, __global uchar* dstptr, int dst_step, int dt_offset, int rows, int cols) { @@ -329,41 +329,41 @@ __kernel void YUV2RGB_NV(__global const uchar* srcptr, int src_step, int src_off int Y3 = ysrc[src_step]; int Y4 = ysrc[src_step + 1]; - int U = ((int)usrc[uidx]) - 128; - int V = ((int)usrc[1-uidx]) - 128; + int U = ((int)usrc[uidx]) - HALF_MAX; + int V = ((int)usrc[1-uidx]) - HALF_MAX; - int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * V; - int guv = (1 << (ITUR_BT_601_SHIFT - 1)) - ITUR_BT_601_CVG * V - ITUR_BT_601_CUG * U; - int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * U; + int ruv = mad24(ITUR_BT_601_CVR, V, (1 << (ITUR_BT_601_SHIFT - 1))); + int guv = mad24(ITUR_BT_601_CVG, V, mad24(ITUR_BT_601_CUG, U, (1 << (ITUR_BT_601_SHIFT - 1)))); + int buv = mad24(ITUR_BT_601_CUB, U, (1 << (ITUR_BT_601_SHIFT - 1))); - Y1 = max(0, Y1 - 16) * ITUR_BT_601_CY; - dst1[2 - bidx] = convert_uchar_sat((Y1 + ruv) >> ITUR_BT_601_SHIFT); + Y1 = mul24(max(0, Y1 - 16), ITUR_BT_601_CY); + dst1[2 - bidx] = convert_uchar_sat((Y1 + ruv) >> ITUR_BT_601_SHIFT); dst1[1] = convert_uchar_sat((Y1 + guv) >> ITUR_BT_601_SHIFT); - dst1[bidx] = convert_uchar_sat((Y1 + buv) >> ITUR_BT_601_SHIFT); + dst1[bidx] = convert_uchar_sat((Y1 + buv) >> ITUR_BT_601_SHIFT); #if dcn == 4 dst1[3] = 255; #endif - Y2 = max(0, Y2 - 16) * ITUR_BT_601_CY; + Y2 = mul24(max(0, Y2 - 16), ITUR_BT_601_CY); dst1[dcn + 2 - bidx] = convert_uchar_sat((Y2 + ruv) >> ITUR_BT_601_SHIFT); dst1[dcn + 1] = convert_uchar_sat((Y2 + guv) >> ITUR_BT_601_SHIFT); - dst1[dcn + bidx] = convert_uchar_sat((Y2 + buv) >> ITUR_BT_601_SHIFT); + dst1[dcn + bidx] = convert_uchar_sat((Y2 + buv) >> ITUR_BT_601_SHIFT); #if dcn == 4 dst1[7] = 255; #endif - Y3 = max(0, Y3 - 16) * ITUR_BT_601_CY; - dst2[2 - bidx] = convert_uchar_sat((Y3 + ruv) >> ITUR_BT_601_SHIFT); + Y3 = mul24(max(0, Y3 - 16), ITUR_BT_601_CY); + dst2[2 - bidx] = convert_uchar_sat((Y3 + ruv) >> ITUR_BT_601_SHIFT); dst2[1] = convert_uchar_sat((Y3 + guv) >> ITUR_BT_601_SHIFT); - dst2[bidx] = convert_uchar_sat((Y3 + buv) >> ITUR_BT_601_SHIFT); + dst2[bidx] = convert_uchar_sat((Y3 + buv) >> ITUR_BT_601_SHIFT); #if dcn == 4 dst2[3] = 255; #endif - Y4 = max(0, Y4 - 16) * ITUR_BT_601_CY; + Y4 = mul24(max(0, Y4 - 16), ITUR_BT_601_CY); dst2[dcn + 2 - bidx] = convert_uchar_sat((Y4 + ruv) >> ITUR_BT_601_SHIFT); dst2[dcn + 1] = convert_uchar_sat((Y4 + guv) >> ITUR_BT_601_SHIFT); - dst2[dcn + bidx] = convert_uchar_sat((Y4 + buv) >> ITUR_BT_601_SHIFT); + dst2[dcn + bidx] = convert_uchar_sat((Y4 + buv) >> ITUR_BT_601_SHIFT); #if dcn == 4 dst2[7] = 255; #endif @@ -399,21 +399,21 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int #ifdef SRC_CONT __global const uchar* uvsrc = srcptr + mad24(rows, src_step, src_offset); int u_ind = mad24(y, cols >> 1, x); - int uv[2] = { ((int)uvsrc[u_ind]) - 128, ((int)uvsrc[u_ind + ((rows * cols) >> 2)]) - 128 }; + int uv[2] = { ((int)uvsrc[u_ind]) - HALF_MAX, ((int)uvsrc[u_ind + ((rows * cols) >> 2)]) - HALF_MAX }; #else int vsteps[2] = { cols >> 1, src_step - (cols >> 1)}; __global const uchar* usrc = srcptr + mad24(rows + (y>>1), src_step, src_offset + (y%2)*(cols >> 1) + x); __global const uchar* vsrc = usrc + mad24(rows >> 2, src_step, rows % 4 ? vsteps[y%2] : 0); - int uv[2] = { ((int)usrc[0]) - 128, ((int)vsrc[0]) - 128 }; + int uv[2] = { ((int)usrc[0]) - HALF_MAX, ((int)vsrc[0]) - HALF_MAX }; #endif - int u = uv[uidx]; - int v = uv[1-uidx]; + int U = uv[uidx]; + int V = uv[1-uidx]; - int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v; - int guv = (1 << (ITUR_BT_601_SHIFT - 1)) - ITUR_BT_601_CVG * v - ITUR_BT_601_CUG * u; - int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u; + int ruv = mad24(ITUR_BT_601_CVR, V, (1 << (ITUR_BT_601_SHIFT - 1))); + int guv = mad24(ITUR_BT_601_CVG, V, mad24(ITUR_BT_601_CUG, U, (1 << (ITUR_BT_601_SHIFT - 1)))); + int buv = mad24(ITUR_BT_601_CUB, U, (1 << (ITUR_BT_601_SHIFT - 1))); - Y1 = max(0, Y1 - 16) * ITUR_BT_601_CY; + Y1 = mul24(max(0, Y1 - 16), ITUR_BT_601_CY); dst1[2 - bidx] = convert_uchar_sat((Y1 + ruv) >> ITUR_BT_601_SHIFT); dst1[1] = convert_uchar_sat((Y1 + guv) >> ITUR_BT_601_SHIFT); dst1[bidx] = convert_uchar_sat((Y1 + buv) >> ITUR_BT_601_SHIFT); @@ -421,7 +421,7 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int dst1[3] = 255; #endif - Y2 = max(0, Y2 - 16) * ITUR_BT_601_CY; + Y2 = mul24(max(0, Y2 - 16), ITUR_BT_601_CY); dst1[dcn + 2 - bidx] = convert_uchar_sat((Y2 + ruv) >> ITUR_BT_601_SHIFT); dst1[dcn + 1] = convert_uchar_sat((Y2 + guv) >> ITUR_BT_601_SHIFT); dst1[dcn + bidx] = convert_uchar_sat((Y2 + buv) >> ITUR_BT_601_SHIFT); @@ -429,7 +429,7 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int dst1[7] = 255; #endif - Y3 = max(0, Y3 - 16) * ITUR_BT_601_CY; + Y3 = mul24(max(0, Y3 - 16), ITUR_BT_601_CY); dst2[2 - bidx] = convert_uchar_sat((Y3 + ruv) >> ITUR_BT_601_SHIFT); dst2[1] = convert_uchar_sat((Y3 + guv) >> ITUR_BT_601_SHIFT); dst2[bidx] = convert_uchar_sat((Y3 + buv) >> ITUR_BT_601_SHIFT); @@ -437,7 +437,7 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int dst2[3] = 255; #endif - Y4 = max(0, Y4 - 16) * ITUR_BT_601_CY; + Y4 = mul24(max(0, Y4 - 16), ITUR_BT_601_CY); dst2[dcn + 2 - bidx] = convert_uchar_sat((Y4 + ruv) >> ITUR_BT_601_SHIFT); dst2[dcn + 1] = convert_uchar_sat((Y4 + guv) >> ITUR_BT_601_SHIFT); dst2[dcn + bidx] = convert_uchar_sat((Y4 + buv) >> ITUR_BT_601_SHIFT); @@ -517,6 +517,53 @@ __kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int } } +__kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_offset, + __global uchar* dstptr, int dst_step, int dst_offset, + int rows, int cols) +{ + int x = get_global_id(0); + int y = get_global_id(1) * PIX_PER_WI_Y; + + if (x < cols / 2) + { + __global const uchar* src = srcptr + mad24(y, src_step, (x << 2) + src_offset); + __global uchar* dst = dstptr + mad24(y, dst_step, mad24(x << 1, dcn, dst_offset)); + + #pragma unroll + for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) + { + if (y < rows ) + { + int U = ((int) src[uidx]) - HALF_MAX; + int V = ((int) src[(2 + uidx) % 4]) - HALF_MAX; + + int ruv = mad24(ITUR_BT_601_CVR, V, (1 << (ITUR_BT_601_SHIFT - 1))); + int guv = mad24(ITUR_BT_601_CVG, V, mad24(ITUR_BT_601_CUG, U, (1 << (ITUR_BT_601_SHIFT - 1)))); + int buv = mad24(ITUR_BT_601_CUB, U, (1 << (ITUR_BT_601_SHIFT - 1))); + + int y00 = max(0, ((int) src[yidx]) - 16) * ITUR_BT_601_CY; + dst[2 - bidx] = convert_uchar_sat((y00 + ruv) >> ITUR_BT_601_SHIFT); + dst[1] = convert_uchar_sat((y00 + guv) >> ITUR_BT_601_SHIFT); + dst[bidx] = convert_uchar_sat((y00 + buv) >> ITUR_BT_601_SHIFT); +#if dcn == 4 + dst[3] = 255; +#endif + + int y01 = max(0, ((int) src[yidx + 2]) - 16) * ITUR_BT_601_CY; + dst[dcn + 2 - bidx] = convert_uchar_sat((y01 + ruv) >> ITUR_BT_601_SHIFT); + dst[dcn + 1] = convert_uchar_sat((y01 + guv) >> ITUR_BT_601_SHIFT); + dst[dcn + bidx] = convert_uchar_sat((y01 + buv) >> ITUR_BT_601_SHIFT); +#if dcn == 4 + dst[7] = 255; +#endif + } + ++y; + src += src_step; + dst += dst_step; + } + } +} + ///////////////////////////////////// RGB <-> YCrCb ////////////////////////////////////// __constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f}; diff --git a/modules/imgproc/test/ocl/test_color.cpp b/modules/imgproc/test/ocl/test_color.cpp index 1484db9720..89affcfac7 100644 --- a/modules/imgproc/test/ocl/test_color.cpp +++ b/modules/imgproc/test/ocl/test_color.cpp @@ -396,6 +396,44 @@ OCL_TEST_P(CvtColor_RGB2YUV_420, BGRA2YUV_IYUV) { performTest(4, 1, CVTCODE(BGRA OCL_TEST_P(CvtColor_RGB2YUV_420, RGB2YUV_IYUV) { performTest(3, 1, CVTCODE(RGB2YUV_IYUV)); } OCL_TEST_P(CvtColor_RGB2YUV_420, BGR2YUV_IYUV) { performTest(3, 1, CVTCODE(BGR2YUV_IYUV)); } +// YUV422 -> RGBA + +struct CvtColor_YUV2RGB_422 : + public CvtColor +{ + void generateTestData(int channelsIn, int channelsOut) + { + const int srcType = CV_MAKE_TYPE(depth, channelsIn); + const int dstType = CV_MAKE_TYPE(depth, channelsOut); + + Size roiSize = randomSize(1, MAX_VALUE); + roiSize.width *= 2; + + Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(src, src_roi, roiSize, srcBorder, srcType, 2, 100); + + Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(dst, dst_roi, roiSize, dstBorder, dstType, 5, 16); + + UMAT_UPLOAD_INPUT_PARAMETER(src); + UMAT_UPLOAD_OUTPUT_PARAMETER(dst); + } +}; + +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2RGB_UYVY) { performTest(2, 3, CVTCODE(YUV2RGB_UYVY)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2BGR_UYVY) { performTest(2, 3, CVTCODE(YUV2BGR_UYVY)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2RGBA_UYVY) { performTest(2, 4, CVTCODE(YUV2RGBA_UYVY)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2BGRA_UYVY) { performTest(2, 4, CVTCODE(YUV2BGRA_UYVY)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2RGB_YUY2) { performTest(2, 3, CVTCODE(YUV2RGB_YUY2)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2BGR_YUY2) { performTest(2, 3, CVTCODE(YUV2BGR_YUY2)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2RGBA_YUY2) { performTest(2, 4, CVTCODE(YUV2RGBA_YUY2)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2BGRA_YUY2) { performTest(2, 4, CVTCODE(YUV2BGRA_YUY2)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2RGB_YVYU) { performTest(2, 3, CVTCODE(YUV2RGB_YVYU)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2BGR_YVYU) { performTest(2, 3, CVTCODE(YUV2BGR_YVYU)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2RGBA_YVYU) { performTest(2, 4, CVTCODE(YUV2RGBA_YVYU)); } +OCL_TEST_P(CvtColor_YUV2RGB_422, YUV2BGRA_YVYU) { performTest(2, 4, CVTCODE(YUV2BGRA_YVYU)); } + + OCL_INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor8u, testing::Combine(testing::Values(MatDepth(CV_8U)), Bool())); @@ -417,6 +455,11 @@ OCL_INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor_RGB2YUV_420, testing::Values(MatDepth(CV_8U)), Bool())); +OCL_INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor_YUV2RGB_422, + testing::Combine( + testing::Values(MatDepth(CV_8U)), + Bool())); + } } // namespace cvtest::ocl #endif From 60367907fe75d369dc3158b4a696371bfa2ce6ee Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Tue, 14 Oct 2014 16:31:10 +0400 Subject: [PATCH 6/8] Used direct float calculations --- modules/imgproc/src/opencl/cvtcolor.cl | 205 +++++++++++------------- modules/imgproc/test/ocl/test_color.cpp | 16 +- 2 files changed, 104 insertions(+), 117 deletions(-) diff --git a/modules/imgproc/src/opencl/cvtcolor.cl b/modules/imgproc/src/opencl/cvtcolor.cl index cf7c06ee68..c3cfd0d592 100644 --- a/modules/imgproc/src/opencl/cvtcolor.cl +++ b/modules/imgproc/src/opencl/cvtcolor.cl @@ -77,7 +77,7 @@ enum { yuv_shift = 14, xyz_shift = 12, - hsv_shift = 12, + hsv_shift = 12, R2Y = 4899, G2Y = 9617, B2Y = 1868, @@ -149,7 +149,7 @@ __kernel void RGB2Gray(__global const uchar * srcptr, int src_step, int src_offs #ifdef DEPTH_5 dst[0] = fma(src_pix.B_COMP, 0.114f, fma(src_pix.G_COMP, 0.587f, src_pix.R_COMP * 0.299f)); #else - dst[0] = (DATA_TYPE)CV_DESCALE(mad24(src_pix.B_COMP, B2Y, mad24(src_pix.G_COMP, G2Y, src_pix.R_COMP * R2Y)), yuv_shift); + dst[0] = (DATA_TYPE)CV_DESCALE(mad24(src_pix.B_COMP, B2Y, mad24(src_pix.G_COMP, G2Y, mul24(src_pix.R_COMP, R2Y))), yuv_shift); #endif ++y; src_index += src_step; @@ -224,13 +224,13 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int src_step, int src_offset #ifdef DEPTH_5 __constant float * coeffs = c_RGB2YUVCoeffs_f; - const DATA_TYPE Y = fma(b, coeffs[0], fma(g, coeffs[1], r * coeffs[2])); + const DATA_TYPE Y = fma(b, coeffs[0], fma(g, coeffs[1], r * coeffs[2])); const DATA_TYPE U = fma(b - Y, coeffs[3], HALF_MAX); const DATA_TYPE V = fma(r - Y, coeffs[4], HALF_MAX); #else __constant int * coeffs = c_RGB2YUVCoeffs_i; const int delta = HALF_MAX * (1 << yuv_shift); - const int Y = CV_DESCALE(mad24(b, coeffs[0], mad24(g, coeffs[1], r * coeffs[2])), yuv_shift); + const int Y = CV_DESCALE(mad24(b, coeffs[0], mad24(g, coeffs[1], mul24(r, coeffs[2]))), yuv_shift); const int U = CV_DESCALE(mad24(b - Y, coeffs[3], delta), yuv_shift); const int V = CV_DESCALE(mad24(r - Y, coeffs[4], delta), yuv_shift); #endif @@ -247,8 +247,8 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int src_step, int src_offset } } -__constant float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f }; -__constant int c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 }; +__constant float c_YUV2RGBCoeffs_f[4] = { 2.032f, -0.395f, -0.581f, 1.140f }; +__constant int c_YUV2RGBCoeffs_i[4] = { 33292, -6472, -9519, 18678 }; __kernel void YUV2RGB(__global const uchar* srcptr, int src_step, int src_offset, __global uchar* dstptr, int dst_step, int dt_offset, @@ -279,9 +279,9 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int src_step, int src_offset float b = fma(U - HALF_MAX, coeffs[0], Y); #else __constant int * coeffs = c_YUV2RGBCoeffs_i; - const int r = Y + CV_DESCALE((V - HALF_MAX) * coeffs[3], yuv_shift); - const int g = Y + CV_DESCALE(mad24(V - HALF_MAX, coeffs[2], (U - HALF_MAX) * coeffs[1]), yuv_shift); - const int b = Y + CV_DESCALE((U - HALF_MAX) * coeffs[0], yuv_shift); + const int r = Y + CV_DESCALE(mul24(V - HALF_MAX, coeffs[3]), yuv_shift); + const int g = Y + CV_DESCALE(mad24(V - HALF_MAX, coeffs[2], mul24(U - HALF_MAX, coeffs[1])), yuv_shift); + const int b = Y + CV_DESCALE(mul24(U - HALF_MAX, coeffs[0]), yuv_shift); #endif dst[bidx] = SAT_CAST( b ); @@ -297,13 +297,8 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int src_step, int src_offset } } } - -__constant int ITUR_BT_601_CY = 1220542; -__constant int ITUR_BT_601_CUB = 2116026; -__constant int ITUR_BT_601_CUG = -409993; -__constant int ITUR_BT_601_CVG = -852492; -__constant int ITUR_BT_601_CVR = 1673527; -__constant int ITUR_BT_601_SHIFT = 20; +__constant float c_YUV2RGBCoeffs_420[5] = { 1.163999557f, 2.017999649f, -0.390999794f, + -0.812999725f, 1.5959997177f }; __kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_offset, __global uchar* dstptr, int dst_step, int dt_offset, @@ -324,46 +319,47 @@ __kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_of __global uchar* dst1 = dstptr + mad24(y << 1, dst_step, mad24(x, dcn<<1, dt_offset)); __global uchar* dst2 = dst1 + dst_step; - int Y1 = ysrc[0]; - int Y2 = ysrc[1]; - int Y3 = ysrc[src_step]; - int Y4 = ysrc[src_step + 1]; + float Y1 = ysrc[0]; + float Y2 = ysrc[1]; + float Y3 = ysrc[src_step]; + float Y4 = ysrc[src_step + 1]; - int U = ((int)usrc[uidx]) - HALF_MAX; - int V = ((int)usrc[1-uidx]) - HALF_MAX; + float U = ((float)usrc[uidx]) - HALF_MAX; + float V = ((float)usrc[1-uidx]) - HALF_MAX; - int ruv = mad24(ITUR_BT_601_CVR, V, (1 << (ITUR_BT_601_SHIFT - 1))); - int guv = mad24(ITUR_BT_601_CVG, V, mad24(ITUR_BT_601_CUG, U, (1 << (ITUR_BT_601_SHIFT - 1)))); - int buv = mad24(ITUR_BT_601_CUB, U, (1 << (ITUR_BT_601_SHIFT - 1))); + __constant float* coeffs = c_YUV2RGBCoeffs_420; + float ruv = fma(coeffs[4], V, 0.5f); + float guv = fma(coeffs[3], V, fma(coeffs[2], U, 0.5f)); + float buv = fma(coeffs[1], U, 0.5f); - Y1 = mul24(max(0, Y1 - 16), ITUR_BT_601_CY); - dst1[2 - bidx] = convert_uchar_sat((Y1 + ruv) >> ITUR_BT_601_SHIFT); - dst1[1] = convert_uchar_sat((Y1 + guv) >> ITUR_BT_601_SHIFT); - dst1[bidx] = convert_uchar_sat((Y1 + buv) >> ITUR_BT_601_SHIFT); + Y1 = max(0.f, Y1 - 16.f) * coeffs[0]; + dst1[2 - bidx] = convert_uchar_sat(Y1 + ruv); + dst1[1] = convert_uchar_sat(Y1 + guv); + dst1[bidx] = convert_uchar_sat(Y1 + buv); #if dcn == 4 dst1[3] = 255; #endif - Y2 = mul24(max(0, Y2 - 16), ITUR_BT_601_CY); - dst1[dcn + 2 - bidx] = convert_uchar_sat((Y2 + ruv) >> ITUR_BT_601_SHIFT); - dst1[dcn + 1] = convert_uchar_sat((Y2 + guv) >> ITUR_BT_601_SHIFT); - dst1[dcn + bidx] = convert_uchar_sat((Y2 + buv) >> ITUR_BT_601_SHIFT); + Y2 = max(0.f, Y2 - 16.f) * coeffs[0]; + dst1[dcn + 2 - bidx] = convert_uchar_sat(Y2 + ruv); + dst1[dcn + 1] = convert_uchar_sat(Y2 + guv); + dst1[dcn + bidx] = convert_uchar_sat(Y2 + buv); #if dcn == 4 dst1[7] = 255; #endif - Y3 = mul24(max(0, Y3 - 16), ITUR_BT_601_CY); - dst2[2 - bidx] = convert_uchar_sat((Y3 + ruv) >> ITUR_BT_601_SHIFT); - dst2[1] = convert_uchar_sat((Y3 + guv) >> ITUR_BT_601_SHIFT); - dst2[bidx] = convert_uchar_sat((Y3 + buv) >> ITUR_BT_601_SHIFT); + Y3 = max(0.f, Y3 - 16.f) * coeffs[0]; + dst2[2 - bidx] = convert_uchar_sat(Y3 + ruv); + dst2[1] = convert_uchar_sat(Y3 + guv); + dst2[bidx] = convert_uchar_sat(Y3 + buv); #if dcn == 4 dst2[3] = 255; #endif - Y4 = mul24(max(0, Y4 - 16), ITUR_BT_601_CY); - dst2[dcn + 2 - bidx] = convert_uchar_sat((Y4 + ruv) >> ITUR_BT_601_SHIFT); - dst2[dcn + 1] = convert_uchar_sat((Y4 + guv) >> ITUR_BT_601_SHIFT); - dst2[dcn + bidx] = convert_uchar_sat((Y4 + buv) >> ITUR_BT_601_SHIFT); + Y4 = max(0.f, Y4 - 16.f) * coeffs[0]; + dst2[dcn + 2 - bidx] = convert_uchar_sat(Y4 + ruv); + dst2[dcn + 1] = convert_uchar_sat(Y4 + guv); + dst2[dcn + bidx] = convert_uchar_sat(Y4 + buv); #if dcn == 4 dst2[7] = 255; #endif @@ -391,56 +387,57 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int __global uchar* dst1 = dstptr + mad24(y << 1, dst_step, x * (dcn<<1) + dt_offset); __global uchar* dst2 = dst1 + dst_step; - int Y1 = ysrc[0]; - int Y2 = ysrc[1]; - int Y3 = ysrc[src_step]; - int Y4 = ysrc[src_step + 1]; + float Y1 = ysrc[0]; + float Y2 = ysrc[1]; + float Y3 = ysrc[src_step]; + float Y4 = ysrc[src_step + 1]; #ifdef SRC_CONT __global const uchar* uvsrc = srcptr + mad24(rows, src_step, src_offset); int u_ind = mad24(y, cols >> 1, x); - int uv[2] = { ((int)uvsrc[u_ind]) - HALF_MAX, ((int)uvsrc[u_ind + ((rows * cols) >> 2)]) - HALF_MAX }; + float uv[2] = { ((float)uvsrc[u_ind]) - HALF_MAX, ((float)uvsrc[u_ind + ((rows * cols) >> 2)]) - HALF_MAX }; #else int vsteps[2] = { cols >> 1, src_step - (cols >> 1)}; __global const uchar* usrc = srcptr + mad24(rows + (y>>1), src_step, src_offset + (y%2)*(cols >> 1) + x); __global const uchar* vsrc = usrc + mad24(rows >> 2, src_step, rows % 4 ? vsteps[y%2] : 0); - int uv[2] = { ((int)usrc[0]) - HALF_MAX, ((int)vsrc[0]) - HALF_MAX }; + float uv[2] = { ((float)usrc[0]) - HALF_MAX, ((float)vsrc[0]) - HALF_MAX }; #endif - int U = uv[uidx]; - int V = uv[1-uidx]; + float U = uv[uidx]; + float V = uv[1-uidx]; - int ruv = mad24(ITUR_BT_601_CVR, V, (1 << (ITUR_BT_601_SHIFT - 1))); - int guv = mad24(ITUR_BT_601_CVG, V, mad24(ITUR_BT_601_CUG, U, (1 << (ITUR_BT_601_SHIFT - 1)))); - int buv = mad24(ITUR_BT_601_CUB, U, (1 << (ITUR_BT_601_SHIFT - 1))); + __constant float* coeffs = c_YUV2RGBCoeffs_420; + float ruv = fma(coeffs[4], V, 0.5f); + float guv = fma(coeffs[3], V, fma(coeffs[2], U, 0.5f)); + float buv = fma(coeffs[1], U, 0.5f); - Y1 = mul24(max(0, Y1 - 16), ITUR_BT_601_CY); - dst1[2 - bidx] = convert_uchar_sat((Y1 + ruv) >> ITUR_BT_601_SHIFT); - dst1[1] = convert_uchar_sat((Y1 + guv) >> ITUR_BT_601_SHIFT); - dst1[bidx] = convert_uchar_sat((Y1 + buv) >> ITUR_BT_601_SHIFT); + Y1 = max(0.f, Y1 - 16.f) * coeffs[0]; + dst1[2 - bidx] = convert_uchar_sat(Y1 + ruv); + dst1[1] = convert_uchar_sat(Y1 + guv); + dst1[bidx] = convert_uchar_sat(Y1 + buv); #if dcn == 4 dst1[3] = 255; #endif - Y2 = mul24(max(0, Y2 - 16), ITUR_BT_601_CY); - dst1[dcn + 2 - bidx] = convert_uchar_sat((Y2 + ruv) >> ITUR_BT_601_SHIFT); - dst1[dcn + 1] = convert_uchar_sat((Y2 + guv) >> ITUR_BT_601_SHIFT); - dst1[dcn + bidx] = convert_uchar_sat((Y2 + buv) >> ITUR_BT_601_SHIFT); + Y2 = max(0.f, Y2 - 16.f) * coeffs[0]; + dst1[dcn + 2 - bidx] = convert_uchar_sat(Y2 + ruv); + dst1[dcn + 1] = convert_uchar_sat(Y2 + guv); + dst1[dcn + bidx] = convert_uchar_sat(Y2 + buv); #if dcn == 4 dst1[7] = 255; #endif - Y3 = mul24(max(0, Y3 - 16), ITUR_BT_601_CY); - dst2[2 - bidx] = convert_uchar_sat((Y3 + ruv) >> ITUR_BT_601_SHIFT); - dst2[1] = convert_uchar_sat((Y3 + guv) >> ITUR_BT_601_SHIFT); - dst2[bidx] = convert_uchar_sat((Y3 + buv) >> ITUR_BT_601_SHIFT); + Y3 = max(0.f, Y3 - 16.f) * coeffs[0]; + dst2[2 - bidx] = convert_uchar_sat(Y3 + ruv); + dst2[1] = convert_uchar_sat(Y3 + guv); + dst2[bidx] = convert_uchar_sat(Y3 + buv); #if dcn == 4 dst2[3] = 255; #endif - Y4 = mul24(max(0, Y4 - 16), ITUR_BT_601_CY); - dst2[dcn + 2 - bidx] = convert_uchar_sat((Y4 + ruv) >> ITUR_BT_601_SHIFT); - dst2[dcn + 1] = convert_uchar_sat((Y4 + guv) >> ITUR_BT_601_SHIFT); - dst2[dcn + bidx] = convert_uchar_sat((Y4 + buv) >> ITUR_BT_601_SHIFT); + Y4 = max(0.f, Y4 - 16.f) * coeffs[0]; + dst2[dcn + 2 - bidx] = convert_uchar_sat(Y4 + ruv); + dst2[dcn + 1] = convert_uchar_sat(Y4 + guv); + dst2[dcn + bidx] = convert_uchar_sat(Y4 + buv); #if dcn == 4 dst2[7] = 255; #endif @@ -450,16 +447,8 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int } } -__constant int ITUR_BT_601_CRY = 269484; -__constant int ITUR_BT_601_CGY = 528482; -__constant int ITUR_BT_601_CBY = 102760; -__constant int ITUR_BT_601_CRU = -155188; -__constant int ITUR_BT_601_CGU = -305135; -__constant int ITUR_BT_601_CBU = 460324; -__constant int ITUR_BT_601_CGV = -385875; -__constant int ITUR_BT_601_CBV = -74448; -__constant int YSHIFT = 17301504; -__constant int UVSHIFT = 134742016; +__constant float c_RGB2YUVCoeffs_420[8] = { 0.256999969f, 0.50399971f, 0.09799957f, -0.1479988098f, -0.2909994125f, + 0.438999176f, -0.3679990768f, -0.0709991455f }; __kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int src_offset, __global uchar* dstptr, int dst_step, int dst_offset, @@ -488,26 +477,22 @@ __kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int __global uchar* udst = dstptr + mad24(y_rows + (y>>1), dst_step, dst_offset + (y%2)*(cols >> 1) + x); __global uchar* vdst = udst + mad24(y_rows >> 2, dst_step, y_rows % 4 ? vsteps[y%2] : 0); - int4 src_pix1 = convert_int4(vload4(0, src1)); - int4 src_pix2 = convert_int4(vload4(0, src1+scn)); - int4 src_pix3 = convert_int4(vload4(0, src2)); - int4 src_pix4 = convert_int4(vload4(0, src2+scn)); - - int y00 = mad24(ITUR_BT_601_CRY, src_pix1.R_COMP, mad24(ITUR_BT_601_CGY, src_pix1.G_COMP, mad24(ITUR_BT_601_CBY, src_pix1.B_COMP, YSHIFT))); - int y01 = mad24(ITUR_BT_601_CRY, src_pix2.R_COMP, mad24(ITUR_BT_601_CGY, src_pix2.G_COMP, mad24(ITUR_BT_601_CBY, src_pix2.B_COMP, YSHIFT))); - int y10 = mad24(ITUR_BT_601_CRY, src_pix3.R_COMP, mad24(ITUR_BT_601_CGY, src_pix3.G_COMP, mad24(ITUR_BT_601_CBY, src_pix3.B_COMP, YSHIFT))); - int y11 = mad24(ITUR_BT_601_CRY, src_pix4.R_COMP, mad24(ITUR_BT_601_CGY, src_pix4.G_COMP, mad24(ITUR_BT_601_CBY, src_pix4.B_COMP, YSHIFT))); + float4 src_pix1 = convert_float4(vload4(0, src1)); + float4 src_pix2 = convert_float4(vload4(0, src1+scn)); + float4 src_pix3 = convert_float4(vload4(0, src2)); + float4 src_pix4 = convert_float4(vload4(0, src2+scn)); - ydst1[0] = convert_uchar_sat(y00 >> ITUR_BT_601_SHIFT); - ydst1[1] = convert_uchar_sat(y01 >> ITUR_BT_601_SHIFT); - ydst2[0] = convert_uchar_sat(y10 >> ITUR_BT_601_SHIFT); - ydst2[1] = convert_uchar_sat(y11 >> ITUR_BT_601_SHIFT); + __constant float* coeffs = c_RGB2YUVCoeffs_420; + ydst1[0] = convert_uchar_sat(fma(coeffs[0], src_pix1.R_COMP, fma(coeffs[1], src_pix1.G_COMP, fma(coeffs[2], src_pix1.B_COMP, 16.5f)))); + ydst1[1] = convert_uchar_sat(fma(coeffs[0], src_pix2.R_COMP, fma(coeffs[1], src_pix2.G_COMP, fma(coeffs[2], src_pix2.B_COMP, 16.5f)))); + ydst2[0] = convert_uchar_sat(fma(coeffs[0], src_pix3.R_COMP, fma(coeffs[1], src_pix3.G_COMP, fma(coeffs[2], src_pix3.B_COMP, 16.5f)))); + ydst2[1] = convert_uchar_sat(fma(coeffs[0], src_pix4.R_COMP, fma(coeffs[1], src_pix4.G_COMP, fma(coeffs[2], src_pix4.B_COMP, 16.5f)))); - int uv[2] = { mad24(ITUR_BT_601_CRU, src_pix1.R_COMP, mad24(ITUR_BT_601_CGU, src_pix1.G_COMP, mad24(ITUR_BT_601_CBU, src_pix1.B_COMP, UVSHIFT))), - mad24(ITUR_BT_601_CBU, src_pix1.R_COMP, mad24(ITUR_BT_601_CGV, src_pix1.G_COMP, mad24(ITUR_BT_601_CBV, src_pix1.B_COMP, UVSHIFT))) }; + float uv[2] = { fma(coeffs[3], src_pix1.R_COMP, fma(coeffs[4], src_pix1.G_COMP, fma(coeffs[5], src_pix1.B_COMP, 128.5f))), + fma(coeffs[5], src_pix1.R_COMP, fma(coeffs[6], src_pix1.G_COMP, fma(coeffs[7], src_pix1.B_COMP, 128.5f))) }; - udst[0] = convert_uchar_sat(uv[uidx] >> ITUR_BT_601_SHIFT); - vdst[0] = convert_uchar_sat(uv[1-uidx] >> ITUR_BT_601_SHIFT); + udst[0] = convert_uchar_sat(uv[uidx] ); + vdst[0] = convert_uchar_sat(uv[1-uidx]); ++y; src_index += 2*src_step; @@ -534,25 +519,27 @@ __kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_of { if (y < rows ) { - int U = ((int) src[uidx]) - HALF_MAX; - int V = ((int) src[(2 + uidx) % 4]) - HALF_MAX; + float U = ((float) src[uidx]) - HALF_MAX; + float V = ((float) src[(2 + uidx) % 4]) - HALF_MAX; + - int ruv = mad24(ITUR_BT_601_CVR, V, (1 << (ITUR_BT_601_SHIFT - 1))); - int guv = mad24(ITUR_BT_601_CVG, V, mad24(ITUR_BT_601_CUG, U, (1 << (ITUR_BT_601_SHIFT - 1)))); - int buv = mad24(ITUR_BT_601_CUB, U, (1 << (ITUR_BT_601_SHIFT - 1))); + __constant float* coeffs = c_YUV2RGBCoeffs_420; + float ruv = fma(coeffs[4], V, 0.5f); + float guv = fma(coeffs[3], V, fma(coeffs[2], U, 0.5f)); + float buv = fma(coeffs[1], U, 0.5f); - int y00 = max(0, ((int) src[yidx]) - 16) * ITUR_BT_601_CY; - dst[2 - bidx] = convert_uchar_sat((y00 + ruv) >> ITUR_BT_601_SHIFT); - dst[1] = convert_uchar_sat((y00 + guv) >> ITUR_BT_601_SHIFT); - dst[bidx] = convert_uchar_sat((y00 + buv) >> ITUR_BT_601_SHIFT); + float y00 = max(0.f, ((float) src[yidx]) - 16.f) * coeffs[0]; + dst[2 - bidx] = convert_uchar_sat(y00 + ruv); + dst[1] = convert_uchar_sat(y00 + guv); + dst[bidx] = convert_uchar_sat(y00 + buv); #if dcn == 4 dst[3] = 255; #endif - int y01 = max(0, ((int) src[yidx + 2]) - 16) * ITUR_BT_601_CY; - dst[dcn + 2 - bidx] = convert_uchar_sat((y01 + ruv) >> ITUR_BT_601_SHIFT); - dst[dcn + 1] = convert_uchar_sat((y01 + guv) >> ITUR_BT_601_SHIFT); - dst[dcn + bidx] = convert_uchar_sat((y01 + buv) >> ITUR_BT_601_SHIFT); + float y01 = max(0.f, ((float) src[yidx + 2]) - 16.f) * coeffs[0]; + dst[dcn + 2 - bidx] = convert_uchar_sat(y01 + ruv); + dst[dcn + 1] = convert_uchar_sat(y01 + guv); + dst[dcn + bidx] = convert_uchar_sat(y01 + buv); #if dcn == 4 dst[7] = 255; #endif @@ -599,7 +586,7 @@ __kernel void RGB2YCrCb(__global const uchar* srcptr, int src_step, int src_offs #else __constant int * coeffs = c_RGB2YCrCbCoeffs_i; int delta = HALF_MAX * (1 << yuv_shift); - int Y = CV_DESCALE(mad24(b, coeffs[2], mad24(g, coeffs[1], r * coeffs[0])), yuv_shift); + int Y = CV_DESCALE(mad24(b, coeffs[2], mad24(g, coeffs[1], mul24(r, coeffs[0]))), yuv_shift); int Cr = CV_DESCALE(mad24(r - Y, coeffs[3], delta), yuv_shift); int Cb = CV_DESCALE(mad24(b - Y, coeffs[4], delta), yuv_shift); #endif diff --git a/modules/imgproc/test/ocl/test_color.cpp b/modules/imgproc/test/ocl/test_color.cpp index 89affcfac7..c53607fdbd 100644 --- a/modules/imgproc/test/ocl/test_color.cpp +++ b/modules/imgproc/test/ocl/test_color.cpp @@ -387,14 +387,14 @@ struct CvtColor_RGB2YUV_420 : } }; -OCL_TEST_P(CvtColor_RGB2YUV_420, RGBA2YUV_YV12) { performTest(4, 1, CVTCODE(RGBA2YUV_YV12)); } -OCL_TEST_P(CvtColor_RGB2YUV_420, BGRA2YUV_YV12) { performTest(4, 1, CVTCODE(BGRA2YUV_YV12)); } -OCL_TEST_P(CvtColor_RGB2YUV_420, RGB2YUV_YV12) { performTest(3, 1, CVTCODE(RGB2YUV_YV12)); } -OCL_TEST_P(CvtColor_RGB2YUV_420, BGR2YUV_YV12) { performTest(3, 1, CVTCODE(BGR2YUV_YV12)); } -OCL_TEST_P(CvtColor_RGB2YUV_420, RGBA2YUV_IYUV) { performTest(4, 1, CVTCODE(RGBA2YUV_IYUV)); } -OCL_TEST_P(CvtColor_RGB2YUV_420, BGRA2YUV_IYUV) { performTest(4, 1, CVTCODE(BGRA2YUV_IYUV)); } -OCL_TEST_P(CvtColor_RGB2YUV_420, RGB2YUV_IYUV) { performTest(3, 1, CVTCODE(RGB2YUV_IYUV)); } -OCL_TEST_P(CvtColor_RGB2YUV_420, BGR2YUV_IYUV) { performTest(3, 1, CVTCODE(BGR2YUV_IYUV)); } +OCL_TEST_P(CvtColor_RGB2YUV_420, RGBA2YUV_YV12) { performTest(4, 1, CVTCODE(RGBA2YUV_YV12), 1); } +OCL_TEST_P(CvtColor_RGB2YUV_420, BGRA2YUV_YV12) { performTest(4, 1, CVTCODE(BGRA2YUV_YV12), 1); } +OCL_TEST_P(CvtColor_RGB2YUV_420, RGB2YUV_YV12) { performTest(3, 1, CVTCODE(RGB2YUV_YV12), 1); } +OCL_TEST_P(CvtColor_RGB2YUV_420, BGR2YUV_YV12) { performTest(3, 1, CVTCODE(BGR2YUV_YV12), 1); } +OCL_TEST_P(CvtColor_RGB2YUV_420, RGBA2YUV_IYUV) { performTest(4, 1, CVTCODE(RGBA2YUV_IYUV), 1); } +OCL_TEST_P(CvtColor_RGB2YUV_420, BGRA2YUV_IYUV) { performTest(4, 1, CVTCODE(BGRA2YUV_IYUV), 1); } +OCL_TEST_P(CvtColor_RGB2YUV_420, RGB2YUV_IYUV) { performTest(3, 1, CVTCODE(RGB2YUV_IYUV), 1); } +OCL_TEST_P(CvtColor_RGB2YUV_420, BGR2YUV_IYUV) { performTest(3, 1, CVTCODE(BGR2YUV_IYUV), 1); } // YUV422 -> RGBA From 1466621f99f0418fb575c68caabb8c200e43904f Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Mon, 27 Oct 2014 14:52:17 +0300 Subject: [PATCH 7/8] Added loading 4 pixels in line instead of 2 to RGB[A] -> YUV(420) kernel --- modules/imgproc/src/color.cpp | 17 +++++-- modules/imgproc/src/opencl/cvtcolor.cl | 62 +++++++++++++++++++++++--- 2 files changed, 71 insertions(+), 8 deletions(-) diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index f363189579..dcbfb8f79f 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -4857,6 +4857,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) ocl::Device dev = ocl::Device::getDefault(); int pxPerWIy = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1; + int pxPerWIx = 1; size_t globalsize[] = { src.cols, (src.rows + pxPerWIy - 1) / pxPerWIy }; cv::String opts = format("-D depth=%d -D scn=%d -D PIX_PER_WI_Y=%d ", @@ -5025,10 +5026,20 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0 ); dstSz = Size(sz.width, sz.height / 2 * 3); - globalsize[0] = dstSz.width / 2; globalsize[1] = (dstSz.height/3 + pxPerWIy - 1) / pxPerWIy; + _dst.create(dstSz, CV_MAKETYPE(depth, dcn)); + dst = _dst.getUMat(); + + if (dev.isIntel() && src.cols % 4 == 0 && src.step % 4 == 0 && src.offset % 4 == 0 && + dst.step % 4 == 0 && dst.offset % 4 == 0) + { + pxPerWIx = 2; + } + globalsize[0] = dstSz.width / (2 * pxPerWIx); globalsize[1] = (dstSz.height/3 + pxPerWIy - 1) / pxPerWIy; + k.create("RGB2YUV_YV12_IYUV", ocl::imgproc::cvtcolor_oclsrc, - opts + format("-D dcn=%d -D bidx=%d -D uidx=%d", dcn, bidx, uidx)); - break; + opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D PIX_PER_WI_X=%d", dcn, bidx, uidx, pxPerWIx)); + k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst)); + return k.run(2, globalsize, NULL, false); } case COLOR_YUV2RGB_UYVY: case COLOR_YUV2BGR_UYVY: case COLOR_YUV2RGBA_UYVY: case COLOR_YUV2BGRA_UYVY: case COLOR_YUV2RGB_YUY2: case COLOR_YUV2BGR_YUY2: case COLOR_YUV2RGB_YVYU: case COLOR_YUV2BGR_YVYU: diff --git a/modules/imgproc/src/opencl/cvtcolor.cl b/modules/imgproc/src/opencl/cvtcolor.cl index c3cfd0d592..e660a52204 100644 --- a/modules/imgproc/src/opencl/cvtcolor.cl +++ b/modules/imgproc/src/opencl/cvtcolor.cl @@ -119,6 +119,10 @@ enum #define yidx 0 #endif +#ifndef PIX_PER_WI_X +#define PIX_PER_WI_X 1 +#endif + #define __CAT(x, y) x##y #define CAT(x, y) __CAT(x, y) @@ -454,7 +458,7 @@ __kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int __global uchar* dstptr, int dst_step, int dst_offset, int rows, int cols) { - int x = get_global_id(0); + int x = get_global_id(0) * PIX_PER_WI_X; int y = get_global_id(1) * PIX_PER_WI_Y; if (x < cols/2) @@ -463,6 +467,7 @@ __kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int int ydst_index = mad24(y << 1, dst_step, (x << 1) + dst_offset); int y_rows = rows / 3 * 2; int vsteps[2] = { cols >> 1, dst_step - (cols >> 1)}; + __constant float* coeffs = c_RGB2YUVCoeffs_420; #pragma unroll for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) @@ -477,12 +482,61 @@ __kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int __global uchar* udst = dstptr + mad24(y_rows + (y>>1), dst_step, dst_offset + (y%2)*(cols >> 1) + x); __global uchar* vdst = udst + mad24(y_rows >> 2, dst_step, y_rows % 4 ? vsteps[y%2] : 0); +#if PIX_PER_WI_X == 2 + int s11 = *((__global const int*) src1); + int s12 = *((__global const int*) src1 + 1); + int s13 = *((__global const int*) src1 + 2); +#if scn == 4 + int s14 = *((__global const int*) src1 + 3); +#endif + int s21 = *((__global const int*) src2); + int s22 = *((__global const int*) src2 + 1); + int s23 = *((__global const int*) src2 + 2); +#if scn == 4 + int s24 = *((__global const int*) src2 + 3); +#endif + float src_pix1[scn * 4], src_pix2[scn * 4]; + + *((float4*) src_pix1) = convert_float4(as_uchar4(s11)); + *((float4*) src_pix1 + 1) = convert_float4(as_uchar4(s12)); + *((float4*) src_pix1 + 2) = convert_float4(as_uchar4(s13)); +#if scn == 4 + *((float4*) src_pix1 + 3) = convert_float4(as_uchar4(s14)); +#endif + *((float4*) src_pix2) = convert_float4(as_uchar4(s21)); + *((float4*) src_pix2 + 1) = convert_float4(as_uchar4(s22)); + *((float4*) src_pix2 + 2) = convert_float4(as_uchar4(s23)); +#if scn == 4 + *((float4*) src_pix2 + 3) = convert_float4(as_uchar4(s24)); +#endif + uchar4 y1, y2; + y1.x = convert_uchar_sat(fma(coeffs[0], src_pix1[ 2-bidx], fma(coeffs[1], src_pix1[ 1], fma(coeffs[2], src_pix1[ bidx], 16.5f)))); + y1.y = convert_uchar_sat(fma(coeffs[0], src_pix1[ scn+2-bidx], fma(coeffs[1], src_pix1[ scn+1], fma(coeffs[2], src_pix1[ scn+bidx], 16.5f)))); + y1.z = convert_uchar_sat(fma(coeffs[0], src_pix1[2*scn+2-bidx], fma(coeffs[1], src_pix1[2*scn+1], fma(coeffs[2], src_pix1[2*scn+bidx], 16.5f)))); + y1.w = convert_uchar_sat(fma(coeffs[0], src_pix1[3*scn+2-bidx], fma(coeffs[1], src_pix1[3*scn+1], fma(coeffs[2], src_pix1[3*scn+bidx], 16.5f)))); + y2.x = convert_uchar_sat(fma(coeffs[0], src_pix2[ 2-bidx], fma(coeffs[1], src_pix2[ 1], fma(coeffs[2], src_pix2[ bidx], 16.5f)))); + y2.y = convert_uchar_sat(fma(coeffs[0], src_pix2[ scn+2-bidx], fma(coeffs[1], src_pix2[ scn+1], fma(coeffs[2], src_pix2[ scn+bidx], 16.5f)))); + y2.z = convert_uchar_sat(fma(coeffs[0], src_pix2[2*scn+2-bidx], fma(coeffs[1], src_pix2[2*scn+1], fma(coeffs[2], src_pix2[2*scn+bidx], 16.5f)))); + y2.w = convert_uchar_sat(fma(coeffs[0], src_pix2[3*scn+2-bidx], fma(coeffs[1], src_pix2[3*scn+1], fma(coeffs[2], src_pix2[3*scn+bidx], 16.5f)))); + + *((__global int*) ydst1) = as_int(y1); + *((__global int*) ydst2) = as_int(y2); + + float uv[4] = { fma(coeffs[3], src_pix1[ 2-bidx], fma(coeffs[4], src_pix1[ 1], fma(coeffs[5], src_pix1[ bidx], 128.5f))), + fma(coeffs[5], src_pix1[ 2-bidx], fma(coeffs[6], src_pix1[ 1], fma(coeffs[7], src_pix1[ bidx], 128.5f))), + fma(coeffs[3], src_pix1[2*scn+2-bidx], fma(coeffs[4], src_pix1[2*scn+1], fma(coeffs[5], src_pix1[2*scn+bidx], 128.5f))), + fma(coeffs[5], src_pix1[2*scn+2-bidx], fma(coeffs[6], src_pix1[2*scn+1], fma(coeffs[7], src_pix1[2*scn+bidx], 128.5f))) }; + + udst[0] = convert_uchar_sat(uv[uidx] ); + vdst[0] = convert_uchar_sat(uv[1 - uidx]); + udst[1] = convert_uchar_sat(uv[2 + uidx]); + vdst[1] = convert_uchar_sat(uv[3 - uidx]); +#else float4 src_pix1 = convert_float4(vload4(0, src1)); float4 src_pix2 = convert_float4(vload4(0, src1+scn)); float4 src_pix3 = convert_float4(vload4(0, src2)); float4 src_pix4 = convert_float4(vload4(0, src2+scn)); - __constant float* coeffs = c_RGB2YUVCoeffs_420; ydst1[0] = convert_uchar_sat(fma(coeffs[0], src_pix1.R_COMP, fma(coeffs[1], src_pix1.G_COMP, fma(coeffs[2], src_pix1.B_COMP, 16.5f)))); ydst1[1] = convert_uchar_sat(fma(coeffs[0], src_pix2.R_COMP, fma(coeffs[1], src_pix2.G_COMP, fma(coeffs[2], src_pix2.B_COMP, 16.5f)))); ydst2[0] = convert_uchar_sat(fma(coeffs[0], src_pix3.R_COMP, fma(coeffs[1], src_pix3.G_COMP, fma(coeffs[2], src_pix3.B_COMP, 16.5f)))); @@ -493,7 +547,7 @@ __kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int udst[0] = convert_uchar_sat(uv[uidx] ); vdst[0] = convert_uchar_sat(uv[1-uidx]); - +#endif ++y; src_index += 2*src_step; ydst_index += 2*dst_step; @@ -522,7 +576,6 @@ __kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_of float U = ((float) src[uidx]) - HALF_MAX; float V = ((float) src[(2 + uidx) % 4]) - HALF_MAX; - __constant float* coeffs = c_YUV2RGBCoeffs_420; float ruv = fma(coeffs[4], V, 0.5f); float guv = fma(coeffs[3], V, fma(coeffs[2], U, 0.5f)); @@ -535,7 +588,6 @@ __kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_of #if dcn == 4 dst[3] = 255; #endif - float y01 = max(0.f, ((float) src[yidx + 2]) - 16.f) * coeffs[0]; dst[dcn + 2 - bidx] = convert_uchar_sat(y01 + ruv); dst[dcn + 1] = convert_uchar_sat(y01 + guv); From 643c906f3d79933480fcbebdbf16fc431bece1da Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Tue, 28 Oct 2014 15:07:51 +0300 Subject: [PATCH 8/8] Added optimized loading to YUV2RGB_422 kernel --- modules/imgproc/src/color.cpp | 3 ++- modules/imgproc/src/opencl/cvtcolor.cl | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index dcbfb8f79f..f0a8fd8584 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -5060,7 +5060,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) CV_Assert( scn == 2 && depth == CV_8U ); k.create("YUV2RGB_422", ocl::imgproc::cvtcolor_oclsrc, - opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d", dcn, bidx, uidx, yidx)); + opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d%s", dcn, bidx, uidx, yidx, + src.offset % 4 == 0 && src.step % 4 == 0 ? " -D USE_OPTIMIZED_LOAD" : "")); break; } case COLOR_BGR2YCrCb: diff --git a/modules/imgproc/src/opencl/cvtcolor.cl b/modules/imgproc/src/opencl/cvtcolor.cl index e660a52204..a7cc776503 100644 --- a/modules/imgproc/src/opencl/cvtcolor.cl +++ b/modules/imgproc/src/opencl/cvtcolor.cl @@ -573,22 +573,33 @@ __kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_of { if (y < rows ) { + __constant float* coeffs = c_YUV2RGBCoeffs_420; + +#ifndef USE_OPTIMIZED_LOAD float U = ((float) src[uidx]) - HALF_MAX; float V = ((float) src[(2 + uidx) % 4]) - HALF_MAX; + float y00 = max(0.f, ((float) src[yidx]) - 16.f) * coeffs[0]; + float y01 = max(0.f, ((float) src[yidx + 2]) - 16.f) * coeffs[0]; +#else + int load_src = *((__global int*) src); + float vec_src[4] = { load_src & 0xff, (load_src >> 8) & 0xff, (load_src >> 16) & 0xff, (load_src >> 24) & 0xff}; + float U = vec_src[uidx] - HALF_MAX; + float V = vec_src[(2 + uidx) % 4] - HALF_MAX; + float y00 = max(0.f, vec_src[yidx] - 16.f) * coeffs[0]; + float y01 = max(0.f, vec_src[yidx + 2] - 16.f) * coeffs[0]; +#endif - __constant float* coeffs = c_YUV2RGBCoeffs_420; float ruv = fma(coeffs[4], V, 0.5f); float guv = fma(coeffs[3], V, fma(coeffs[2], U, 0.5f)); float buv = fma(coeffs[1], U, 0.5f); - float y00 = max(0.f, ((float) src[yidx]) - 16.f) * coeffs[0]; dst[2 - bidx] = convert_uchar_sat(y00 + ruv); dst[1] = convert_uchar_sat(y00 + guv); dst[bidx] = convert_uchar_sat(y00 + buv); #if dcn == 4 dst[3] = 255; #endif - float y01 = max(0.f, ((float) src[yidx + 2]) - 16.f) * coeffs[0]; + dst[dcn + 2 - bidx] = convert_uchar_sat(y01 + ruv); dst[dcn + 1] = convert_uchar_sat(y01 + guv); dst[dcn + bidx] = convert_uchar_sat(y01 + buv);