From e8dd31aacd08c9d1754871068aa5f708246c7c96 Mon Sep 17 00:00:00 2001 From: krodyush Date: Fri, 20 Dec 2013 13:51:51 +0400 Subject: [PATCH] change code according to reviewer suggestions --- modules/ocl/perf/perf_color.cpp | 93 ++++++++--------------------- modules/ocl/src/opencl/cvt_color.cl | 87 +++++++++++++-------------- 2 files changed, 67 insertions(+), 113 deletions(-) diff --git a/modules/ocl/perf/perf_color.cpp b/modules/ocl/perf/perf_color.cpp index 75e6820fcb..8433315189 100644 --- a/modules/ocl/perf/perf_color.cpp +++ b/modules/ocl/perf/perf_color.cpp @@ -57,39 +57,9 @@ CV_ENUM(ConversionTypes, CV_RGB2GRAY, CV_RGB2BGR, CV_RGB2YUV, CV_YUV2RGB, CV_RGB CV_HLS2RGB, CV_BGR5652BGR, CV_BGR2BGR565, CV_RGBA2mRGBA, CV_mRGBA2RGBA, CV_YUV2RGB_NV12) typedef tuple > cvtColorParams; -typedef TestBaseWithParam cvtColorU8Fixture; -typedef TestBaseWithParam cvtColorF32Fixture; -typedef TestBaseWithParam cvtColorU16Fixture; +typedef TestBaseWithParam cvtColorFixture; -#define RUN_CVT_PERF_TEST \ - cvtColorParams params = GetParam();\ - const Size srcSize = get<0>(params);\ - const tuple conversionParams = get<1>(params);\ - const int code = get<0>(conversionParams), scn = get<1>(conversionParams),\ - dcn = get<2>(conversionParams);\ -\ - Mat src(srcSize, CV_8UC(scn)), dst(srcSize, CV_8UC(scn));\ - declare.in(src, WARMUP_RNG).out(dst);\ -\ - if (RUN_OCL_IMPL)\ - {\ - ocl::oclMat oclSrc(src), oclDst(src.size(), dst.type());\ -\ - OCL_TEST_CYCLE() ocl::cvtColor(oclSrc, oclDst, code, dcn);\ - oclDst.download(dst);\ -\ - SANITY_CHECK(dst, 1);\ - }\ - else if (RUN_PLAIN_IMPL)\ - {\ - TEST_CYCLE() cv::cvtColor(src, dst, code, dcn);\ -\ - SANITY_CHECK(dst);\ - }\ - else\ - OCL_PERF_ELSE\ - -PERF_TEST_P(cvtColorU8Fixture, cvtColor, testing::Combine( +PERF_TEST_P(cvtColorFixture, cvtColor, testing::Combine( testing::Values(Size(1000, 1002), Size(2000, 2004), Size(4000, 4008)), testing::Values( make_tuple(ConversionTypes(CV_RGB2GRAY), 3, 1), @@ -111,41 +81,30 @@ 
PERF_TEST_P(cvtColorU8Fixture, cvtColor, testing::Combine( make_tuple(ConversionTypes(CV_YUV2RGB_NV12), 1, 3) ))) { - RUN_CVT_PERF_TEST -} + cvtColorParams params = GetParam(); + const Size srcSize = get<0>(params); + const tuple conversionParams = get<1>(params); + const int code = get<0>(conversionParams), scn = get<1>(conversionParams), + dcn = get<2>(conversionParams); -PERF_TEST_P(cvtColorF32Fixture, cvtColor, testing::Combine( - testing::Values(Size(1000, 1002), Size(2000, 2004), Size(4000, 4008)), - testing::Values( - make_tuple(ConversionTypes(CV_RGB2GRAY), 3, 1), - make_tuple(ConversionTypes(CV_RGB2BGR), 3, 3), - make_tuple(ConversionTypes(CV_RGB2YUV), 3, 3), - make_tuple(ConversionTypes(CV_YUV2RGB), 3, 3), - make_tuple(ConversionTypes(CV_RGB2YCrCb), 3, 3), - make_tuple(ConversionTypes(CV_YCrCb2RGB), 3, 3), - make_tuple(ConversionTypes(CV_RGB2XYZ), 3, 3), - make_tuple(ConversionTypes(CV_XYZ2RGB), 3, 3), - make_tuple(ConversionTypes(CV_RGB2HSV), 3, 3), - make_tuple(ConversionTypes(CV_HSV2RGB), 3, 3), - make_tuple(ConversionTypes(CV_RGB2HLS), 3, 3), - make_tuple(ConversionTypes(CV_HLS2RGB), 3, 3) - ))) -{ - RUN_CVT_PERF_TEST -} + Mat src(srcSize, CV_8UC(scn)), dst(srcSize, CV_8UC(scn)); + declare.in(src, WARMUP_RNG).out(dst); -PERF_TEST_P(cvtColorU16Fixture, cvtColor, testing::Combine( - testing::Values(Size(1000, 1002), Size(2000, 2004), Size(4000, 4008)), - testing::Values( - make_tuple(ConversionTypes(CV_RGB2GRAY), 3, 1), - make_tuple(ConversionTypes(CV_RGB2BGR), 3, 3), - make_tuple(ConversionTypes(CV_RGB2YUV), 3, 3), - make_tuple(ConversionTypes(CV_YUV2RGB), 3, 3), - make_tuple(ConversionTypes(CV_RGB2YCrCb), 3, 3), - make_tuple(ConversionTypes(CV_YCrCb2RGB), 3, 3), - make_tuple(ConversionTypes(CV_RGB2XYZ), 3, 3), - make_tuple(ConversionTypes(CV_XYZ2RGB), 3, 3) - ))) -{ - RUN_CVT_PERF_TEST + if (RUN_OCL_IMPL) + { + ocl::oclMat oclSrc(src), oclDst(src.size(), dst.type()); + + OCL_TEST_CYCLE() ocl::cvtColor(oclSrc, oclDst, code, dcn); + 
oclDst.download(dst); + + SANITY_CHECK(dst, 1); + } + else if (RUN_PLAIN_IMPL) + { + TEST_CYCLE() cv::cvtColor(src, dst, code, dcn); + + SANITY_CHECK(dst); + } + else + OCL_PERF_ELSE } diff --git a/modules/ocl/src/opencl/cvt_color.cl b/modules/ocl/src/opencl/cvt_color.cl index 2313af1527..5c236f0e05 100644 --- a/modules/ocl/src/opencl/cvt_color.cl +++ b/modules/ocl/src/opencl/cvt_color.cl @@ -133,12 +133,14 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step, int dst_idx = mad24(y, dst_step, dst_offset + x); #ifndef INTEL_DEVICE + #ifdef DEPTH_5 dst[dst_idx] = src[src_idx + bidx] * 0.114f + src[src_idx + 1] * 0.587f + src[src_idx + (bidx^2)] * 0.299f; #else dst[dst_idx] = (DATA_TYPE)CV_DESCALE((src[src_idx + bidx] * B2Y + src[src_idx + 1] * G2Y + src[src_idx + (bidx^2)] * R2Y), yuv_shift); #endif -#else + +#else //INTEL_DEVICE global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx); global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx); @@ -148,7 +150,7 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step, __constant int * coeffs = c_RGB2GrayCoeffs_i; #endif - if (1 == pixels_per_work_item) +#if (1 == pixels_per_work_item) { #ifdef DEPTH_5 *dst_ptr = src_ptr[bidx] * coeffs[0] + src_ptr[1] * coeffs[1] + src_ptr[(bidx^2)] *coeffs[2]; @@ -156,7 +158,7 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step, *dst_ptr = (DATA_TYPE)CV_DESCALE((src_ptr[bidx] * coeffs[0] + src_ptr[1] * coeffs[1] + src_ptr[(bidx^2)] * coeffs[2]), yuv_shift); #endif } - else if (2 == pixels_per_work_item) +#elif (2 == pixels_per_work_item) { const VECTOR8 r0 = vload8(0, src_ptr); @@ -177,7 +179,7 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step, vstore2(Y, 0, dst_ptr); } - else if (4 == pixels_per_work_item) +#elif (4 == pixels_per_work_item) { #ifndef DEPTH_5 const VECTOR16 r0 = vload16(0, src_ptr); @@ -190,6 +192,7 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step, 
vstore4(SAT_CAST4(Y), 0, dst_ptr); #endif } +#endif //pixels_per_work_item #endif //INTEL_DEVICE } } @@ -244,7 +247,7 @@ __kernel void RGB2YUV(int cols, int rows, int src_step, int dst_step, const int delta = HALF_MAX * (1 << yuv_shift); #endif - if (1 == pixels_per_work_item) +#if (1 == pixels_per_work_item) { const DATA_TYPE rgb[] = {src_ptr[0], src_ptr[1], src_ptr[2]}; @@ -262,8 +265,7 @@ __kernel void RGB2YUV(int cols, int rows, int src_step, int dst_step, dst_ptr[1] = SAT_CAST( U ); dst_ptr[2] = SAT_CAST( V ); } -#ifdef INTEL_DEVICE - else if (2 == pixels_per_work_item) +#elif (2 == pixels_per_work_item) { const VECTOR8 r0 = vload8(0, src_ptr); @@ -291,7 +293,7 @@ __kernel void RGB2YUV(int cols, int rows, int src_step, int dst_step, vstore8((VECTOR8)(Y.s0, U.s0, V.s0, 0, Y.s1, U.s1, V.s1, 0), 0, dst_ptr); } - else if (4 == pixels_per_work_item) +#elif (4 == pixels_per_work_item) { #ifndef DEPTH_5 const VECTOR16 r0 = vload16(0, src_ptr); @@ -311,7 +313,7 @@ __kernel void RGB2YUV(int cols, int rows, int src_step, int dst_step, vstore16((VECTOR16)(Y.s0, U.s0, V.s0, 0, Y.s1, U.s1, V.s1, 0, Y.s2, U.s2, V.s2, 0, Y.s3, U.s3, V.s3, 0), 0, dst_ptr); #endif } -#endif //INTEL_DEVICE +#endif //pixels_per_work_item } } @@ -340,7 +342,7 @@ __kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step, __constant int * coeffs = c_YUV2RGBCoeffs_i; #endif - if (1 == pixels_per_work_item) +#if (1 == pixels_per_work_item) { const DATA_TYPE yuv[] = {src_ptr[0], src_ptr[1], src_ptr[2]}; @@ -361,8 +363,7 @@ __kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step, dst_ptr[3] = MAX_NUM; #endif } -#ifdef INTEL_DEVICE - else if (2 == pixels_per_work_item) +#elif (2 == pixels_per_work_item) { const VECTOR8 r0 = vload8(0, src_ptr); @@ -394,7 +395,7 @@ __kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step, vstore8((VECTOR8)(c0.s0, c1.s0, c2.s0, 0, c0.s1, c1.s1, c2.s1, 0), 0, dst_ptr); #endif } - else if (4 == pixels_per_work_item) +#elif (4 == 
pixels_per_work_item) { #ifndef DEPTH_5 const VECTOR16 r0 = vload16(0, src_ptr); @@ -418,7 +419,7 @@ __kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step, #endif #endif } -#endif //INTEL_DEVICE +#endif //pixels_per_work_item } } @@ -509,7 +510,7 @@ __kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step, const int delta = HALF_MAX * (1 << yuv_shift); #endif - if (1 == pixels_per_work_item) +#if (1 == pixels_per_work_item) { const DATA_TYPE rgb[] = {src_ptr[0], src_ptr[1], src_ptr[2]}; @@ -527,8 +528,7 @@ __kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step, dst_ptr[1] = SAT_CAST( Cr ); dst_ptr[2] = SAT_CAST( Cb ); } -#ifdef INTEL_DEVICE - else if (2 == pixels_per_work_item) +#elif (2 == pixels_per_work_item) { const VECTOR8 r0 = vload8(0, src_ptr); @@ -556,7 +556,7 @@ __kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step, vstore8((VECTOR8)(Y.s0, Cr.s0, Cb.s0, 0, Y.s1, Cr.s1, Cb.s1, 0), 0, dst_ptr); } - else if (4 == pixels_per_work_item) +#elif (4 == pixels_per_work_item) { #ifndef DEPTH_5 const VECTOR16 r0 = vload16(0, src_ptr); @@ -575,7 +575,7 @@ __kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step, vstore16((VECTOR16)(Y.s0, Cr.s0, Cb.s0, 0, Y.s1, Cr.s1, Cb.s1, 0, Y.s2, Cr.s2, Cb.s2, 0, Y.s3, Cr.s3, Cb.s3, 0), 0, dst_ptr); #endif } -#endif //INTEL_DEVICE +#endif //pixels_per_work_item } } @@ -604,7 +604,7 @@ __kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step, __constant int * coeffs = c_YCrCb2RGBCoeffs_i; #endif - if (1 == pixels_per_work_item) +#if (1 == pixels_per_work_item) { const DATA_TYPE ycrcb[] = {src_ptr[0], src_ptr[1], src_ptr[2]}; @@ -625,8 +625,7 @@ __kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step, dst_ptr[3] = MAX_NUM; #endif } -#ifdef INTEL_DEVICE - else if (2 == pixels_per_work_item) +#elif (2 == pixels_per_work_item) { const VECTOR8 r0 = vload8(0, src_ptr); @@ -658,7 +657,7 @@ __kernel void YCrCb2RGB(int cols, int rows, 
int src_step, int dst_step, vstore8((VECTOR8)(c0.s0, c1.s0, c2.s0, 0, c0.s1, c1.s1, c2.s1, 0), 0, dst_ptr); #endif } - else if (4 == pixels_per_work_item) +#elif (4 == pixels_per_work_item) { #ifndef DEPTH_5 const VECTOR16 r0 = vload16(0, src_ptr); @@ -682,7 +681,7 @@ __kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step, #endif #endif } -#endif //INTEL_DEVICE +#endif //pixels_per_work_item } } @@ -704,7 +703,7 @@ __kernel void RGB2XYZ(int cols, int rows, int src_step, int dst_step, global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx); global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx); - if (1 == pixels_per_work_item) +#if (1 == pixels_per_work_item) { DATA_TYPE R = src_ptr[0], G = src_ptr[1], B = src_ptr[2]; @@ -722,8 +721,7 @@ __kernel void RGB2XYZ(int cols, int rows, int src_step, int dst_step, dst_ptr[1] = SAT_CAST( Y ); dst_ptr[2] = SAT_CAST( Z ); } -#ifdef INTEL_DEVICE - else if (2 == pixels_per_work_item) +#elif (2 == pixels_per_work_item) { const VECTOR8 r0 = vload8(0, src_ptr); @@ -751,7 +749,7 @@ __kernel void RGB2XYZ(int cols, int rows, int src_step, int dst_step, vstore8((VECTOR8)(X.s0, Y.s0, Z.s0, 0, X.s1, Y.s1, Z.s1, 0), 0, dst_ptr); } - else if (4 == pixels_per_work_item) +#elif (4 == pixels_per_work_item) { #ifndef DEPTH_5 const VECTOR16 r0 = vload16(0, src_ptr); @@ -771,7 +769,7 @@ __kernel void RGB2XYZ(int cols, int rows, int src_step, int dst_step, vstore16((VECTOR16)(X.s0, Y.s0, Z.s0, 0, X.s1, Y.s1, Z.s1, 0, X.s2, Y.s2, Z.s2, 0, X.s3, Y.s3, Z.s3, 0), 0, dst_ptr); #endif } -#endif //INTEL_DEVICE +#endif //pixels_per_work_item } } @@ -791,7 +789,7 @@ __kernel void XYZ2RGB(int cols, int rows, int src_step, int dst_step, global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx); global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx); - if (1 == pixels_per_work_item) +#if (1 == pixels_per_work_item) { const DATA_TYPE X = src_ptr[0], Y = src_ptr[1], Z = src_ptr[2]; @@ -812,8 +810,7 @@ __kernel 
void XYZ2RGB(int cols, int rows, int src_step, int dst_step, dst_ptr[3] = MAX_NUM; #endif } -#ifdef INTEL_DEVICE - else if (2 == pixels_per_work_item) +#elif (2 == pixels_per_work_item) { const VECTOR8 r0 = vload8(0, src_ptr); @@ -845,7 +842,7 @@ __kernel void XYZ2RGB(int cols, int rows, int src_step, int dst_step, vstore8((VECTOR8)(B.s0, G.s0, R.s0, 0, B.s1, G.s1, R.s1, 0), 0, dst_ptr); #endif } - else if (4 == pixels_per_work_item) +#elif (4 == pixels_per_work_item) { #ifndef DEPTH_5 const VECTOR16 r0 = vload16(0, src_ptr); @@ -869,7 +866,7 @@ __kernel void XYZ2RGB(int cols, int rows, int src_step, int dst_step, #endif #endif } -#endif //INTEL_DEVICE +#endif // pixels_per_work_item } } @@ -906,7 +903,7 @@ __kernel void RGB(int cols, int rows, int src_step, int dst_step, dst[dst_idx + 3] = src[src_idx + 3]; #endif #endif -#else +#else //INTEL_DEVICE global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx); global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx); @@ -936,7 +933,7 @@ __kernel void RGB(int cols, int rows, int src_step, int dst_step, vstore4(r0, 0, dst_ptr); } #endif -#endif +#endif //INTEL_DEVICE } } @@ -1476,7 +1473,7 @@ __kernel void RGBA2mRGBA(int cols, int rows, int src_step, int dst_step, global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx); global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx); - if (1 == pixels_per_work_item) +#if (1 == pixels_per_work_item) { const uchar4 r0 = vload4(0, src_ptr); @@ -1485,8 +1482,7 @@ __kernel void RGBA2mRGBA(int cols, int rows, int src_step, int dst_step, dst_ptr[2] = (r0.s2 * r0.s3 + HALF_MAX) / MAX_NUM; dst_ptr[3] = r0.s3; } -#ifdef INTEL_DEVICE - else if (2 == pixels_per_work_item) +#elif (2 == pixels_per_work_item) { const uchar8 r0 = vload8(0, src_ptr); @@ -1505,7 +1501,7 @@ __kernel void RGBA2mRGBA(int cols, int rows, int src_step, int dst_step, vstore8((uchar8)(r.s0, g.s0, b.s0, v3.s0, r.s1, g.s1, b.s1, v3.s1), 0, dst_ptr); } - else if (4 == pixels_per_work_item) 
+#elif (4 == pixels_per_work_item) { const uchar16 r0 = vload16(0, src_ptr); @@ -1524,7 +1520,7 @@ __kernel void RGBA2mRGBA(int cols, int rows, int src_step, int dst_step, vstore16((uchar16)(r.s0, g.s0, b.s0, v3.s0, r.s1, g.s1, b.s1, v3.s1, r.s2, g.s2, b.s2, v3.s2, r.s3, g.s3, b.s3, v3.s3), 0, dst_ptr); } -#endif //INTEL_DEVICE +#endif // pixels_per_work_item } } @@ -1544,7 +1540,7 @@ __kernel void mRGBA2RGBA(int cols, int rows, int src_step, int dst_step, global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx); global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx); - if (1 == pixels_per_work_item) +#if (1 == pixels_per_work_item) { const uchar4 r0 = vload4(0, src_ptr); const uchar v3_half = r0.s3 / 2; @@ -1555,8 +1551,7 @@ __kernel void mRGBA2RGBA(int cols, int rows, int src_step, int dst_step, vstore4((uchar4)(r, g, b, r0.s3), 0, dst_ptr); } -#ifdef INTEL_DEVICE - else if (2 == pixels_per_work_item) +#elif (2 == pixels_per_work_item) { const uchar8 r0 = vload8(0, src_ptr); @@ -1576,7 +1571,7 @@ __kernel void mRGBA2RGBA(int cols, int rows, int src_step, int dst_step, vstore8((uchar8)(r.s0, g.s0, b.s0, v3.s0, r.s1, g.s1, b.s1, v3.s1), 0, dst_ptr); } - else if (4 == pixels_per_work_item) +#elif (4 == pixels_per_work_item) { const uchar16 r0 = vload16(0, src_ptr); @@ -1597,7 +1592,7 @@ __kernel void mRGBA2RGBA(int cols, int rows, int src_step, int dst_step, vstore16((uchar16)(r.s0, g.s0, b.s0, v3.s0, r.s1, g.s1, b.s1, v3.s1, r.s2, g.s2, b.s2, v3.s2, r.s3, g.s3, b.s3, v3.s3), 0, dst_ptr); } -#endif //INTEL_DEVICE +#endif // pixels_per_work_item } }