Change code according to reviewer suggestions

pull/2023/head
krodyush 11 years ago
parent 27c1bd2762
commit e8dd31aacd
  1. 93
      modules/ocl/perf/perf_color.cpp
  2. 87
      modules/ocl/src/opencl/cvt_color.cl

@ -57,39 +57,9 @@ CV_ENUM(ConversionTypes, CV_RGB2GRAY, CV_RGB2BGR, CV_RGB2YUV, CV_YUV2RGB, CV_RGB
CV_HLS2RGB, CV_BGR5652BGR, CV_BGR2BGR565, CV_RGBA2mRGBA, CV_mRGBA2RGBA, CV_YUV2RGB_NV12)
typedef tuple<Size, tuple<ConversionTypes, int, int> > cvtColorParams;
typedef TestBaseWithParam<cvtColorParams> cvtColorU8Fixture;
typedef TestBaseWithParam<cvtColorParams> cvtColorF32Fixture;
typedef TestBaseWithParam<cvtColorParams> cvtColorU16Fixture;
typedef TestBaseWithParam<cvtColorParams> cvtColorFixture;
#define RUN_CVT_PERF_TEST \
cvtColorParams params = GetParam();\
const Size srcSize = get<0>(params);\
const tuple<int, int, int> conversionParams = get<1>(params);\
const int code = get<0>(conversionParams), scn = get<1>(conversionParams),\
dcn = get<2>(conversionParams);\
\
Mat src(srcSize, CV_8UC(scn)), dst(srcSize, CV_8UC(scn));\
declare.in(src, WARMUP_RNG).out(dst);\
\
if (RUN_OCL_IMPL)\
{\
ocl::oclMat oclSrc(src), oclDst(src.size(), dst.type());\
\
OCL_TEST_CYCLE() ocl::cvtColor(oclSrc, oclDst, code, dcn);\
oclDst.download(dst);\
\
SANITY_CHECK(dst, 1);\
}\
else if (RUN_PLAIN_IMPL)\
{\
TEST_CYCLE() cv::cvtColor(src, dst, code, dcn);\
\
SANITY_CHECK(dst);\
}\
else\
OCL_PERF_ELSE\
PERF_TEST_P(cvtColorU8Fixture, cvtColor, testing::Combine(
PERF_TEST_P(cvtColorFixture, cvtColor, testing::Combine(
testing::Values(Size(1000, 1002), Size(2000, 2004), Size(4000, 4008)),
testing::Values(
make_tuple(ConversionTypes(CV_RGB2GRAY), 3, 1),
@ -111,41 +81,30 @@ PERF_TEST_P(cvtColorU8Fixture, cvtColor, testing::Combine(
make_tuple(ConversionTypes(CV_YUV2RGB_NV12), 1, 3)
)))
{
RUN_CVT_PERF_TEST
}
cvtColorParams params = GetParam();
const Size srcSize = get<0>(params);
const tuple<int, int, int> conversionParams = get<1>(params);
const int code = get<0>(conversionParams), scn = get<1>(conversionParams),
dcn = get<2>(conversionParams);
PERF_TEST_P(cvtColorF32Fixture, cvtColor, testing::Combine(
testing::Values(Size(1000, 1002), Size(2000, 2004), Size(4000, 4008)),
testing::Values(
make_tuple(ConversionTypes(CV_RGB2GRAY), 3, 1),
make_tuple(ConversionTypes(CV_RGB2BGR), 3, 3),
make_tuple(ConversionTypes(CV_RGB2YUV), 3, 3),
make_tuple(ConversionTypes(CV_YUV2RGB), 3, 3),
make_tuple(ConversionTypes(CV_RGB2YCrCb), 3, 3),
make_tuple(ConversionTypes(CV_YCrCb2RGB), 3, 3),
make_tuple(ConversionTypes(CV_RGB2XYZ), 3, 3),
make_tuple(ConversionTypes(CV_XYZ2RGB), 3, 3),
make_tuple(ConversionTypes(CV_RGB2HSV), 3, 3),
make_tuple(ConversionTypes(CV_HSV2RGB), 3, 3),
make_tuple(ConversionTypes(CV_RGB2HLS), 3, 3),
make_tuple(ConversionTypes(CV_HLS2RGB), 3, 3)
)))
{
RUN_CVT_PERF_TEST
}
Mat src(srcSize, CV_8UC(scn)), dst(srcSize, CV_8UC(scn));
declare.in(src, WARMUP_RNG).out(dst);
PERF_TEST_P(cvtColorU16Fixture, cvtColor, testing::Combine(
testing::Values(Size(1000, 1002), Size(2000, 2004), Size(4000, 4008)),
testing::Values(
make_tuple(ConversionTypes(CV_RGB2GRAY), 3, 1),
make_tuple(ConversionTypes(CV_RGB2BGR), 3, 3),
make_tuple(ConversionTypes(CV_RGB2YUV), 3, 3),
make_tuple(ConversionTypes(CV_YUV2RGB), 3, 3),
make_tuple(ConversionTypes(CV_RGB2YCrCb), 3, 3),
make_tuple(ConversionTypes(CV_YCrCb2RGB), 3, 3),
make_tuple(ConversionTypes(CV_RGB2XYZ), 3, 3),
make_tuple(ConversionTypes(CV_XYZ2RGB), 3, 3)
)))
{
RUN_CVT_PERF_TEST
if (RUN_OCL_IMPL)
{
ocl::oclMat oclSrc(src), oclDst(src.size(), dst.type());
OCL_TEST_CYCLE() ocl::cvtColor(oclSrc, oclDst, code, dcn);
oclDst.download(dst);
SANITY_CHECK(dst, 1);
}
else if (RUN_PLAIN_IMPL)
{
TEST_CYCLE() cv::cvtColor(src, dst, code, dcn);
SANITY_CHECK(dst);
}
else
OCL_PERF_ELSE
}

@ -133,12 +133,14 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step,
int dst_idx = mad24(y, dst_step, dst_offset + x);
#ifndef INTEL_DEVICE
#ifdef DEPTH_5
dst[dst_idx] = src[src_idx + bidx] * 0.114f + src[src_idx + 1] * 0.587f + src[src_idx + (bidx^2)] * 0.299f;
#else
dst[dst_idx] = (DATA_TYPE)CV_DESCALE((src[src_idx + bidx] * B2Y + src[src_idx + 1] * G2Y + src[src_idx + (bidx^2)] * R2Y), yuv_shift);
#endif
#else
#else //INTEL_DEVICE
global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx);
global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx);
@ -148,7 +150,7 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step,
__constant int * coeffs = c_RGB2GrayCoeffs_i;
#endif
if (1 == pixels_per_work_item)
#if (1 == pixels_per_work_item)
{
#ifdef DEPTH_5
*dst_ptr = src_ptr[bidx] * coeffs[0] + src_ptr[1] * coeffs[1] + src_ptr[(bidx^2)] *coeffs[2];
@ -156,7 +158,7 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step,
*dst_ptr = (DATA_TYPE)CV_DESCALE((src_ptr[bidx] * coeffs[0] + src_ptr[1] * coeffs[1] + src_ptr[(bidx^2)] * coeffs[2]), yuv_shift);
#endif
}
else if (2 == pixels_per_work_item)
#elif (2 == pixels_per_work_item)
{
const VECTOR8 r0 = vload8(0, src_ptr);
@ -177,7 +179,7 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step,
vstore2(Y, 0, dst_ptr);
}
else if (4 == pixels_per_work_item)
#elif (4 == pixels_per_work_item)
{
#ifndef DEPTH_5
const VECTOR16 r0 = vload16(0, src_ptr);
@ -190,6 +192,7 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step,
vstore4(SAT_CAST4(Y), 0, dst_ptr);
#endif
}
#endif //pixels_per_work_item
#endif //INTEL_DEVICE
}
}
@ -244,7 +247,7 @@ __kernel void RGB2YUV(int cols, int rows, int src_step, int dst_step,
const int delta = HALF_MAX * (1 << yuv_shift);
#endif
if (1 == pixels_per_work_item)
#if (1 == pixels_per_work_item)
{
const DATA_TYPE rgb[] = {src_ptr[0], src_ptr[1], src_ptr[2]};
@ -262,8 +265,7 @@ __kernel void RGB2YUV(int cols, int rows, int src_step, int dst_step,
dst_ptr[1] = SAT_CAST( U );
dst_ptr[2] = SAT_CAST( V );
}
#ifdef INTEL_DEVICE
else if (2 == pixels_per_work_item)
#elif (2 == pixels_per_work_item)
{
const VECTOR8 r0 = vload8(0, src_ptr);
@ -291,7 +293,7 @@ __kernel void RGB2YUV(int cols, int rows, int src_step, int dst_step,
vstore8((VECTOR8)(Y.s0, U.s0, V.s0, 0, Y.s1, U.s1, V.s1, 0), 0, dst_ptr);
}
else if (4 == pixels_per_work_item)
#elif (4 == pixels_per_work_item)
{
#ifndef DEPTH_5
const VECTOR16 r0 = vload16(0, src_ptr);
@ -311,7 +313,7 @@ __kernel void RGB2YUV(int cols, int rows, int src_step, int dst_step,
vstore16((VECTOR16)(Y.s0, U.s0, V.s0, 0, Y.s1, U.s1, V.s1, 0, Y.s2, U.s2, V.s2, 0, Y.s3, U.s3, V.s3, 0), 0, dst_ptr);
#endif
}
#endif //INTEL_DEVICE
#endif //pixels_per_work_item
}
}
@ -340,7 +342,7 @@ __kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step,
__constant int * coeffs = c_YUV2RGBCoeffs_i;
#endif
if (1 == pixels_per_work_item)
#if (1 == pixels_per_work_item)
{
const DATA_TYPE yuv[] = {src_ptr[0], src_ptr[1], src_ptr[2]};
@ -361,8 +363,7 @@ __kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step,
dst_ptr[3] = MAX_NUM;
#endif
}
#ifdef INTEL_DEVICE
else if (2 == pixels_per_work_item)
#elif (2 == pixels_per_work_item)
{
const VECTOR8 r0 = vload8(0, src_ptr);
@ -394,7 +395,7 @@ __kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step,
vstore8((VECTOR8)(c0.s0, c1.s0, c2.s0, 0, c0.s1, c1.s1, c2.s1, 0), 0, dst_ptr);
#endif
}
else if (4 == pixels_per_work_item)
#elif (4 == pixels_per_work_item)
{
#ifndef DEPTH_5
const VECTOR16 r0 = vload16(0, src_ptr);
@ -418,7 +419,7 @@ __kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step,
#endif
#endif
}
#endif //INTEL_DEVICE
#endif //pixels_per_work_item
}
}
@ -509,7 +510,7 @@ __kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step,
const int delta = HALF_MAX * (1 << yuv_shift);
#endif
if (1 == pixels_per_work_item)
#if (1 == pixels_per_work_item)
{
const DATA_TYPE rgb[] = {src_ptr[0], src_ptr[1], src_ptr[2]};
@ -527,8 +528,7 @@ __kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step,
dst_ptr[1] = SAT_CAST( Cr );
dst_ptr[2] = SAT_CAST( Cb );
}
#ifdef INTEL_DEVICE
else if (2 == pixels_per_work_item)
#elif (2 == pixels_per_work_item)
{
const VECTOR8 r0 = vload8(0, src_ptr);
@ -556,7 +556,7 @@ __kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step,
vstore8((VECTOR8)(Y.s0, Cr.s0, Cb.s0, 0, Y.s1, Cr.s1, Cb.s1, 0), 0, dst_ptr);
}
else if (4 == pixels_per_work_item)
#elif (4 == pixels_per_work_item)
{
#ifndef DEPTH_5
const VECTOR16 r0 = vload16(0, src_ptr);
@ -575,7 +575,7 @@ __kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step,
vstore16((VECTOR16)(Y.s0, Cr.s0, Cb.s0, 0, Y.s1, Cr.s1, Cb.s1, 0, Y.s2, Cr.s2, Cb.s2, 0, Y.s3, Cr.s3, Cb.s3, 0), 0, dst_ptr);
#endif
}
#endif //INTEL_DEVICE
#endif //pixels_per_work_item
}
}
@ -604,7 +604,7 @@ __kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step,
__constant int * coeffs = c_YCrCb2RGBCoeffs_i;
#endif
if (1 == pixels_per_work_item)
#if (1 == pixels_per_work_item)
{
const DATA_TYPE ycrcb[] = {src_ptr[0], src_ptr[1], src_ptr[2]};
@ -625,8 +625,7 @@ __kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step,
dst_ptr[3] = MAX_NUM;
#endif
}
#ifdef INTEL_DEVICE
else if (2 == pixels_per_work_item)
#elif (2 == pixels_per_work_item)
{
const VECTOR8 r0 = vload8(0, src_ptr);
@ -658,7 +657,7 @@ __kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step,
vstore8((VECTOR8)(c0.s0, c1.s0, c2.s0, 0, c0.s1, c1.s1, c2.s1, 0), 0, dst_ptr);
#endif
}
else if (4 == pixels_per_work_item)
#elif (4 == pixels_per_work_item)
{
#ifndef DEPTH_5
const VECTOR16 r0 = vload16(0, src_ptr);
@ -682,7 +681,7 @@ __kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step,
#endif
#endif
}
#endif //INTEL_DEVICE
#endif //pixels_per_work_item
}
}
@ -704,7 +703,7 @@ __kernel void RGB2XYZ(int cols, int rows, int src_step, int dst_step,
global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx);
global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx);
if (1 == pixels_per_work_item)
#if (1 == pixels_per_work_item)
{
DATA_TYPE R = src_ptr[0], G = src_ptr[1], B = src_ptr[2];
@ -722,8 +721,7 @@ __kernel void RGB2XYZ(int cols, int rows, int src_step, int dst_step,
dst_ptr[1] = SAT_CAST( Y );
dst_ptr[2] = SAT_CAST( Z );
}
#ifdef INTEL_DEVICE
else if (2 == pixels_per_work_item)
#elif (2 == pixels_per_work_item)
{
const VECTOR8 r0 = vload8(0, src_ptr);
@ -751,7 +749,7 @@ __kernel void RGB2XYZ(int cols, int rows, int src_step, int dst_step,
vstore8((VECTOR8)(X.s0, Y.s0, Z.s0, 0, X.s1, Y.s1, Z.s1, 0), 0, dst_ptr);
}
else if (4 == pixels_per_work_item)
#elif (4 == pixels_per_work_item)
{
#ifndef DEPTH_5
const VECTOR16 r0 = vload16(0, src_ptr);
@ -771,7 +769,7 @@ __kernel void RGB2XYZ(int cols, int rows, int src_step, int dst_step,
vstore16((VECTOR16)(X.s0, Y.s0, Z.s0, 0, X.s1, Y.s1, Z.s1, 0, X.s2, Y.s2, Z.s2, 0, X.s3, Y.s3, Z.s3, 0), 0, dst_ptr);
#endif
}
#endif //INTEL_DEVICE
#endif //pixels_per_work_item
}
}
@ -791,7 +789,7 @@ __kernel void XYZ2RGB(int cols, int rows, int src_step, int dst_step,
global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx);
global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx);
if (1 == pixels_per_work_item)
#if (1 == pixels_per_work_item)
{
const DATA_TYPE X = src_ptr[0], Y = src_ptr[1], Z = src_ptr[2];
@ -812,8 +810,7 @@ __kernel void XYZ2RGB(int cols, int rows, int src_step, int dst_step,
dst_ptr[3] = MAX_NUM;
#endif
}
#ifdef INTEL_DEVICE
else if (2 == pixels_per_work_item)
#elif (2 == pixels_per_work_item)
{
const VECTOR8 r0 = vload8(0, src_ptr);
@ -845,7 +842,7 @@ __kernel void XYZ2RGB(int cols, int rows, int src_step, int dst_step,
vstore8((VECTOR8)(B.s0, G.s0, R.s0, 0, B.s1, G.s1, R.s1, 0), 0, dst_ptr);
#endif
}
else if (4 == pixels_per_work_item)
#elif (4 == pixels_per_work_item)
{
#ifndef DEPTH_5
const VECTOR16 r0 = vload16(0, src_ptr);
@ -869,7 +866,7 @@ __kernel void XYZ2RGB(int cols, int rows, int src_step, int dst_step,
#endif
#endif
}
#endif //INTEL_DEVICE
#endif // pixels_per_work_item
}
}
@ -906,7 +903,7 @@ __kernel void RGB(int cols, int rows, int src_step, int dst_step,
dst[dst_idx + 3] = src[src_idx + 3];
#endif
#endif
#else
#else //INTEL_DEVICE
global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx);
global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx);
@ -936,7 +933,7 @@ __kernel void RGB(int cols, int rows, int src_step, int dst_step,
vstore4(r0, 0, dst_ptr);
}
#endif
#endif
#endif //INTEL_DEVICE
}
}
@ -1476,7 +1473,7 @@ __kernel void RGBA2mRGBA(int cols, int rows, int src_step, int dst_step,
global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx);
global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx);
if (1 == pixels_per_work_item)
#if (1 == pixels_per_work_item)
{
const uchar4 r0 = vload4(0, src_ptr);
@ -1485,8 +1482,7 @@ __kernel void RGBA2mRGBA(int cols, int rows, int src_step, int dst_step,
dst_ptr[2] = (r0.s2 * r0.s3 + HALF_MAX) / MAX_NUM;
dst_ptr[3] = r0.s3;
}
#ifdef INTEL_DEVICE
else if (2 == pixels_per_work_item)
#elif (2 == pixels_per_work_item)
{
const uchar8 r0 = vload8(0, src_ptr);
@ -1505,7 +1501,7 @@ __kernel void RGBA2mRGBA(int cols, int rows, int src_step, int dst_step,
vstore8((uchar8)(r.s0, g.s0, b.s0, v3.s0, r.s1, g.s1, b.s1, v3.s1), 0, dst_ptr);
}
else if (4 == pixels_per_work_item)
#elif (4 == pixels_per_work_item)
{
const uchar16 r0 = vload16(0, src_ptr);
@ -1524,7 +1520,7 @@ __kernel void RGBA2mRGBA(int cols, int rows, int src_step, int dst_step,
vstore16((uchar16)(r.s0, g.s0, b.s0, v3.s0, r.s1, g.s1, b.s1, v3.s1, r.s2, g.s2, b.s2, v3.s2, r.s3, g.s3, b.s3, v3.s3), 0, dst_ptr);
}
#endif //INTEL_DEVICE
#endif // pixels_per_work_item
}
}
@ -1544,7 +1540,7 @@ __kernel void mRGBA2RGBA(int cols, int rows, int src_step, int dst_step,
global DATA_TYPE *src_ptr = (global DATA_TYPE *)(src + src_idx);
global DATA_TYPE *dst_ptr = (global DATA_TYPE *)(dst + dst_idx);
if (1 == pixels_per_work_item)
#if (1 == pixels_per_work_item)
{
const uchar4 r0 = vload4(0, src_ptr);
const uchar v3_half = r0.s3 / 2;
@ -1555,8 +1551,7 @@ __kernel void mRGBA2RGBA(int cols, int rows, int src_step, int dst_step,
vstore4((uchar4)(r, g, b, r0.s3), 0, dst_ptr);
}
#ifdef INTEL_DEVICE
else if (2 == pixels_per_work_item)
#elif (2 == pixels_per_work_item)
{
const uchar8 r0 = vload8(0, src_ptr);
@ -1576,7 +1571,7 @@ __kernel void mRGBA2RGBA(int cols, int rows, int src_step, int dst_step,
vstore8((uchar8)(r.s0, g.s0, b.s0, v3.s0, r.s1, g.s1, b.s1, v3.s1), 0, dst_ptr);
}
else if (4 == pixels_per_work_item)
#elif (4 == pixels_per_work_item)
{
const uchar16 r0 = vload16(0, src_ptr);
@ -1597,7 +1592,7 @@ __kernel void mRGBA2RGBA(int cols, int rows, int src_step, int dst_step,
vstore16((uchar16)(r.s0, g.s0, b.s0, v3.s0, r.s1, g.s1, b.s1, v3.s1, r.s2, g.s2, b.s2, v3.s2, r.s3, g.s3, b.s3, v3.s3), 0, dst_ptr);
}
#endif //INTEL_DEVICE
#endif // pixels_per_work_item
}
}

Loading…
Cancel
Save