From 634e8d37cc208f5f0f54891d7dc7d4f991769e7f Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 19 Feb 2014 18:13:09 +0400 Subject: [PATCH] fixed for 2 channels --- modules/photo/perf/opencl/perf_denoising.cpp | 4 +- ...fast_nlmeans_denoising_invoker_commons.hpp | 35 +++++++++- modules/photo/src/opencl/nlmeans.cl | 70 ++++++++++++------- modules/photo/test/ocl/test_denoising.cpp | 4 +- 4 files changed, 84 insertions(+), 29 deletions(-) diff --git a/modules/photo/perf/opencl/perf_denoising.cpp b/modules/photo/perf/opencl/perf_denoising.cpp index 66903457d2..0a0a2d6b53 100644 --- a/modules/photo/perf/opencl/perf_denoising.cpp +++ b/modules/photo/perf/opencl/perf_denoising.cpp @@ -45,7 +45,7 @@ OCL_PERF_TEST(Photo, DenoisingColored) SANITY_CHECK(result); } -OCL_PERF_TEST(Photo, DenoisingGrayscaleMulti) +OCL_PERF_TEST(Photo, DISABLED_DenoisingGrayscaleMulti) { const int imgs_count = 3; @@ -68,7 +68,7 @@ OCL_PERF_TEST(Photo, DenoisingGrayscaleMulti) SANITY_CHECK(result); } -OCL_PERF_TEST(Photo, DenoisingColoredMulti) +OCL_PERF_TEST(Photo, DISABLED_DenoisingColoredMulti) { const int imgs_count = 3; diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp index 7a2650077b..ab7db5d2d7 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp @@ -84,7 +84,6 @@ template static inline void incWithWeight(int* estimation, int weig template <> inline void incWithWeight(int* estimation, int weight, uchar p) { - estimation[0] += weight * p; } @@ -101,6 +100,24 @@ template <> inline void incWithWeight(int* estimation, int weight, Vec3b p) estimation[2] += weight * p[2]; } +template <> inline void incWithWeight(int* estimation, int weight, int p) +{ + estimation[0] += weight * p; +} + +template <> inline void incWithWeight(int* estimation, int weight, Vec2i p) +{ + estimation[0] += weight * p[0]; + estimation[1] +=
weight * p[1]; +} + +template <> inline void incWithWeight(int* estimation, int weight, Vec3i p) +{ + estimation[0] += weight * p[0]; + estimation[1] += weight * p[1]; + estimation[2] += weight * p[2]; +} + template static inline T saturateCastFromArray(int* estimation); template <> inline uchar saturateCastFromArray(int* estimation) @@ -125,4 +142,20 @@ template <> inline Vec3b saturateCastFromArray(int* estimation) return res; } +template <> inline int saturateCastFromArray(int* estimation) +{ + return estimation[0]; +} + +template <> inline Vec2i saturateCastFromArray(int* estimation) +{ + estimation[1] = 0; + return Vec2i(estimation); +} + +template <> inline Vec3i saturateCastFromArray(int* estimation) +{ + return Vec3i(estimation); +} + #endif diff --git a/modules/photo/src/opencl/nlmeans.cl b/modules/photo/src/opencl/nlmeans.cl index 583d42e96c..7b13ea5e11 100644 --- a/modules/photo/src/opencl/nlmeans.cl +++ b/modules/photo/src/opencl/nlmeans.cl @@ -40,41 +40,61 @@ __kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almost #elif defined OP_CALC_FASTNLMEANS +#define noconvert + #define SEARCH_SIZE_SQ (SEARCH_SIZE * SEARCH_SIZE) -inline int_t calcDist(uchar_t a, uchar_t b) +inline int calcDist(uchar_t a, uchar_t b) { int_t diff = convert_int_t(a) - convert_int_t(b); - return diff * diff; + int_t retval = diff * diff; + +#if cn == 1 + return retval; +#elif cn == 2 + return retval.x + retval.y; +#else +#error "cn should be either 1 or 2" +#endif } -inline int_t calcDistUpDown(uchar_t down_value, uchar_t down_value_t, uchar_t up_value, uchar_t up_value_t) +inline int calcDistUpDown(uchar_t down_value, uchar_t down_value_t, uchar_t up_value, uchar_t up_value_t) { int_t A = convert_int_t(down_value) - convert_int_t(down_value_t); int_t B = convert_int_t(up_value) - convert_int_t(up_value_t); - return (A - B) * (A + B); + int_t retval = (A - B) * (A + B); + +#if cn == 1 + return retval; +#elif cn == 2 + return retval.x + retval.y; +#else 
+#error "cn should be either 1 or 2" +#endif } +#define COND if (x == 0 && y == 0) + inline void calcFirstElementInRow(__global const uchar * src, int src_step, int src_offset, - __local int_t * dists, int y, int x, int id, - __global int_t * col_dists, __global int_t * up_col_dists) + __local int * dists, int y, int x, int id, + __global int * col_dists, __global int * up_col_dists) { y -= TEMPLATE_SIZE2; int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2; - int_t col_dists_current_private[TEMPLATE_SIZE]; + int col_dists_current_private[TEMPLATE_SIZE]; for (int i = id, size = SEARCH_SIZE_SQ; i < size; i += CTA_SIZE) { - int_t dist = (int_t)(0), value; + int dist = 0, value; __global const uchar_t * src_template = (__global const uchar_t *)(src + mad24(sy + i / SEARCH_SIZE, src_step, mad24(cn, sx + i % SEARCH_SIZE, src_offset))); __global const uchar_t * src_current = (__global const uchar_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset))); - __global int_t * col_dists_current = col_dists + i * TEMPLATE_SIZE; + __global int * col_dists_current = col_dists + i * TEMPLATE_SIZE; #pragma unroll for (int j = 0; j < TEMPLATE_SIZE; ++j) - col_dists_current_private[j] = (int_t)(0); + col_dists_current_private[j] = 0; for (int ty = 0; ty < TEMPLATE_SIZE; ++ty) { @@ -95,14 +115,16 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int for (int j = 0; j < TEMPLATE_SIZE; ++j) col_dists_current[j] = col_dists_current_private[j]; +// COND printf("%d %d\n", i, convert_int(dist)); + dists[i] = dist; up_col_dists[0 + i] = col_dists[TEMPLATE_SIZE - 1]; } } inline void calcElementInFirstRow(__global const uchar * src, int src_step, int src_offset, - __local int_t * dists, int y, int x0, int x, int id, int first, - __global int_t * col_dists, __global int_t * up_col_dists) + __local int * dists, int y, int x0, int x, int id, int first, + __global int * col_dists, __global int * up_col_dists) { x += TEMPLATE_SIZE2; y -= TEMPLATE_SIZE2; @@ -113,9 +135,9 
@@ inline void calcElementInFirstRow(__global const uchar * src, int src_step, int __global const uchar_t * src_current = (__global const uchar_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset))); __global const uchar_t * src_template = (__global const uchar_t *)(src + mad24(sy + i / SEARCH_SIZE, src_step, mad24(cn, sx + i % SEARCH_SIZE, src_offset))); - __global int_t * col_dists_current = col_dists + TEMPLATE_SIZE * i; + __global int * col_dists_current = col_dists + TEMPLATE_SIZE * i; - int_t col_dist = (int_t)(0); + int col_dist = 0; #pragma unroll for (int ty = 0; ty < TEMPLATE_SIZE; ++ty) @@ -133,8 +155,8 @@ inline void calcElementInFirstRow(__global const uchar * src, int src_step, int } inline void calcElement(__global const uchar * src, int src_step, int src_offset, - __local int_t * dists, int y, int x0, int x, int id, int first, - __global int_t * col_dists, __global int_t * up_col_dists) + __local int * dists, int y, int x0, int x, int id, int first, + __global int * col_dists, __global int * up_col_dists) { int sx = x + TEMPLATE_SIZE2; int sy_up = y - TEMPLATE_SIZE2 - 1; @@ -154,10 +176,10 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset uchar_t up_value_t = *(__global const uchar_t *)(src + mad24(sy_up + wy, src_step, mad24(cn, sx + wx, src_offset))); uchar_t down_value_t = *(__global const uchar_t *)(src + mad24(sy_down + wy, src_step, mad24(cn, sx + wx, src_offset))); - __global int_t * col_dists_current = col_dists + mad24(i, TEMPLATE_SIZE, first); - __global int_t * up_col_dists_current = up_col_dists + mad24(x0, SEARCH_SIZE_SQ, i); + __global int * col_dists_current = col_dists + mad24(i, TEMPLATE_SIZE, first); + __global int * up_col_dists_current = up_col_dists + mad24(x0, SEARCH_SIZE_SQ, i); - int_t col_dist = up_col_dists_current[0] + calcDistUpDown(down_value, down_value_t, up_value, up_value_t); + int col_dist = up_col_dists_current[0] + calcDistUpDown(down_value, down_value_t, up_value, up_value_t); 
dists[i] += col_dist - col_dists_current[0]; col_dists_current[0] = col_dist; @@ -219,7 +241,7 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off weighted_sum_local[2] + weighted_sum_local[3]; int weights_local_0 = weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3]; - *(__global uchar_t *)(dst + dst_index) = convert_uchar_t(weighted_sum_local_0 / weights_local_0); + *(__global uchar_t *)(dst + dst_index) = convert_uchar_t(weighted_sum_local_0 / (int_t)(weights_local_0)); } } @@ -232,8 +254,8 @@ __kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int int block_y = get_group_id(1); int id = get_local_id(0), first; - __local int_t dists[SEARCH_SIZE_SQ], weighted_sum[CTA_SIZE2]; - __local int weights[CTA_SIZE2]; + __local int dists[SEARCH_SIZE_SQ], weights[CTA_SIZE2]; + __local int_t weighted_sum[CTA_SIZE2]; int x0 = block_x * BLOCK_COLS, x1 = min(x0 + BLOCK_COLS, dst_cols); int y0 = block_y * BLOCK_ROWS, y1 = min(y0 + BLOCK_ROWS, dst_rows); @@ -241,8 +263,8 @@ __kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int // for each group we need SEARCH_SIZE_SQ * TEMPLATE_SIZE integer buffer for storing part column sum for current element // and SEARCH_SIZE_SQ * BLOCK_COLS integer buffer for storing last column sum for each element of search window of up row int block_data_start = SEARCH_SIZE_SQ * (mad24(block_y, dst_cols, x0) + mad24(block_y, nblocks_x, block_x) * TEMPLATE_SIZE); - __global int_t * col_dists = (__global int_t *)(buffer + block_data_start * sizeof(int_t)); - __global int_t * up_col_dists = col_dists + SEARCH_SIZE_SQ * TEMPLATE_SIZE; + __global int * col_dists = (__global int *)(buffer + block_data_start * sizeof(int)); + __global int * up_col_dists = col_dists + SEARCH_SIZE_SQ * TEMPLATE_SIZE; for (int y = y0; y < y1; ++y) for (int x = x0; x < x1; ++x) diff --git a/modules/photo/test/ocl/test_denoising.cpp b/modules/photo/test/ocl/test_denoising.cpp 
index 9f504d0a02..6630b0291c 100644 --- a/modules/photo/test/ocl/test_denoising.cpp +++ b/modules/photo/test/ocl/test_denoising.cpp @@ -71,7 +71,7 @@ PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool) { const int type = CV_8UC(cn); - Size roiSize = randomSize(10, MAX_VALUE); + Size roiSize = randomSize(1, MAX_VALUE); Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); randomSubMat(src, src_roi, roiSize, srcBorder, type, 0, 255); @@ -98,7 +98,7 @@ OCL_TEST_P(FastNlMeansDenoising, Mat) } } -OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, Combine(Values((Channels)1), Bool())); +OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, Combine(Values(1, 2), Bool())); } } // namespace cvtest::ocl