From dd0fa63ca87d1a32ea8f48f893042f38d36778c8 Mon Sep 17 00:00:00 2001 From: yao Date: Fri, 25 Oct 2013 16:01:41 +0800 Subject: [PATCH 1/2] fix the bug of ocl::bruteForceMatcher --- modules/ocl/perf/perf_brute_force_matcher.cpp | 14 +++++++------- modules/ocl/src/opencl/brute_force_match.cl | 7 ++++++- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/modules/ocl/perf/perf_brute_force_matcher.cpp b/modules/ocl/perf/perf_brute_force_matcher.cpp index 33c42c72dc..09b99f5e2f 100644 --- a/modules/ocl/perf/perf_brute_force_matcher.cpp +++ b/modules/ocl/perf/perf_brute_force_matcher.cpp @@ -53,8 +53,8 @@ using namespace perf; typedef TestBaseWithParam<Size> BruteForceMatcherFixture; -PERF_TEST_P(BruteForceMatcherFixture, DISABLED_match, - OCL_BFMATCHER_TYPICAL_MAT_SIZES) // TODO too big difference between implementations +PERF_TEST_P(BruteForceMatcherFixture, match, + OCL_BFMATCHER_TYPICAL_MAT_SIZES) { const Size srcSize = GetParam(); @@ -82,14 +82,14 @@ PERF_TEST_P(BruteForceMatcherFixture, DISABLED_match, oclMatcher.matchDownload(oclTrainIdx, oclDistance, matches); - SANITY_CHECK_MATCHES(matches); + SANITY_CHECK_MATCHES(matches, 1e-5); } else OCL_PERF_ELSE } -PERF_TEST_P(BruteForceMatcherFixture, DISABLED_knnMatch, - OCL_BFMATCHER_TYPICAL_MAT_SIZES) // TODO too big difference between implementations +PERF_TEST_P(BruteForceMatcherFixture, knnMatch, + OCL_BFMATCHER_TYPICAL_MAT_SIZES) { const Size srcSize = GetParam(); @@ -123,8 +123,8 @@ PERF_TEST_P(BruteForceMatcherFixture, DISABLED_knnMatch, oclMatcher.knnMatchDownload(oclTrainIdx, oclDistance, matches); std::vector<DMatch> & matches0 = matches[0], & matches1 = matches[1]; - SANITY_CHECK_MATCHES(matches0); - SANITY_CHECK_MATCHES(matches1); + SANITY_CHECK_MATCHES(matches0, 1e-5); + SANITY_CHECK_MATCHES(matches1, 1e-5); } else OCL_PERF_ELSE diff --git a/modules/ocl/src/opencl/brute_force_match.cl b/modules/ocl/src/opencl/brute_force_match.cl index a05c98ee03..ad668e6e32 100644 --- 
a/modules/ocl/src/opencl/brute_force_match.cl +++ b/modules/ocl/src/opencl/brute_force_match.cl @@ -17,6 +17,7 @@ // @Authors // Nathan, liujun@multicorewareinc.com // Peng Xiao, pengxiao@outlook.com +// Baichuan Su, baichuan@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -128,7 +129,7 @@ result_type reduce_multi_block( s_query[lidy * MAX_DESC_LEN + block_index * BLOCK_SIZE + j], s_train[j * BLOCK_SIZE + lidx]); } - return DIST_RES(result); + return result; } /* 2dim launch, global size: dim0 is (query rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, dim1 is BLOCK_SIZE @@ -187,6 +188,8 @@ __kernel void BruteForceMatch_UnrollMatch( barrier(CLK_LOCAL_MEM_FENCE); } + result = DIST_RES(result); + int trainIdx = t * BLOCK_SIZE + lidx; if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance/* && mask(queryIdx, trainIdx)*/) @@ -493,6 +496,8 @@ __kernel void BruteForceMatch_knnUnrollMatch( barrier(CLK_LOCAL_MEM_FENCE); } + result = DIST_RES(result); + const int trainIdx = t * BLOCK_SIZE + lidx; if (queryIdx < query_rows && trainIdx < train_rows) From 632452cdd855144afbf5c638ad69a02b8b2c45db Mon Sep 17 00:00:00 2001 From: yao Date: Mon, 28 Oct 2013 16:32:46 +0800 Subject: [PATCH 2/2] fix the mismatch running on cpu devices --- modules/ocl/src/opencl/brute_force_match.cl | 26 +++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/modules/ocl/src/opencl/brute_force_match.cl b/modules/ocl/src/opencl/brute_force_match.cl index ad668e6e32..cb0aba255d 100644 --- a/modules/ocl/src/opencl/brute_force_match.cl +++ b/modules/ocl/src/opencl/brute_force_match.cl @@ -113,6 +113,24 @@ result_type reduce_block( return DIST_RES(result); } +result_type reduce_block_match( + __local value_type *s_query, + __local value_type *s_train, + int lidx, + int lidy + ) +{ + result_type result = 0; + #pragma unroll + for 
(int j = 0 ; j < BLOCK_SIZE ; j++) + { + result += DIST( + s_query[lidy * BLOCK_SIZE + j], + s_train[j * BLOCK_SIZE + lidx]); + } + return (result); +} + result_type reduce_multi_block( __local value_type *s_query, __local value_type *s_train, @@ -275,11 +293,13 @@ __kernel void BruteForceMatch_Match( barrier(CLK_LOCAL_MEM_FENCE); - result += reduce_block(s_query, s_train, lidx, lidy); + result += reduce_block_match(s_query, s_train, lidx, lidy); barrier(CLK_LOCAL_MEM_FENCE); } + result = DIST_RES(result); + const int trainIdx = t * BLOCK_SIZE + lidx; if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance /*&& mask(queryIdx, trainIdx)*/) @@ -636,11 +656,13 @@ __kernel void BruteForceMatch_knnMatch( barrier(CLK_LOCAL_MEM_FENCE); - result += reduce_block(s_query, s_train, lidx, lidy); + result += reduce_block_match(s_query, s_train, lidx, lidy); barrier(CLK_LOCAL_MEM_FENCE); } + result = DIST_RES(result); + const int trainIdx = t * BLOCK_SIZE + lidx; if (queryIdx < query_rows && trainIdx < train_rows /*&& mask(queryIdx, trainIdx)*/)