diff --git a/modules/gpu/src/brute_force_matcher.cpp b/modules/gpu/src/brute_force_matcher.cpp
index 1dd3c0a86e..ca9960dcce 100644
--- a/modules/gpu/src/brute_force_matcher.cpp
+++ b/modules/gpu/src/brute_force_matcher.cpp
@@ -492,6 +492,10 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs,
             radiusMatchL2_gpu, radiusMatchL2_gpu, radiusMatchL2_gpu, 0, 0
         }
     };
+
+    int major, minor;
+    getComputeCapability(getDevice(), major, minor);
+    CV_Assert(100 * major + 10 * minor >= 110); // works only on devices with CC >= 1.1
 
     const int nQuery = queryDescs.rows;
     const int nTrain = trainDescs.rows;
diff --git a/modules/gpu/src/cuda/brute_force_matcher.cu b/modules/gpu/src/cuda/brute_force_matcher.cu
index c2e2c1cef9..f42938f5b7 100644
--- a/modules/gpu/src/cuda/brute_force_matcher.cu
+++ b/modules/gpu/src/cuda/brute_force_matcher.cu
@@ -1104,6 +1104,8 @@ namespace cv { namespace gpu { namespace bfmatcher
     __global__ void radiusMatch(PtrStep_ queryDescs_, DevMem2D_ trainDescs_,
         float maxDistance, Mask mask, DevMem2Di trainIdx_, unsigned int* nMatches, PtrStepf distance)
     {
+        #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
+
         __shared__ float sdiff[BLOCK_DIM_X * BLOCK_DIM_Y];
 
         float* sdiff_row = sdiff + BLOCK_DIM_X * threadIdx.y;
@@ -1135,6 +1137,8 @@ namespace cv { namespace gpu { namespace bfmatcher
                 }
             }
         }
+
+        #endif
     }
 
     ///////////////////////////////////////////////////////////////////////////////
diff --git a/tests/gpu/src/brute_force_matcher.cpp b/tests/gpu/src/brute_force_matcher.cpp
index b4867e1f2b..cc182b2391 100644
--- a/tests/gpu/src/brute_force_matcher.cpp
+++ b/tests/gpu/src/brute_force_matcher.cpp
@@ -67,7 +67,7 @@ protected:
         const int desc_len = rng.uniform(40, 300);
 
         Mat queryCPU(rng.uniform(100, 300), desc_len, CV_32F);
-        rng.fill(queryCPU, cv::RNG::UNIFORM, cv::Scalar::all(0.0), cv::Scalar::all(1.0));
+        rng.fill(queryCPU, cv::RNG::UNIFORM, cv::Scalar::all(0.0), cv::Scalar::all(10.0));
         GpuMat queryGPU(queryCPU);
 
         const int nTrains = rng.uniform(1, 5);
@@ -81,7 +81,7 @@ protected:
         for (int i = 0; i < nTrains; ++i)
         {
             Mat train(rng.uniform(100, 300), desc_len, CV_32F);
-            rng.fill(train, cv::RNG::UNIFORM, cv::Scalar::all(0.0), cv::Scalar::all(1.0));
+            rng.fill(train, cv::RNG::UNIFORM, cv::Scalar::all(0.0), cv::Scalar::all(10.0));
 
             trainsCPU[i] = train;
             trainsGPU[i].upload(train);
@@ -89,7 +89,7 @@ protected:
             bool with_mask = rng.uniform(0, 10) < 5;
             if (with_mask)
             {
-                Mat mask(queryCPU.rows, train.rows, CV_8U, Scalar::all(1));
+                Mat mask(queryCPU.rows, train.rows, CV_8U);
                 rng.fill(mask, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(200));
 
                 masksCPU[i] = mask;
@@ -111,8 +111,8 @@ protected:
 
         const int knn = rng.uniform(3, 10);
 
-        matcherCPU.knnMatch(queryCPU, knnMatchesCPU, knn, masksCPU);
-        matcherGPU.knnMatch(queryGPU, knnMatchesGPU, knn, masksGPU);
+        matcherCPU.knnMatch(queryCPU, knnMatchesCPU, knn, masksCPU, true);
+        matcherGPU.knnMatch(queryGPU, knnMatchesGPU, knn, masksGPU, true);
 
         if (!compareMatches(knnMatchesCPU, knnMatchesGPU))
         {
@@ -120,10 +120,10 @@ protected:
             return;
         }
 
-        const float maxDistance = rng.uniform(0.01f, 0.3f);
+        const float maxDistance = rng.uniform(25.0f, 65.0f);
 
-        matcherCPU.radiusMatch(queryCPU, radiusMatchesCPU, maxDistance, masksCPU);
-        matcherGPU.radiusMatch(queryGPU, radiusMatchesGPU, maxDistance, masksGPU);
+        matcherCPU.radiusMatch(queryCPU, radiusMatchesCPU, maxDistance, masksCPU, true);
+        matcherGPU.radiusMatch(queryGPU, radiusMatchesGPU, maxDistance, masksGPU, true);
 
         if (!compareMatches(radiusMatchesCPU, radiusMatchesGPU))
         {
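
For context, a minimal sketch (not part of the patch) of how calling code could perform the same capability check up front, using only the cv::gpu::getDevice / getComputeCapability calls that the new assertion itself relies on; the CC >= 1.1 requirement presumably comes from the kernel's use of global-memory atomic updates to nMatches, which CC 1.0 devices do not support. The helper name below is hypothetical:

    #include <opencv2/gpu/gpu.hpp>

    // Illustrative helper (assumed name): true when the current CUDA device
    // meets the compute-capability requirement that radiusMatch now asserts.
    static bool deviceSupportsRadiusMatch()
    {
        int major = 0, minor = 0;
        cv::gpu::getComputeCapability(cv::gpu::getDevice(), major, minor);
        return 100 * major + 10 * minor >= 110; // CC >= 1.1
    }

A test or sample could call such a helper and skip the radiusMatch path on CC 1.0 hardware instead of tripping the new CV_Assert.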