From 6ef4d9b1ddd2cd5383390b74bc940616bd62d38a Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Mon, 6 Dec 2010 07:47:26 +0000 Subject: [PATCH] added sipport of BORDER_REPLICATE mode into gpu::corner* functions --- modules/gpu/src/cuda/border_interpolate.hpp | 14 ++-- modules/gpu/src/cuda/cuda_shared.hpp | 4 +- modules/gpu/src/cuda/imgproc.cu | 87 ++++++++++++++++++++- modules/gpu/src/cuda/linear_filters_beta.cu | 8 +- modules/gpu/src/imgproc_gpu.cpp | 12 ++- tests/gpu/src/imgproc_gpu.cpp | 59 ++++++++++++-- 6 files changed, 160 insertions(+), 24 deletions(-) diff --git a/modules/gpu/src/cuda/border_interpolate.hpp b/modules/gpu/src/cuda/border_interpolate.hpp index 7bfcf82aad..ec8f474ef0 100644 --- a/modules/gpu/src/cuda/border_interpolate.hpp +++ b/modules/gpu/src/cuda/border_interpolate.hpp @@ -44,7 +44,7 @@ namespace cv { namespace gpu { struct BrdReflect101 { - BrdReflect101(int len) : last(len - 1) {} + BrdReflect101(int len): last(len - 1) {} __device__ int idx_low(int i) const { @@ -73,7 +73,7 @@ namespace cv { namespace gpu { template struct BrdRowReflect101: BrdReflect101 { - BrdRowReflect101(int len) : BrdReflect101(len) {} + BrdRowReflect101(int len): BrdReflect101(len) {} __device__ float at_low(int i, const T* data) const { @@ -90,7 +90,7 @@ namespace cv { namespace gpu { template struct BrdColReflect101: BrdReflect101 { - BrdColReflect101(int len, int step) : BrdReflect101(len), step(step) {} + BrdColReflect101(int len, int step): BrdReflect101(len), step(step) {} __device__ float at_low(int i, const T* data) const { @@ -108,7 +108,7 @@ namespace cv { namespace gpu { struct BrdReplicate { - BrdReplicate(int len) : last(len - 1) {} + BrdReplicate(int len): last(len - 1) {} __device__ int idx_low(int i) const { @@ -122,7 +122,7 @@ namespace cv { namespace gpu { __device__ int idx(int i) const { - return min(max(i, last), 0); + return max(min(i, last), 0); } bool is_range_safe(int mini, int maxi) const @@ -137,7 +137,7 @@ namespace cv { namespace gpu { template struct BrdRowReplicate: BrdReplicate { - BrdRowReplicate(int len) : BrdReplicate(len) {} + BrdRowReplicate(int len): BrdReplicate(len) {} __device__ float at_low(int i, const T* data) const { @@ -154,7 +154,7 @@ namespace cv { namespace gpu { template struct BrdColReplicate: BrdReplicate { - BrdColReplicate(int len, int step) : BrdReplicate(len), step(step) {} + BrdColReplicate(int len, int step): BrdReplicate(len), step(step) {} __device__ float at_low(int i, const T* data) const { diff --git a/modules/gpu/src/cuda/cuda_shared.hpp b/modules/gpu/src/cuda/cuda_shared.hpp index 1bc7e1cf56..0b82f8bc72 100644 --- a/modules/gpu/src/cuda/cuda_shared.hpp +++ b/modules/gpu/src/cuda/cuda_shared.hpp @@ -99,9 +99,9 @@ namespace cv // border interpolation modes) enum { - BORDER_REFLECT101 = 0 + BORDER_REFLECT101 = 0, + BORDER_REPLICATE }; - } } diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu index f964d2b1f2..8d43ac6a1b 100644 --- a/modules/gpu/src/cuda/imgproc.cu +++ b/modules/gpu/src/cuda/imgproc.cu @@ -42,6 +42,7 @@ #include "cuda_shared.hpp" #include "border_interpolate.hpp" +#include using namespace cv::gpu; @@ -498,6 +499,39 @@ namespace cv { namespace gpu { namespace imgproc texture harrisDxTex; texture harrisDyTex; + __global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k, + PtrStep dst) + { + const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x < cols && y < rows) + { + float a = 0.f; + float b = 0.f; + float c = 0.f; + + const int ibegin = y - (block_size / 2); + const int jbegin = x - (block_size / 2); + const int iend = ibegin + block_size; + const int jend = jbegin + block_size; + + for (int i = ibegin; i < iend; ++i) + { + for (int j = jbegin; j < jend; ++j) + { + float dx = tex2D(harrisDxTex, j, i); + float dy = tex2D(harrisDyTex, j, i); + a += dx * dx; + b += dx * dy; + c += dy * dy; + } + } + + ((float*)dst.ptr(y))[x] = a * c - b * b - k * (a + c) * (a + c); + } + } + template __global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k, PtrStep dst, B border_row, B border_col) @@ -555,6 +589,13 @@ namespace cv { namespace gpu { namespace imgproc cornerHarris_kernel<<>>( cols, rows, block_size, k, dst, BrdReflect101(cols), BrdReflect101(rows)); break; + case BORDER_REPLICATE: + harrisDxTex.addressMode[0] = cudaAddressModeClamp; + harrisDxTex.addressMode[1] = cudaAddressModeClamp; + harrisDyTex.addressMode[0] = cudaAddressModeClamp; + harrisDyTex.addressMode[1] = cudaAddressModeClamp; + cornerHarris_kernel<<>>(cols, rows, block_size, k, dst); + break; } cudaSafeCall(cudaThreadSynchronize()); @@ -567,6 +608,42 @@ namespace cv { namespace gpu { namespace imgproc texture minEigenValDxTex; texture minEigenValDyTex; + __global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size, + PtrStep dst) + { + const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x < cols && y < rows) + { + float a = 0.f; + float b = 0.f; + float c = 0.f; + + const int ibegin = y - (block_size / 2); + const int jbegin = x - (block_size / 2); + const int iend = ibegin + block_size; + const int jend = jbegin + block_size; + + for (int i = ibegin; i < iend; ++i) + { + for (int j = jbegin; j < jend; ++j) + { + float dx = tex2D(minEigenValDxTex, j, i); + float dy = tex2D(minEigenValDyTex, j, i); + a += dx * dx; + b += dx * dy; + c += dy * dy; + } + } + + a *= 0.5f; + c *= 0.5f; + ((float*)dst.ptr(y))[x] = (a + c) - sqrtf((a - c) * (a - c) + b * b); + } + } + + template __global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size, PtrStep dst, B border_row, B border_col) @@ -624,8 +701,14 @@ namespace cv { namespace gpu { namespace imgproc { case BORDER_REFLECT101: cornerMinEigenVal_kernel<<>>( - cols, rows, block_size, dst, - BrdReflect101(cols), BrdReflect101(rows)); + cols, rows, block_size, dst, BrdReflect101(cols), BrdReflect101(rows)); + break; + case BORDER_REPLICATE: + minEigenValDxTex.addressMode[0] = cudaAddressModeClamp; + minEigenValDxTex.addressMode[1] = cudaAddressModeClamp; + minEigenValDyTex.addressMode[0] = cudaAddressModeClamp; + minEigenValDyTex.addressMode[1] = cudaAddressModeClamp; + cornerMinEigenVal_kernel<<>>(cols, rows, block_size, dst); break; } diff --git a/modules/gpu/src/cuda/linear_filters_beta.cu b/modules/gpu/src/cuda/linear_filters_beta.cu index c951dafb0b..084161873d 100644 --- a/modules/gpu/src/cuda/linear_filters_beta.cu +++ b/modules/gpu/src/cuda/linear_filters_beta.cu @@ -150,7 +150,8 @@ void rowFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor, static const Caller callers[] = { - rowFilterCaller > + rowFilterCaller >, + rowFilterCaller > }; callers[brd_interp](src, dst, anchor, kernel, ksize); @@ -251,7 +252,8 @@ void colFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor, static const Caller callers[] = { - colFilterCaller > + colFilterCaller >, + colFilterCaller > }; callers[brd_interp](src, dst, anchor, kernel, ksize); @@ -261,4 +263,4 @@ void colFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor, template void colFilterCaller(const DevMem2D_, PtrStepf, int, const float*, int, int); template void colFilterCaller(const DevMem2D_, PtrStepf, int, const float*, int, int); -}}} \ No newline at end of file +}}} diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp index 9becef07dc..712956a63a 100644 --- a/modules/gpu/src/imgproc_gpu.cpp +++ b/modules/gpu/src/imgproc_gpu.cpp @@ -944,10 +944,13 @@ void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ks switch (borderType) { case cv::BORDER_REFLECT101: - gpuBorderType = cv::gpu::BORDER_REFLECT101; + gpuBorderType = cv::gpu::BORDER_REFLECT101; + break; + case cv::BORDER_REPLICATE: + gpuBorderType = cv::gpu::BORDER_REPLICATE; break; default: - CV_Error(CV_StsBadArg, "cornerHarris: unsupported border type"); + CV_Error(CV_StsBadArg, "cornerHarris: unsupported border extrapolation mode"); } GpuMat Dx, Dy; @@ -964,8 +967,11 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, i case cv::BORDER_REFLECT101: gpuBorderType = cv::gpu::BORDER_REFLECT101; break; + case cv::BORDER_REPLICATE: + gpuBorderType = cv::gpu::BORDER_REPLICATE; + break; default: - CV_Error(CV_StsBadArg, "cornerMinEigenVal: unsupported border type"); + CV_Error(CV_StsBadArg, "cornerMinEigenVal: unsupported border extrapolation mode"); } GpuMat Dx, Dy; diff --git a/tests/gpu/src/imgproc_gpu.cpp b/tests/gpu/src/imgproc_gpu.cpp index ead568abad..f9fe87876b 100644 --- a/tests/gpu/src/imgproc_gpu.cpp +++ b/tests/gpu/src/imgproc_gpu.cpp @@ -640,15 +640,37 @@ struct CV_GpuCornerHarrisTest: CvTest rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256)); double k = 0.1; - int borderType = BORDER_REFLECT101; cv::Mat dst_gold; + cv::gpu::GpuMat dst; + cv::Mat dsth; + int borderType; + + borderType = BORDER_REFLECT101; cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType); + cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, k, borderType); - cv::gpu::GpuMat dst; + dsth = dst; + for (int i = 0; i < dst.rows; ++i) + { + for (int j = 0; j < dst.cols; ++j) + { + float a = dst_gold.at(i, j); + float b = dsth.at(i, j); + if (fabs(a - b) > 1e-3f) + { + ts->printf(CvTS::CONSOLE, "%d %d %f %f %d\n", i, j, a, b, apertureSize); + ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT); + return false; + }; + } + } + + borderType = BORDER_REPLICATE; + cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType); cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, k, borderType); - cv::Mat dsth = dst; + dsth = dst; for (int i = 0; i < dst.rows; ++i) { for (int j = 0; j < dst.cols; ++j) @@ -703,15 +725,37 @@ struct CV_GpuCornerMinEigenValTest: CvTest else if (depth == CV_8U) rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256)); - int borderType = BORDER_REFLECT101; - cv::Mat dst_gold; + cv::gpu::GpuMat dst; + cv::Mat dsth; + + int borderType; + + borderType = BORDER_REFLECT101; cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType); + cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, borderType); - cv::gpu::GpuMat dst; + dsth = dst; + for (int i = 0; i < dst.rows; ++i) + { + for (int j = 0; j < dst.cols; ++j) + { + float a = dst_gold.at(i, j); + float b = dsth.at(i, j); + if (fabs(a - b) > 1e-2f) + { + ts->printf(CvTS::CONSOLE, "%d %d %f %f %d %d\n", i, j, a, b, apertureSize, blockSize); + ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT); + return false; + }; + } + } + + borderType = BORDER_REPLICATE; + cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType); cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, borderType); - cv::Mat dsth = dst; + dsth = dst; for (int i = 0; i < dst.rows; ++i) { for (int j = 0; j < dst.cols; ++j) @@ -726,6 +770,7 @@ struct CV_GpuCornerMinEigenValTest: CvTest }; } } + return true; } };