added support of BORDER_REFLECT101 to gpu::cornerHarris etc.; added support of border extrapolation to linear filters

pull/13383/head
Alexey Spizhevoy 14 years ago
parent 4789306499
commit ab04a03621
  1. modules/gpu/include/opencv2/gpu/gpu.hpp (6 lines changed)
  2. modules/gpu/src/cuda/border_interpolate.hpp (108 lines changed)
  3. modules/gpu/src/cuda/cuda_shared.hpp (8 lines changed)
  4. modules/gpu/src/cuda/hog.cu (3 lines changed)
  5. modules/gpu/src/cuda/imgproc.cu (112 lines changed)
  6. modules/gpu/src/cuda/linear_filters_beta.cu (264 lines changed)
  7. modules/gpu/src/imgproc_gpu.cpp (120 lines changed)
  8. tests/gpu/src/imgproc_gpu.cpp (34 lines changed)

@@ -628,13 +628,11 @@ namespace cv
CV_EXPORTS void Canny(const GpuMat& image, GpuMat& edges, double threshold1, double threshold2, int apertureSize = 3);
//! computes Harris cornerness criteria at each image pixel
// (does BORDER_CONSTANT interpolation with 0 as the fill value)
CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k);
CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType=BORDER_REFLECT101);
//! computes minimum eigen value of 2x2 derivative covariation matrix at each pixel - the cornerness criteria
// (does BORDER_CONSTANT interpolation with 0 as the fill value)
CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize=3);
CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101);
//////////////////////////////// Filter Engine ////////////////////////////////
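Note on the API change above: borderType defaults to BORDER_REFLECT101, so existing callers keep compiling. A minimal usage sketch of the new signatures (file name and parameter values are illustrative only; a CUDA-capable device is assumed):

// Hypothetical driver code for the updated functions.
cv::Mat img = cv::imread("input.png", 0);        // illustrative input
img.convertTo(img, CV_32F, 1.0 / 255.0);
cv::gpu::GpuMat d_img(img), d_harris, d_mineig;
// borderType defaults to BORDER_REFLECT101; passed explicitly here.
cv::gpu::cornerHarris(d_img, d_harris, 5, 3, 0.04, cv::BORDER_REFLECT101);
cv::gpu::cornerMinEigenVal(d_img, d_mineig, 5, 3, cv::BORDER_REFLECT101);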

@@ -0,0 +1,108 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
namespace cv { namespace gpu {
struct BrdReflect101
{
BrdReflect101(int len) : last(len - 1) {}
__device__ int idx_low(int i) const
{
return abs(i);
}
__device__ int idx_high(int i) const
{
return last - abs(i - last);
}
__device__ int idx(int i) const
{
return i <= last ? idx_low(i) : idx_high(i);
}
bool is_range_safe(int mini, int maxi) const
{
return -last <= mini && maxi <= 2 * last;
}
int last;
};
template <typename T>
struct BrdRowReflect101: BrdReflect101
{
BrdRowReflect101(int len) : BrdReflect101(len) {}
__device__ float at_low(int i, const T* data) const
{
return data[idx_low(i)];
}
__device__ float at_high(int i, const T* data) const
{
return data[idx_high(i)];
}
};
template <typename T>
struct BrdColReflect101: BrdReflect101
{
BrdColReflect101(int len, int step) : BrdReflect101(len), step(step) {}
__device__ float at_low(int i, const T* data) const
{
return data[idx_low(i) * step];
}
__device__ float at_high(int i, const T* data) const
{
return data[idx_high(i) * step];
}
int step;
};
}}
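For intuition, a small host-side sketch of the reflect-101 mapping these helpers implement; reflect101 below is a plain C++ mirror of BrdReflect101::idx, written only for illustration:

#include <cassert>
#include <cstdlib>

// Mirrors BrdReflect101::idx: reflect without duplicating the border pixel.
int reflect101(int i, int last)
{
    return i <= last ? std::abs(i) : last - std::abs(i - last);
}

int main()
{
    // len == 5, so last == 4; extrapolated pattern: ...2 1 | 0 1 2 3 4 | 3 2...
    assert(reflect101(-2, 4) == 2);
    assert(reflect101(0, 4) == 0);
    assert(reflect101(6, 4) == 2);
    // is_range_safe(-last, 2 * last) is exactly the range where one
    // reflection is enough, which the filter callers check before launch.
    return 0;
}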

@@ -94,6 +94,14 @@ namespace cv
cudaSafeCall( cudaGetTextureReference(&tex, name) );
cudaSafeCall( cudaUnbindTexture(tex) );
}
// Available GPU border interpolation modes (named after the
// corresponding CPU border interpolation modes)
enum
{
BORDER_REFLECT101 = 0,
};
}
}

@@ -322,7 +322,8 @@ void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
normalize_hists_kernel_many_blocks<256, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
else if (nthreads == 512)
normalize_hists_kernel_many_blocks<512, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
// Bigger block histogram sizes are not supported
else
cv::gpu::error("normalize_hists: histogram's size is too big, try to decrease number of bins", __FILE__, __LINE__);
cudaSafeCall(cudaThreadSynchronize());
}

@@ -41,6 +41,7 @@
//M*/
#include "cuda_shared.hpp"
#include "border_interpolate.hpp"
using namespace cv::gpu;
@@ -464,10 +465,40 @@ namespace cv { namespace gpu { namespace imgproc
reprojectImageTo3D_caller(disp, xyzw, q, stream);
}
//////////////////////////////////////// Extract Cov Data ////////////////////////////////////////////////
__global__ void extractCovData_kernel(const int cols, const int rows, const PtrStepf Dx,
const PtrStepf Dy, PtrStepf dst)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < cols && y < rows)
{
float dx = Dx.ptr(y)[x];
float dy = Dy.ptr(y)[x];
dst.ptr(y)[x] = dx * dx;
dst.ptr(y + rows)[x] = dx * dy;
dst.ptr(y + (rows << 1))[x] = dy * dy;
}
}
void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst)
{
dim3 threads(32, 8);
dim3 grid(divUp(Dx.cols, threads.x), divUp(Dx.rows, threads.y));
extractCovData_kernel<<<grid, threads>>>(Dx.cols, Dx.rows, Dx, Dy, dst);
cudaSafeCall(cudaThreadSynchronize());
}
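For reference, the covariance buffer produced here is (3 * rows) x cols with three stacked bands; note (rows << 1) is just 2 * rows. A hypothetical CPU equivalent, included only to document the layout:

void extractCovDataRef(const cv::Mat& Dx, const cv::Mat& Dy, cv::Mat& dst)
{
    const int rows = Dx.rows, cols = Dx.cols;
    dst.create(3 * rows, cols, CV_32F);
    for (int y = 0; y < rows; ++y)
        for (int x = 0; x < cols; ++x)
        {
            const float dx = Dx.at<float>(y, x), dy = Dy.at<float>(y, x);
            dst.at<float>(y, x) = dx * dx;            // band 0: Dx^2
            dst.at<float>(y + rows, x) = dx * dy;     // band 1: Dx*Dy
            dst.at<float>(y + 2 * rows, x) = dy * dy; // band 2: Dy^2
        }
}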
/////////////////////////////////////////// Corner Harris /////////////////////////////////////////////////
template <typename B>
__global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k,
const PtrStep Dx, const PtrStep Dy, PtrStep dst)
const PtrStep Dx, const PtrStep Dy, PtrStep dst, B border_row,
B border_col)
{
const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -478,22 +509,21 @@ namespace cv { namespace gpu { namespace imgproc
float b = 0.f;
float c = 0.f;
int offset1 = -(block_size / 2);
int offset2 = offset1 + block_size;
unsigned int j_begin = max(x + offset1, 0);
unsigned int i_begin = max(y + offset1, 0);
unsigned int j_end = min(x + offset2, cols);
unsigned int i_end = min(y + offset2, rows);
const int ibegin = y - (block_size / 2);
const int jbegin = x - (block_size / 2);
const int iend = ibegin + block_size;
const int jend = jbegin + block_size;
for (unsigned int i = i_begin; i < i_end; ++i)
for (int i = ibegin; i < iend; ++i)
{
const float* dx_row = (const float*)Dx.ptr(i);
const float* dy_row = (const float*)Dy.ptr(i);
for (unsigned int j = j_begin; j < j_end; ++j)
int y = border_col.idx(i);
const float* dx_row = (const float*)Dx.ptr(y);
const float* dy_row = (const float*)Dy.ptr(y);
for (int j = jbegin; j < jend; ++j)
{
float dx = dx_row[j];
float dy = dy_row[j];
int x = border_row.idx(j);
float dx = dx_row[x];
float dy = dy_row[x];
a += dx * dx;
b += dx * dy;
c += dy * dy;
@@ -504,7 +534,8 @@ namespace cv { namespace gpu { namespace imgproc
}
}
void cornerHarris_caller(const int block_size, const float k, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst)
void cornerHarris_caller(const int block_size, const float k, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst,
int border_type)
{
const int rows = Dx.rows;
const int cols = Dx.cols;
@@ -512,14 +543,22 @@ namespace cv { namespace gpu { namespace imgproc
dim3 threads(32, 8);
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
cornerHarris_kernel<<<grid, threads>>>(cols, rows, block_size, k, Dx, Dy, dst);
switch (border_type)
{
case BORDER_REFLECT101:
cornerHarris_kernel<<<grid, threads>>>(
cols, rows, block_size, k, Dx, Dy, dst,
BrdReflect101(cols), BrdReflect101(rows));
break;
}
cudaSafeCall(cudaThreadSynchronize());
}
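The response store itself is elided between the hunks above; for reference, the accumulated sums feed the standard Harris measure:

// M = | a b |     R = det(M) - k * trace(M)^2
//     | b c |       = (a * c - b * b) - k * (a + c) * (a + c)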
/////////////////////////////////////////// Corner Min Eigen Val /////////////////////////////////////////////////
__global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size,
const PtrStep Dx, const PtrStep Dy, PtrStep dst)
template <typename B>
__global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size, const PtrStep Dx,
const PtrStep Dy, PtrStep dst, B border_row, B border_col)
{
const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -530,22 +569,21 @@ namespace cv { namespace gpu { namespace imgproc
float b = 0.f;
float c = 0.f;
int offset1 = -(block_size / 2);
int offset2 = offset1 + block_size;
unsigned int j_begin = max(x + offset1, 0);
unsigned int i_begin = max(y + offset1, 0);
unsigned int j_end = min(x + offset2, cols);
unsigned int i_end = min(y + offset2, rows);
const int ibegin = y - (block_size / 2);
const int jbegin = x - (block_size / 2);
const int iend = ibegin + block_size;
const int jend = jbegin + block_size;
for (unsigned int i = i_begin; i < i_end; ++i)
for (int i = ibegin; i < iend; ++i)
{
const float* dx_row = (const float*)Dx.ptr(i);
const float* dy_row = (const float*)Dy.ptr(i);
for (unsigned int j = j_begin; j < j_end; ++j)
int y = border_col.idx(i);
const float* dx_row = (const float*)Dx.ptr(y);
const float* dy_row = (const float*)Dy.ptr(y);
for (int j = jbegin; j < jend; ++j)
{
float dx = dx_row[j];
float dy = dy_row[j];
int x = border_row.idx(j);
float dx = dx_row[x];
float dy = dy_row[x];
a += dx * dx;
b += dx * dy;
c += dy * dy;
@@ -558,7 +596,8 @@ namespace cv { namespace gpu { namespace imgproc
}
}
void cornerMinEigenVal_caller(const int block_size, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst)
void cornerMinEigenVal_caller(const int block_size, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst,
int border_type)
{
const int rows = Dx.rows;
const int cols = Dx.cols;
@@ -566,7 +605,14 @@ namespace cv { namespace gpu { namespace imgproc
dim3 threads(32, 8);
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
cornerMinEigenVal_kernel<<<grid, threads>>>(cols, rows, block_size, Dx, Dy, dst);
switch (border_type)
{
case BORDER_REFLECT101:
cornerMinEigenVal_kernel<<<grid, threads>>>(
cols, rows, block_size, Dx, Dy, dst,
BrdReflect101(cols), BrdReflect101(rows));
break;
}
cudaSafeCall(cudaThreadSynchronize());
}
}}}
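Likewise, the store in cornerMinEigenVal_kernel (also outside the hunk) reduces the same 2x2 matrix to its smaller eigenvalue:

// lambda_min = (a + c) / 2 - sqrt(((a - c) / 2)^2 + b * b)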

@@ -0,0 +1,264 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/gpu/devmem2d.hpp"
#include "safe_call.hpp"
#include "cuda_shared.hpp"
#include "border_interpolate.hpp"
#define BLOCK_DIM_X 16
#define BLOCK_DIM_Y 16
#define MAX_KERNEL_SIZE 16
using namespace cv::gpu;
namespace cv { namespace gpu { namespace linear_filters {
// Global linear kernel data storage
__constant__ float ckernel[MAX_KERNEL_SIZE];
void loadKernel(const float* kernel, int ksize)
{
cudaSafeCall(cudaMemcpyToSymbol(ckernel, kernel, ksize * sizeof(float)));
}
template <typename T, typename B, int ksize>
__global__ void rowFilterKernel(const DevMem2D_<T> src, PtrStepf dst,
int anchor, B border)
{
__shared__ float smem[BLOCK_DIM_X * BLOCK_DIM_Y * 3];
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
float* srow = smem + threadIdx.y * blockDim.x * 3;
if (y < src.rows)
{
const T* src_row = src.ptr(y);
srow[threadIdx.x + blockDim.x] = border.at_high(x, src_row);
srow[threadIdx.x] = border.at_low(x - blockDim.x, src_row);
srow[threadIdx.x + (blockDim.x << 1)] = border.at_high(x + blockDim.x, src_row);
__syncthreads();
if (x < src.cols)
{
srow += threadIdx.x + blockDim.x - anchor;
float sum = 0.f;
for (int i = 0; i < ksize; ++i)
sum += srow[i] * ckernel[i];
dst.ptr(y)[x] = sum;
}
}
}
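The shared-memory staging above gives each block row a three-tile window, so the convolution loop never bounds-checks. A sketch of the layout, assuming BLOCK_DIM_X == 16:

// smem per block row (3 * blockDim.x floats, x0 = blockIdx.x * 16):
//
//   [ left apron  ][ center tile ][ right apron  ]
//    x0-16..x0-1     x0..x0+15     x0+16..x0+31
//
// All three tiles are filled through at_low/at_high, so reads that fall
// outside the image are reflected; the center tile also goes through
// at_high because the grid is rounded up past src.cols.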
template <typename T, typename B, int ksize>
void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor)
{
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y));
B border(src.cols);
if (!border.is_range_safe(-BLOCK_DIM_X, (grid.x + 1) * BLOCK_DIM_X - 1))
cv::gpu::error("rowFilterCaller: can't use specified border extrapolation, image is too small, "
"try bigger image or another border extrapolation mode", __FILE__, __LINE__);
rowFilterKernel<T, B, ksize><<<grid, threads>>>(src, dst, anchor, border);
cudaSafeCall(cudaThreadSynchronize());
}
template <typename T, typename B>
void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
const float* kernel, int ksize)
{
typedef void (*Caller)(const DevMem2D_<T>, PtrStepf, int);
static const Caller callers[] =
{
0, rowFilterCaller<T, B, 1>,
rowFilterCaller<T, B, 2>, rowFilterCaller<T, B, 3>,
rowFilterCaller<T, B, 4>, rowFilterCaller<T, B, 5>,
rowFilterCaller<T, B, 6>, rowFilterCaller<T, B, 7>,
rowFilterCaller<T, B, 8>, rowFilterCaller<T, B, 9>,
rowFilterCaller<T, B, 10>, rowFilterCaller<T, B, 11>,
rowFilterCaller<T, B, 12>, rowFilterCaller<T, B, 13>,
rowFilterCaller<T, B, 14>, rowFilterCaller<T, B, 15>
};
loadKernel(kernel, ksize);
callers[ksize](src, dst, anchor);
}
template <typename T>
void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
const float* kernel, int ksize, int brd_interp)
{
typedef void (*Caller)(const DevMem2D_<T>, PtrStepf, int, const float*, int);
static const Caller callers[] =
{
rowFilterCaller<T, BrdRowReflect101<T> >
};
callers[brd_interp](src, dst, anchor, kernel, ksize);
}
template void rowFilterCaller<unsigned char>(const DevMem2D_<unsigned char>, PtrStepf, int, const float*, int, int);
template void rowFilterCaller<float>(const DevMem2D_<float>, PtrStepf, int, const float*, int, int);
template <typename T, typename B, int ksize>
__global__ void colFilterKernel(const DevMem2D_<T> src, PtrStepf dst, int anchor, B border)
{
__shared__ float smem[BLOCK_DIM_X * BLOCK_DIM_Y * 3];
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int smem_step = blockDim.x;
float* scol = smem + threadIdx.x;
if (x < src.cols)
{
const T* src_col = src.data + x;
scol[(threadIdx.y + blockDim.y) * smem_step] = border.at_high(y, src_col);
scol[threadIdx.y * smem_step] = border.at_low(y - blockDim.y, src_col);
scol[(threadIdx.y + (blockDim.y << 1)) * smem_step] = border.at_high(y + blockDim.y, src_col);
__syncthreads();
if (y < src.rows)
{
scol += (threadIdx.y + blockDim.y - anchor) * smem_step;
float sum = 0.f;
for (int i = 0; i < ksize; ++i)
sum += scol[i * smem_step] * ckernel[i];
dst.ptr(y)[x] = sum;
}
}
}
template <typename T, typename B, int ksize>
void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor)
{
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y));
B border(src.rows, src.step / src.elem_size);
if (src.step - border.step * src.elem_size != 0)
cv::gpu::error("colFilterCaller: src step must be multiple of its element size",
__FILE__, __LINE__);
if (!border.is_range_safe(-BLOCK_DIM_Y, (grid.y + 1) * BLOCK_DIM_Y - 1))
cv::gpu::error("colFilterCaller: can't use specified border extrapolation, image is too small, "
"try bigger image or another border extrapolation mode", __FILE__, __LINE__);
colFilterKernel<T, B, ksize><<<grid, threads>>>(src, dst, anchor, border);
cudaSafeCall(cudaThreadSynchronize());
}
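A worked example of the two launch-time checks above, assuming BLOCK_DIM_Y == 16: each block stages source rows [y0 - 16, y0 + 31] around its tile (y0 = blockIdx.y * 16), so across the grid the extreme raw indices are mini = -16 and maxi = (grid.y + 1) * 16 - 1.

// BrdReflect101::is_range_safe requires -last <= mini && maxi <= 2 * last,
// i.e. a single reflection must cover every apron read; images with too
// few rows fail that test and trigger the error above. The step check
// exists because BrdColReflect101 indexes in elements, not bytes, so
// src.step must divide evenly by src.elem_size.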
template <typename T, typename B>
void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
const float* kernel, int ksize)
{
typedef void (*Caller)(const DevMem2D_<T>, PtrStepf, int);
static const Caller callers[] =
{
0, colFilterCaller<T, B, 1>,
colFilterCaller<T, B, 2>, colFilterCaller<T, B, 3>,
colFilterCaller<T, B, 4>, colFilterCaller<T, B, 5>,
colFilterCaller<T, B, 6>, colFilterCaller<T, B, 7>,
colFilterCaller<T, B, 8>, colFilterCaller<T, B, 9>,
colFilterCaller<T, B, 10>, colFilterCaller<T, B, 11>,
colFilterCaller<T, B, 12>, colFilterCaller<T, B, 13>,
colFilterCaller<T, B, 14>, colFilterCaller<T, B, 15>
};
loadKernel(kernel, ksize);
callers[ksize](src, dst, anchor);
}
template <typename T>
void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
const float* kernel, int ksize, int brd_interp)
{
typedef void (*Caller)(const DevMem2D_<T>, PtrStepf, int, const float*, int);
static const Caller callers[] =
{
colFilterCaller<T, BrdColReflect101<T> >
};
callers[brd_interp](src, dst, anchor, kernel, ksize);
}
template void colFilterCaller<unsigned char>(const DevMem2D_<unsigned char>, PtrStepf, int, const float*, int, int);
template void colFilterCaller<float>(const DevMem2D_<float>, PtrStepf, int, const float*, int, int);
}}}
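End-to-end, the two passes compose into a separable filter exactly as extractCovData in imgproc_gpu.cpp drives them below. A minimal sketch for a 3x3 Sobel dx, with illustrative buffer names and an assumed CV_8U source already on the GPU:

// Given: cv::gpu::GpuMat src of type CV_8U.
cv::Mat kx, ky;
cv::getDerivKernels(kx, ky, 1, 0, 3, false, CV_32F);
kx = kx.reshape(1, 1);   // horizontal pass kernel: [-1 0 1]
ky = ky.reshape(1, 1);   // vertical pass kernel:   [ 1 2 1]

cv::gpu::GpuMat tmp(src.size(), CV_32F), dx(src.size(), CV_32F);
linear_filters::rowFilterCaller<unsigned char>(
    src, tmp, kx.cols >> 1, kx.ptr<float>(0), kx.cols,
    cv::gpu::BORDER_REFLECT101);
linear_filters::colFilterCaller<float>(
    tmp, dx, ky.cols >> 1, ky.ptr<float>(0), ky.cols,
    cv::gpu::BORDER_REFLECT101);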

@@ -68,8 +68,8 @@ void cv::gpu::histEven(const GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); }
void cv::gpu::histEven(const GpuMat&, GpuMat*, int*, int*, int*) { throw_nogpu(); }
void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*) { throw_nogpu(); }
void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, int, int, double) { throw_nogpu(); }
void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, int, int) { throw_nogpu(); }
void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, int, int, double, int) { throw_nogpu(); }
void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); }
#else /* !defined (HAVE_CUDA) */
@@ -861,49 +861,119 @@ void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4
namespace cv { namespace gpu { namespace imgproc {
void cornerHarris_caller(const int block_size, const float k, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst);
void cornerMinEigenVal_caller(const int block_size, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst);
void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst);
void cornerHarris_caller(const int block_size, const float k, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst, int border_type);
void cornerMinEigenVal_caller(const int block_size, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst, int border_type);
}}}
namespace cv { namespace gpu { namespace linear_filters {
template <typename T>
void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor, const float* kernel,
int ksize, int brd_interp);
template <typename T>
void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor, const float* kernel,
int ksize, int brd_interp);
}}}
namespace
{
void computeGradients(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize)
{
CV_Assert(src.type() == CV_32F);
template <typename T>
void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int gpuBorderType)
{
double scale = (double)(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize;
if (ksize < 0) scale *= 2.;
if (ksize < 0)
scale *= 2.;
if (src.depth() == CV_8U)
scale *= 255.;
scale = 1./scale;
if (ksize > 0)
{
Sobel(src, Dx, CV_32F, 1, 0, ksize, scale);
Sobel(src, Dy, CV_32F, 0, 1, ksize, scale);
}
else
GpuMat tmp_buf(src.size(), CV_32F);
Dx.create(src.size(), CV_32F);
Dy.create(src.size(), CV_32F);
Mat kx, ky;
getDerivKernels(kx, ky, 1, 0, ksize, false, CV_32F);
kx = kx.reshape(1, 1) * scale;
ky = ky.reshape(1, 1);
linear_filters::rowFilterCaller<T>(
src, tmp_buf, kx.cols >> 1, kx.ptr<float>(0), kx.cols,
gpuBorderType);
linear_filters::colFilterCaller<float>(
tmp_buf, Dx, ky.cols >> 1, ky.ptr<float>(0), ky.cols,
gpuBorderType);
getDerivKernels(kx, ky, 0, 1, ksize, false, CV_32F);
kx = kx.reshape(1, 1);
ky = ky.reshape(1, 1) * scale;
linear_filters::rowFilterCaller<T>(
src, tmp_buf, kx.cols >> 1, kx.ptr<float>(0), kx.cols,
gpuBorderType);
linear_filters::colFilterCaller<float>(
tmp_buf, Dy, ky.cols >> 1, ky.ptr<float>(0), ky.cols,
gpuBorderType);
}
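A worked example of the normalization scale computed at the top of extractCovData: for ksize == 3, blockSize == 5 and a CV_8U source,

// scale = 2^(3 - 1) * 5 = 20;  scale *= 255  ->  5100;  scale = 1 / 5100
// This keeps the derivative products of a [0, 255] 8-bit image in the
// same range as those of a [0, 1] float image, so the tests can diff
// GPU results directly against the CPU cornerHarris/cornerMinEigenVal.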
void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int gpuBorderType)
{
switch (src.type())
{
Scharr(src, Dx, CV_32F, 1, 0, scale);
Scharr(src, Dy, CV_32F, 0, 1, scale);
case CV_8U:
extractCovData<unsigned char>(src, Dx, Dy, blockSize, ksize, gpuBorderType);
break;
case CV_32F:
extractCovData<float>(src, Dx, Dy, blockSize, ksize, gpuBorderType);
break;
default:
CV_Error(CV_StsBadArg, "extractCovData: unsupported type of the source matrix");
}
}
}
void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k)
} // Anonymous namespace
void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType)
{
int gpuBorderType;
switch (borderType)
{
case cv::BORDER_REFLECT101:
gpuBorderType = cv::gpu::BORDER_REFLECT101;
break;
default:
CV_Error(CV_StsBadArg, "cornerHarris: unsupported border type");
}
GpuMat Dx, Dy;
computeGradients(src, Dx, Dy, blockSize, ksize);
extractCovData(src, Dx, Dy, blockSize, ksize, gpuBorderType);
dst.create(src.size(), CV_32F);
imgproc::cornerHarris_caller(blockSize, (float)k, Dx, Dy, dst);
imgproc::cornerHarris_caller(blockSize, (float)k, Dx, Dy, dst, gpuBorderType);
}
void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize)
{
void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType)
{
int gpuBorderType;
switch (borderType)
{
case cv::BORDER_REFLECT101:
gpuBorderType = cv::gpu::BORDER_REFLECT101;
break;
default:
CV_Error(CV_StsBadArg, "cornerMinEigenVal: unsupported border type");
}
GpuMat Dx, Dy;
computeGradients(src, Dx, Dy, blockSize, ksize);
extractCovData(src, Dx, Dy, blockSize, ksize, gpuBorderType);
dst.create(src.size(), CV_32F);
imgproc::cornerMinEigenVal_caller(blockSize, Dx, Dy, dst);
imgproc::cornerMinEigenVal_caller(blockSize, Dx, Dy, dst, gpuBorderType);
}
#endif /* !defined (HAVE_CUDA) */

@@ -616,9 +616,11 @@ struct CV_GpuCornerHarrisTest: CvTest
{
for (int i = 0; i < 5; ++i)
{
int rows = 10 + rand() % 300, cols = 10 + rand() % 300;
int rows = 25 + rand() % 300, cols = 25 + rand() % 300;
if (!compareToCpuTest(rows, cols, CV_32F, 1 + rand() % 5, 1 + 2 * (rand() % 4))) return;
if (!compareToCpuTest(rows, cols, CV_32F, 1 + rand() % 5, -1)) return;
if (!compareToCpuTest(rows, cols, CV_8U, 1 + rand() % 5, 1 + 2 * (rand() % 4))) return;
if (!compareToCpuTest(rows, cols, CV_8U, 1 + rand() % 5, -1)) return;
}
}
catch (const Exception& e)
@@ -634,22 +636,22 @@ struct CV_GpuCornerHarrisTest: CvTest
cv::Mat src(rows, cols, depth);
if (depth == CV_32F)
rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(1));
else if (depth == CV_8U)
rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256));
double k = 0.1;
int borderType = BORDER_DEFAULT;
int borderType = BORDER_REFLECT101;
cv::Mat dst_gold;
cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType);
cv::gpu::GpuMat dst;
cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, k);
int asize = apertureSize > 0 ? apertureSize : 3;
cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, k, borderType);
cv::Mat dsth = dst;
for (int i = max(blockSize, asize) + 2; i < dst.rows - max(blockSize, asize) - 2; ++i)
for (int i = 0; i < dst.rows; ++i)
{
for (int j = max(blockSize, asize) + 2; j < dst.cols - max(blockSize, asize) - 2; ++j)
for (int j = 0; j < dst.cols; ++j)
{
float a = dst_gold.at<float>(i, j);
float b = dsth.at<float>(i, j);
@@ -678,9 +680,11 @@ struct CV_GpuCornerMinEigenValTest: CvTest
{
for (int i = 0; i < 3; ++i)
{
int rows = 10 + rand() % 300, cols = 10 + rand() % 300;
int rows = 25 + rand() % 300, cols = 25 + rand() % 300;
if (!compareToCpuTest(rows, cols, CV_32F, 1 + rand() % 5, -1)) return;
if (!compareToCpuTest(rows, cols, CV_32F, 1 + rand() % 5, 1 + 2 * (rand() % 4))) return;
if (!compareToCpuTest(rows, cols, CV_8U, 1 + rand() % 5, -1)) return;
if (!compareToCpuTest(rows, cols, CV_8U, 1 + rand() % 5, 1 + 2 * (rand() % 4))) return;
}
}
catch (const Exception& e)
@@ -696,25 +700,25 @@ struct CV_GpuCornerMinEigenValTest: CvTest
cv::Mat src(rows, cols, depth);
if (depth == CV_32F)
rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(1));
else if (depth == CV_8U)
rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256));
int borderType = BORDER_DEFAULT;
int borderType = BORDER_REFLECT101;
cv::Mat dst_gold;
cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType);
cv::gpu::GpuMat dst;
cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize);
int asize = apertureSize > 0 ? apertureSize : 3;
cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, borderType);
cv::Mat dsth = dst;
for (int i = max(blockSize, asize) + 2; i < dst.rows - max(blockSize, asize) - 2; ++i)
for (int i = 0; i < dst.rows; ++i)
{
for (int j = max(blockSize, asize) + 2; j < dst.cols - max(blockSize, asize) - 2; ++j)
for (int j = 0; j < dst.cols; ++j)
{
float a = dst_gold.at<float>(i, j);
float b = dsth.at<float>(i, j);
if (fabs(a - b) > 1e-3f)
if (fabs(a - b) > 1e-2f)
{
ts->printf(CvTS::CONSOLE, "%d %d %f %f %d %d\n", i, j, a, b, apertureSize, blockSize);
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
