|
|
|
@ -46,7 +46,7 @@ using namespace cv::gpu; |
|
|
|
|
|
|
|
|
|
namespace imgproc |
|
|
|
|
{ |
|
|
|
|
texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex; |
|
|
|
|
texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex1; |
|
|
|
|
|
|
|
|
|
__global__ void kernel_remap(const float *mapx, const float *mapy, size_t map_step, unsigned char* out, size_t out_step, int width, int height) |
|
|
|
|
{ |
|
|
|
@ -59,7 +59,78 @@ namespace imgproc |
|
|
|
|
float xcoo = mapx[idx]; |
|
|
|
|
float ycoo = mapy[idx]; |
|
|
|
|
|
|
|
|
|
out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex, xcoo, ycoo)); |
|
|
|
|
out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex1, xcoo, ycoo)); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// 2D texture reference over uchar4 texels, fetched as raw element values
// (cudaReadModeElementType, i.e. no conversion to normalized float).
// meanShiftFiltering_gpu binds it to src.ptr before launching meanshift_kernel,
// which samples it with integer pixel coordinates, and unbinds it afterwards.
texture< uchar4, 2, cudaReadModeElementType > tex_meanshift; |
|
|
|
|
|
|
|
|
|
// Mean-shift filtering kernel: one thread per output pixel.
//
// Each thread starts at its own pixel (x0, y0) with the colour c fetched from
// tex_meanshift and repeats, at most maxIter times:
//   1. scan the (2*sp+1) x (2*sp+1) window centred on the current position;
//   2. accumulate position and colour of every texel whose squared colour
//      distance to c (over the x, y, z components) is <= sr*sr;
//   3. move to the floor of the averaged position / colour;
//   4. stop when the position did not change or when
//      |dx| + |dy| + squared colour shift <= eps.
// The final colour is stored as three consecutive bytes in `out` at the
// thread's ORIGINAL pixel location (not the converged position).
//
// Parameters:
//   out       destination buffer, 3 bytes written per pixel
//   out_step  offset in bytes between consecutive rows of `out`
//   cols,rows extent of the launch that is actually processed; threads with
//             x0 >= cols or y0 >= rows do nothing
//   sp        spatial window radius, in pixels
//   sr        colour radius (used squared)
//   maxIter   upper bound on the number of iterations per pixel
//   eps       early-stop threshold described in step 4
//
// Launch layout: 2D grid of 2D blocks; the global x/y thread index is the
// pixel coordinate. No shared memory is used.
//
// The search window is not clipped against cols/rows here: fetches outside the
// bound texture are resolved by the texture's addressing mode, and such texels
// contribute their (out-of-range) x/y to the position average.
extern "C" __global__ void meanshift_kernel( unsigned char* out, int out_step, int cols, int rows, int sp, int sr, int maxIter, float eps )
{
    // Pixel owned by this thread; kept separately because x0/y0 drift below.
    const int px = blockIdx.x * blockDim.x + threadIdx.x;
    const int py = blockIdx.y * blockDim.y + threadIdx.y;

    int x0 = px;
    int y0 = py;

    if( x0 < cols && y0 < rows )
    {
        int isr2 = sr*sr;
        uchar4 c = tex2D( tex_meanshift, x0, y0 );

        // iterate meanshift procedure
        for( int iter = 0; iter < maxIter; iter++ )
        {
            int count = 0;
            int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;

            // mean shift: process pixels in window (p-sp)x(p+sp)
            int minx = x0-sp;
            int miny = y0-sp;
            int maxx = x0+sp;
            int maxy = y0+sp;

            for( int y = miny; y <= maxy; y++ )
            {
                int rowCount = 0;
                for( int x = minx; x <= maxx; x++ )
                {
                    uchar4 t = tex2D( tex_meanshift, x, y );

                    // uchar components are promoted to int, so the differences can be negative
                    int norm2 = (t.x - c.x) * (t.x - c.x) + (t.y - c.y) * (t.y - c.y) + (t.z - c.z) * (t.z - c.z);
                    if( norm2 <= isr2 )
                    {
                        s0 += t.x; s1 += t.y; s2 += t.z;
                        sx += x; rowCount++;
                    }
                }
                count += rowCount;
                // every accepted texel of this row shares the same y
                sy += y*rowCount;
            }

            // nothing inside the colour radius: keep the current estimate
            if( count == 0 )
                break;

            // Single-precision reciprocal: a double literal (1.) here would pull a
            // double-precision division into every iteration of every thread.
            const float icount = 1.f / count;
            int x1 = floorf(sx*icount);
            int y1 = floorf(sy*icount);
            s0 = floorf(s0*icount);
            s1 = floorf(s1*icount);
            s2 = floorf(s2*icount);

            // squared colour shift between the new mean and the previous estimate
            int norm2 = (s0 - c.x) * (s0 - c.x) + (s1 - c.y) * (s1 - c.y) + (s2 - c.z) * (s2 - c.z);

            // evaluated BEFORE x0/y0/c are overwritten with the new estimate
            bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1-x0) + abs(y1-y0) + norm2 <= eps);

            x0 = x1; y0 = y1;
            c.x = s0; c.y = s1; c.z = s2;

            if( stopFlag )
                break;
        }

        // 3 bytes per output pixel, rows out_step bytes apart
        int base = py * out_step + px * 3;
        out[base+0] = c.x;
        out[base+1] = c.y;
        out[base+2] = c.z;
    }
}
|
|
|
|
} |
|
|
|
@ -75,14 +146,31 @@ namespace cv { namespace gpu { namespace impl |
|
|
|
|
grid.x = divUp(dst.cols, block.x); |
|
|
|
|
grid.y = divUp(dst.rows, block.y); |
|
|
|
|
|
|
|
|
|
tex.filterMode = cudaFilterModeLinear; |
|
|
|
|
tex.addressMode[0] = tex.addressMode[1] = cudaAddressModeWrap; |
|
|
|
|
tex1.filterMode = cudaFilterModeLinear; |
|
|
|
|
tex1.addressMode[0] = tex1.addressMode[1] = cudaAddressModeWrap; |
|
|
|
|
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>(); |
|
|
|
|
cudaSafeCall( cudaBindTexture2D(0, tex, src.ptr, desc, dst.cols, dst.rows, src.step) ); |
|
|
|
|
cudaSafeCall( cudaBindTexture2D(0, tex1, src.ptr, desc, dst.cols, dst.rows, src.step) ); |
|
|
|
|
|
|
|
|
|
kernel_remap<<<grid, block>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows); |
|
|
|
|
|
|
|
|
|
cudaSafeCall( cudaThreadSynchronize() ); |
|
|
|
|
cudaSafeCall( cudaUnbindTexture(tex) ); |
|
|
|
|
cudaSafeCall( cudaUnbindTexture(tex1) ); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Host-side launcher for meanshift_kernel.
//
// Binds src.ptr to tex_meanshift as a 2D uchar4 texture, launches the kernel
// with 32x16 thread blocks covering src.cols x src.rows, waits for it to
// finish and then unbinds the texture. Every CUDA runtime call is routed
// through cudaSafeCall.
//
// sp and sr arrive as float but the kernel takes int parameters, so both are
// truncated toward zero before the launch.
//
// NOTE(review): cudaBindTexture2D takes its width in texels, yet src.cols * 4
// is passed together with a uchar4 channel descriptor - confirm whether
// DevMem2D::cols counts pixels or bytes at this call site.
// NOTE(review): the grid is sized from src while the kernel bounds come from
// dst - presumably both have the same dimensions; verify against the caller.
extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, float sp, float sr, int maxIter, float eps)
{
    dim3 block(32, 16, 1);
    dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y), 1);

    // same value the implicit float -> int conversion at the launch would produce
    const int spatialRadius = static_cast<int>(sp);
    const int colorRadius   = static_cast<int>(sr);

    cudaChannelFormatDesc texDesc = cudaCreateChannelDesc<uchar4>();
    cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.ptr, texDesc, src.cols * 4, src.rows, src.step ) );

    meanshift_kernel<<< grid, block >>>( dst.ptr, dst.step, dst.cols, dst.rows,
                                         spatialRadius, colorRadius, maxIter, eps );

    // the synchronize call also surfaces launch / execution errors of the kernel
    cudaSafeCall( cudaThreadSynchronize() );
    cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
}
|
|
|
|
}}} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|