parent
0e9405e591
commit
78e89890b0
6 changed files with 1668 additions and 0 deletions
@ -0,0 +1,831 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. |
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// @Authors |
||||
// Dachuan Zhao, dachuan@multicorewareinc.com |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other oclMaterials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors as is and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
//#pragma OPENCL EXTENSION cl_amd_printf : enable |
||||
|
||||
|
||||
__kernel void calcSharrDeriv_vertical_C1_D0(__global const uchar* src, int srcStep, int rows, int cols, int cn, __global short* dx_buf, int dx_bufStep, __global short* dy_buf, int dy_bufStep) |
||||
{ |
||||
const int x = get_global_id(0); |
||||
const int y = get_global_id(1); |
||||
|
||||
if (y < rows && x < cols * cn) |
||||
{ |
||||
const uchar src_val0 = (src + (y > 0 ? y-1 : rows > 1 ? 1 : 0) * srcStep)[x]; |
||||
const uchar src_val1 = (src + y * srcStep)[x]; |
||||
const uchar src_val2 = (src + (y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0) * srcStep)[x]; |
||||
|
||||
((__global short*)((__global char*)dx_buf + y * dx_bufStep / 2))[x] = (src_val0 + src_val2) * 3 + src_val1 * 10; |
||||
((__global short*)((__global char*)dy_buf + y * dy_bufStep / 2))[x] = src_val2 - src_val0; |
||||
} |
||||
} |
||||
|
||||
__kernel void calcSharrDeriv_vertical_C4_D0(__global const uchar* src, int srcStep, int rows, int cols, int cn, __global short* dx_buf, int dx_bufStep, __global short* dy_buf, int dy_bufStep) |
||||
{ |
||||
const int x = get_global_id(0); |
||||
const int y = get_global_id(1); |
||||
|
||||
if (y < rows && x < cols * cn) |
||||
{ |
||||
const uchar src_val0 = (src + (y > 0 ? y - 1 : 1) * srcStep)[x]; |
||||
const uchar src_val1 = (src + y * srcStep)[x]; |
||||
const uchar src_val2 = (src + (y < rows - 1 ? y + 1 : rows - 2) * srcStep)[x]; |
||||
|
||||
((__global short*)((__global char*)dx_buf + y * dx_bufStep / 2))[x] = (src_val0 + src_val2) * 3 + src_val1 * 10; |
||||
((__global short*)((__global char*)dy_buf + y * dy_bufStep / 2))[x] = src_val2 - src_val0; |
||||
} |
||||
} |
||||
|
||||
__kernel void calcSharrDeriv_horizontal_C1_D0(int rows, int cols, int cn, __global const short* dx_buf, int dx_bufStep, __global const short* dy_buf, int dy_bufStep, __global short* dIdx, int dIdxStep, __global short* dIdy, int dIdyStep) |
||||
{ |
||||
const int x = get_global_id(0); |
||||
const int y = get_global_id(1); |
||||
|
||||
const int colsn = cols * cn; |
||||
|
||||
if (y < rows && x < colsn) |
||||
{ |
||||
__global const short* dx_buf_row = dx_buf + y * dx_bufStep; |
||||
__global const short* dy_buf_row = dy_buf + y * dy_bufStep; |
||||
|
||||
const int xr = x + cn < colsn ? x + cn : (cols - 2) * cn + x + cn - colsn; |
||||
const int xl = x - cn >= 0 ? x - cn : cn + x; |
||||
|
||||
((__global short*)((__global char*)dIdx + y * dIdxStep / 2))[x] = dx_buf_row[xr] - dx_buf_row[xl]; |
||||
((__global short*)((__global char*)dIdy + y * dIdyStep / 2))[x] = (dy_buf_row[xr] + dy_buf_row[xl]) * 3 + dy_buf_row[x] * 10; |
||||
} |
||||
} |
||||
|
||||
__kernel void calcSharrDeriv_horizontal_C4_D0(int rows, int cols, int cn, __global const short* dx_buf, int dx_bufStep, __global const short* dy_buf, int dy_bufStep, __global short* dIdx, int dIdxStep, __global short* dIdy, int dIdyStep) |
||||
{ |
||||
const int x = get_global_id(0); |
||||
const int y = get_global_id(1); |
||||
|
||||
const int colsn = cols * cn; |
||||
|
||||
if (y < rows && x < colsn) |
||||
{ |
||||
__global const short* dx_buf_row = dx_buf + y * dx_bufStep; |
||||
__global const short* dy_buf_row = dy_buf + y * dy_bufStep; |
||||
|
||||
const int xr = x + cn < colsn ? x + cn : (cols - 2) * cn + x + cn - colsn; |
||||
const int xl = x - cn >= 0 ? x - cn : cn + x; |
||||
|
||||
((__global short*)((__global char*)dIdx + y * dIdxStep / 2))[x] = dx_buf_row[xr] - dx_buf_row[xl]; |
||||
((__global short*)((__global char*)dIdy + y * dIdyStep / 2))[x] = (dy_buf_row[xr] + dy_buf_row[xl]) * 3 + dy_buf_row[x] * 10; |
||||
} |
||||
} |
||||
|
||||
#define W_BITS 14 |
||||
#define W_BITS1 14 |
||||
|
||||
#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n)) |
||||
|
||||
int linearFilter_uchar(__global const uchar* src, int srcStep, int cn, float2 pt, int x, int y) |
||||
{ |
||||
int2 ipt; |
||||
ipt.x = convert_int_sat_rtn(pt.x); |
||||
ipt.y = convert_int_sat_rtn(pt.y); |
||||
|
||||
float a = pt.x - ipt.x; |
||||
float b = pt.y - ipt.y; |
||||
|
||||
int iw00 = convert_int_sat_rte((1.0f - a) * (1.0f - b) * (1 << W_BITS)); |
||||
int iw01 = convert_int_sat_rte(a * (1.0f - b) * (1 << W_BITS)); |
||||
int iw10 = convert_int_sat_rte((1.0f - a) * b * (1 << W_BITS)); |
||||
int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10; |
||||
|
||||
__global const uchar* src_row = src + (ipt.y + y) * srcStep + ipt.x * cn; |
||||
__global const uchar* src_row1 = src + (ipt.y + y + 1) * srcStep + ipt.x * cn; |
||||
|
||||
return CV_DESCALE(src_row[x] * iw00 + src_row[x + cn] * iw01 + src_row1[x] * iw10 + src_row1[x + cn] * iw11, W_BITS1 - 5); |
||||
} |
||||
|
||||
int linearFilter_short(__global const short* src, int srcStep, int cn, float2 pt, int x, int y) |
||||
{ |
||||
int2 ipt; |
||||
ipt.x = convert_int_sat_rtn(pt.x); |
||||
ipt.y = convert_int_sat_rtn(pt.y); |
||||
|
||||
float a = pt.x - ipt.x; |
||||
float b = pt.y - ipt.y; |
||||
|
||||
int iw00 = convert_int_sat_rte((1.0f - a) * (1.0f - b) * (1 << W_BITS)); |
||||
int iw01 = convert_int_sat_rte(a * (1.0f - b) * (1 << W_BITS)); |
||||
int iw10 = convert_int_sat_rte((1.0f - a) * b * (1 << W_BITS)); |
||||
int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10; |
||||
|
||||
__global const short* src_row = src + (ipt.y + y) * srcStep + ipt.x * cn; |
||||
__global const short* src_row1 = src + (ipt.y + y + 1) * srcStep + ipt.x * cn; |
||||
|
||||
return CV_DESCALE(src_row[x] * iw00 + src_row[x + cn] * iw01 + src_row1[x] * iw10 + src_row1[x + cn] * iw11, W_BITS1); |
||||
} |
||||
|
||||
float linearFilter_float(__global const float* src, int srcStep, int cn, float2 pt, float x, float y) |
||||
{ |
||||
int2 ipt; |
||||
ipt.x = convert_int_sat_rtn(pt.x); |
||||
ipt.y = convert_int_sat_rtn(pt.y); |
||||
|
||||
float a = pt.x - ipt.x; |
||||
float b = pt.y - ipt.y; |
||||
|
||||
float iw00 = ((1.0f - a) * (1.0f - b) * (1 << W_BITS)); |
||||
float iw01 = (a * (1.0f - b) * (1 << W_BITS)); |
||||
float iw10 = ((1.0f - a) * b * (1 << W_BITS)); |
||||
float iw11 = (1 << W_BITS) - iw00 - iw01 - iw10; |
||||
|
||||
__global const float* src_row = src + (int)(ipt.y + y) * srcStep / 4 + ipt.x * cn; |
||||
__global const float* src_row1 = src + (int)(ipt.y + y + 1) * srcStep / 4 + ipt.x * cn; |
||||
|
||||
return src_row[(int)x] * iw00 + src_row[(int)x + cn] * iw01 + src_row1[(int)x] * iw10 + src_row1[(int)x + cn] * iw11, W_BITS1 - 5; |
||||
} |
||||
|
||||
void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid) |
||||
{ |
||||
smem1[tid] = val1; |
||||
smem2[tid] = val2; |
||||
smem3[tid] = val3; |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
if (tid < 128) |
||||
{ |
||||
smem1[tid] = val1 += smem1[tid + 128]; |
||||
smem2[tid] = val2 += smem2[tid + 128]; |
||||
smem3[tid] = val3 += smem3[tid + 128]; |
||||
} |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
if (tid < 64) |
||||
{ |
||||
smem1[tid] = val1 += smem1[tid + 64]; |
||||
smem2[tid] = val2 += smem2[tid + 64]; |
||||
smem3[tid] = val3 += smem3[tid + 64]; |
||||
} |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
if (tid < 32) |
||||
{ |
||||
volatile __local float* vmem1 = smem1; |
||||
volatile __local float* vmem2 = smem2; |
||||
volatile __local float* vmem3 = smem3; |
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 32]; |
||||
vmem2[tid] = val2 += vmem2[tid + 32]; |
||||
vmem3[tid] = val3 += vmem3[tid + 32]; |
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 16]; |
||||
vmem2[tid] = val2 += vmem2[tid + 16]; |
||||
vmem3[tid] = val3 += vmem3[tid + 16]; |
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 8]; |
||||
vmem2[tid] = val2 += vmem2[tid + 8]; |
||||
vmem3[tid] = val3 += vmem3[tid + 8]; |
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 4]; |
||||
vmem2[tid] = val2 += vmem2[tid + 4]; |
||||
vmem3[tid] = val3 += vmem3[tid + 4]; |
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 2]; |
||||
vmem2[tid] = val2 += vmem2[tid + 2]; |
||||
vmem3[tid] = val3 += vmem3[tid + 2]; |
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 1]; |
||||
vmem2[tid] = val2 += vmem2[tid + 1]; |
||||
vmem3[tid] = val3 += vmem3[tid + 1]; |
||||
} |
||||
} |
||||
|
||||
void reduce2(float val1, float val2, __local float* smem1, __local float* smem2, int tid) |
||||
{ |
||||
smem1[tid] = val1; |
||||
smem2[tid] = val2; |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
if (tid < 128) |
||||
{ |
||||
smem1[tid] = val1 += smem1[tid + 128]; |
||||
smem2[tid] = val2 += smem2[tid + 128]; |
||||
} |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
if (tid < 64) |
||||
{ |
||||
smem1[tid] = val1 += smem1[tid + 64]; |
||||
smem2[tid] = val2 += smem2[tid + 64]; |
||||
} |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
if (tid < 32) |
||||
{ |
||||
volatile __local float* vmem1 = smem1; |
||||
volatile __local float* vmem2 = smem2; |
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 32]; |
||||
vmem2[tid] = val2 += vmem2[tid + 32]; |
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 16]; |
||||
vmem2[tid] = val2 += vmem2[tid + 16]; |
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 8]; |
||||
vmem2[tid] = val2 += vmem2[tid + 8]; |
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 4]; |
||||
vmem2[tid] = val2 += vmem2[tid + 4]; |
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 2]; |
||||
vmem2[tid] = val2 += vmem2[tid + 2]; |
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 1]; |
||||
vmem2[tid] = val2 += vmem2[tid + 1]; |
||||
} |
||||
} |
||||
|
||||
void reduce1(float val1, __local float* smem1, int tid) |
||||
{ |
||||
smem1[tid] = val1; |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
if (tid < 128) |
||||
{ |
||||
smem1[tid] = val1 += smem1[tid + 128]; |
||||
} |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
if (tid < 64) |
||||
{ |
||||
smem1[tid] = val1 += smem1[tid + 64]; |
||||
} |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
if (tid < 32) |
||||
{ |
||||
volatile __local float* vmem1 = smem1; |
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 32]; |
||||
vmem1[tid] = val1 += vmem1[tid + 16]; |
||||
vmem1[tid] = val1 += vmem1[tid + 8]; |
||||
vmem1[tid] = val1 += vmem1[tid + 4]; |
||||
vmem1[tid] = val1 += vmem1[tid + 2]; |
||||
vmem1[tid] = val1 += vmem1[tid + 1]; |
||||
} |
||||
} |
||||
|
||||
#define SCALE (1.0f / (1 << 20)) |
||||
|
||||
// Image read mode |
||||
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR; |
||||
|
||||
__kernel void lkSparse_C1_D5(image2d_t I, image2d_t J, |
||||
__global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status/*, __global float* err*/, const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr, char GET_MIN_EIGENVALS) |
||||
{ |
||||
__local float smem1[256]; |
||||
__local float smem2[256]; |
||||
__local float smem3[256]; |
||||
|
||||
int c_halfWin_x = (c_winSize_x - 1) / 2; |
||||
int c_halfWin_y = (c_winSize_y - 1) / 2; |
||||
|
||||
const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0); |
||||
|
||||
float2 prevPt = prevPts[get_group_id(0)]; |
||||
prevPt.x *= (1.0f / (1 << level)); |
||||
prevPt.y *= (1.0f / (1 << level)); |
||||
|
||||
if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows) |
||||
{ |
||||
if (level == 0 && tid == 0) |
||||
{ |
||||
status[get_group_id(0)] = 0; |
||||
|
||||
//if (calcErr) |
||||
// err[get_group_id(0)] = 0; |
||||
} |
||||
|
||||
return; |
||||
} |
||||
|
||||
prevPt.x -= c_halfWin_x; |
||||
prevPt.y -= c_halfWin_y; |
||||
|
||||
// extract the patch from the first image, compute covariation matrix of derivatives |
||||
|
||||
float A11 = 0; |
||||
float A12 = 0; |
||||
float A22 = 0; |
||||
|
||||
float I_patch[21][21]; |
||||
float dIdx_patch[21][21]; |
||||
float dIdy_patch[21][21]; |
||||
|
||||
for (int yBase = get_local_id(1), i = 0; yBase < c_winSize_y; yBase += get_local_size(1), ++i) |
||||
{ |
||||
for (int xBase = get_local_id(0), j = 0; xBase < c_winSize_x; xBase += get_local_size(0), ++j) |
||||
{ |
||||
float x = (prevPt.x + xBase + 0.5f); |
||||
float y = (prevPt.y + yBase + 0.5f); |
||||
|
||||
I_patch[i][j] = read_imagef(I, sampler, (float2)(x, y)).x; |
||||
|
||||
float dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x - |
||||
(3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x); |
||||
|
||||
float dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x - |
||||
(3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x); |
||||
|
||||
dIdx_patch[i][j] = dIdx; |
||||
dIdy_patch[i][j] = dIdy; |
||||
|
||||
A11 += dIdx * dIdx; |
||||
A12 += dIdx * dIdy; |
||||
A22 += dIdy * dIdy; |
||||
} |
||||
} |
||||
|
||||
reduce3(A11, A12, A22, smem1, smem2, smem3, tid); |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
A11 = smem1[0]; |
||||
A12 = smem2[0]; |
||||
A22 = smem3[0]; |
||||
|
||||
float D = A11 * A22 - A12 * A12; |
||||
|
||||
//if (calcErr && GET_MIN_EIGENVALS && tid == 0) |
||||
// err[get_group_id(0)] = minEig; |
||||
|
||||
if (D < 1.192092896e-07f) |
||||
{ |
||||
if (level == 0 && tid == 0) |
||||
status[get_group_id(0)] = 0; |
||||
|
||||
return; |
||||
} |
||||
|
||||
D = 1.f / D; |
||||
|
||||
A11 *= D; |
||||
A12 *= D; |
||||
A22 *= D; |
||||
|
||||
float2 nextPt = nextPts[get_group_id(0)]; |
||||
nextPt.x *= 2.0f; |
||||
nextPt.y *= 2.0f; |
||||
|
||||
nextPt.x -= c_halfWin_x; |
||||
nextPt.y -= c_halfWin_y; |
||||
|
||||
for (int k = 0; k < c_iters; ++k) |
||||
{ |
||||
if (nextPt.x < -c_halfWin_x || nextPt.x >= cols || nextPt.y < -c_halfWin_y || nextPt.y >= rows) |
||||
{ |
||||
if (tid == 0 && level == 0) |
||||
status[get_group_id(0)] = 0; |
||||
return; |
||||
} |
||||
|
||||
float b1 = 0; |
||||
float b2 = 0; |
||||
|
||||
for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i) |
||||
{ |
||||
for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j) |
||||
{ |
||||
float a = (nextPt.x + x + 0.5f); |
||||
float b = (nextPt.y + y + 0.5f); |
||||
|
||||
float I_val = I_patch[i][j]; |
||||
float J_val = read_imagef(J, sampler, (float2)(a, b)).x; |
||||
|
||||
float diff = (J_val - I_val) * 32.0f; |
||||
|
||||
b1 += diff * dIdx_patch[i][j]; |
||||
b2 += diff * dIdy_patch[i][j]; |
||||
} |
||||
} |
||||
|
||||
reduce2(b1, b2, smem1, smem2, tid); |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
b1 = smem1[0]; |
||||
b2 = smem2[0]; |
||||
|
||||
float2 delta; |
||||
delta.x = A12 * b2 - A22 * b1; |
||||
delta.y = A12 * b1 - A11 * b2; |
||||
|
||||
nextPt.x += delta.x; |
||||
nextPt.y += delta.y; |
||||
|
||||
if (fabs(delta.x) < 0.01f && fabs(delta.y) < 0.01f) |
||||
break; |
||||
} |
||||
|
||||
float errval = 0.0f; |
||||
if (calcErr) |
||||
{ |
||||
for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i) |
||||
{ |
||||
for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j) |
||||
{ |
||||
float a = (nextPt.x + x + 0.5f); |
||||
float b = (nextPt.y + y + 0.5f); |
||||
|
||||
float I_val = I_patch[i][j]; |
||||
float J_val = read_imagef(J, sampler, (float2)(a, b)).x; |
||||
|
||||
float diff = J_val - I_val; |
||||
|
||||
errval += fabs((float)diff); |
||||
} |
||||
} |
||||
|
||||
reduce1(errval, smem1, tid); |
||||
} |
||||
|
||||
if (tid == 0) |
||||
{ |
||||
nextPt.x += c_halfWin_x; |
||||
nextPt.y += c_halfWin_y; |
||||
|
||||
nextPts[get_group_id(0)] = nextPt; |
||||
|
||||
//if (calcErr && !GET_MIN_EIGENVALS) |
||||
// err[get_group_id(0)] = errval; |
||||
} |
||||
} |
||||
__kernel void lkSparse_C4_D5(image2d_t I, image2d_t J, |
||||
__global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status/*, __global float* err*/, const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr, char GET_MIN_EIGENVALS) |
||||
{ |
||||
__local float smem1[256]; |
||||
__local float smem2[256]; |
||||
__local float smem3[256]; |
||||
|
||||
int c_halfWin_x = (c_winSize_x - 1) / 2; |
||||
int c_halfWin_y = (c_winSize_y - 1) / 2; |
||||
|
||||
const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0); |
||||
|
||||
float2 prevPt = prevPts[get_group_id(0)]; |
||||
prevPt.x *= (1.0f / (1 << level)); |
||||
prevPt.y *= (1.0f / (1 << level)); |
||||
|
||||
if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows) |
||||
{ |
||||
if (level == 0 && tid == 0) |
||||
{ |
||||
status[get_group_id(0)] = 0; |
||||
|
||||
//if (calcErr) |
||||
// err[get_group_id(0)] = 0; |
||||
} |
||||
|
||||
return; |
||||
} |
||||
|
||||
prevPt.x -= c_halfWin_x; |
||||
prevPt.y -= c_halfWin_y; |
||||
|
||||
// extract the patch from the first image, compute covariation matrix of derivatives |
||||
|
||||
float A11 = 0; |
||||
float A12 = 0; |
||||
float A22 = 0; |
||||
|
||||
float4 I_patch[21][21]; |
||||
float4 dIdx_patch[21][21]; |
||||
float4 dIdy_patch[21][21]; |
||||
|
||||
for (int yBase = get_local_id(1), i = 0; yBase < c_winSize_y; yBase += get_local_size(1), ++i) |
||||
{ |
||||
for (int xBase = get_local_id(0), j = 0; xBase < c_winSize_x; xBase += get_local_size(0), ++j) |
||||
{ |
||||
float x = (prevPt.x + xBase + 0.5f); |
||||
float y = (prevPt.y + yBase + 0.5f); |
||||
|
||||
I_patch[i][j] = read_imagef(I, sampler, (float2)(x, y)).x; |
||||
|
||||
float4 dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x - |
||||
(3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x); |
||||
|
||||
float4 dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x - |
||||
(3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x); |
||||
|
||||
dIdx_patch[i][j] = dIdx; |
||||
dIdy_patch[i][j] = dIdy; |
||||
|
||||
A11 += (dIdx * dIdx).x + (dIdx * dIdx).y + (dIdx * dIdx).z; |
||||
A12 += (dIdx * dIdy).x + (dIdx * dIdy).y + (dIdx * dIdy).z; |
||||
A22 += (dIdy * dIdy).x + (dIdy * dIdy).y + (dIdy * dIdy).z; |
||||
} |
||||
} |
||||
|
||||
reduce3(A11, A12, A22, smem1, smem2, smem3, tid); |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
A11 = smem1[0]; |
||||
A12 = smem2[0]; |
||||
A22 = smem3[0]; |
||||
|
||||
float D = A11 * A22 - A12 * A12; |
||||
|
||||
//if (calcErr && GET_MIN_EIGENVALS && tid == 0) |
||||
// err[get_group_id(0)] = minEig; |
||||
|
||||
if (D < 1.192092896e-07f) |
||||
{ |
||||
if (level == 0 && tid == 0) |
||||
status[get_group_id(0)] = 0; |
||||
|
||||
return; |
||||
} |
||||
|
||||
D = 1.f / D; |
||||
|
||||
A11 *= D; |
||||
A12 *= D; |
||||
A22 *= D; |
||||
|
||||
float2 nextPt = nextPts[get_group_id(0)]; |
||||
nextPt.x *= 2.0f; |
||||
nextPt.y *= 2.0f; |
||||
|
||||
nextPt.x -= c_halfWin_x; |
||||
nextPt.y -= c_halfWin_y; |
||||
|
||||
for (int k = 0; k < c_iters; ++k) |
||||
{ |
||||
if (nextPt.x < -c_halfWin_x || nextPt.x >= cols || nextPt.y < -c_halfWin_y || nextPt.y >= rows) |
||||
{ |
||||
if (tid == 0 && level == 0) |
||||
status[get_group_id(0)] = 0; |
||||
return; |
||||
} |
||||
|
||||
float b1 = 0; |
||||
float b2 = 0; |
||||
|
||||
for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i) |
||||
{ |
||||
for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j) |
||||
{ |
||||
float a = (nextPt.x + x + 0.5f); |
||||
float b = (nextPt.y + y + 0.5f); |
||||
|
||||
float4 I_val = I_patch[i][j]; |
||||
float4 J_val = read_imagef(J, sampler, (float2)(a, b)).x; |
||||
|
||||
float4 diff = (J_val - I_val) * 32.0f; |
||||
|
||||
b1 += (diff * dIdx_patch[i][j]).x + (diff * dIdx_patch[i][j]).y + (diff * dIdx_patch[i][j]).z; |
||||
b2 += (diff * dIdy_patch[i][j]).x + (diff * dIdy_patch[i][j]).y + (diff * dIdy_patch[i][j]).z; |
||||
} |
||||
} |
||||
|
||||
reduce2(b1, b2, smem1, smem2, tid); |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
b1 = smem1[0]; |
||||
b2 = smem2[0]; |
||||
|
||||
float2 delta; |
||||
delta.x = A12 * b2 - A22 * b1; |
||||
delta.y = A12 * b1 - A11 * b2; |
||||
|
||||
nextPt.x += delta.x; |
||||
nextPt.y += delta.y; |
||||
|
||||
if (fabs(delta.x) < 0.01f && fabs(delta.y) < 0.01f) |
||||
break; |
||||
} |
||||
|
||||
float errval = 0.0f; |
||||
if (calcErr) |
||||
{ |
||||
for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i) |
||||
{ |
||||
for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j) |
||||
{ |
||||
float a = (nextPt.x + x + 0.5f); |
||||
float b = (nextPt.y + y + 0.5f); |
||||
|
||||
float4 I_val = I_patch[i][j]; |
||||
float4 J_val = read_imagef(J, sampler, (float2)(a, b)).x; |
||||
|
||||
float4 diff = J_val - I_val; |
||||
|
||||
errval += fabs(diff.x) + fabs(diff.y) + fabs(diff.z); |
||||
} |
||||
} |
||||
|
||||
reduce1(errval, smem1, tid); |
||||
} |
||||
|
||||
if (tid == 0) |
||||
{ |
||||
nextPt.x += c_halfWin_x; |
||||
nextPt.y += c_halfWin_y; |
||||
|
||||
nextPts[get_group_id(0)] = nextPt; |
||||
|
||||
//if (calcErr && !GET_MIN_EIGENVALS) |
||||
// err[get_group_id(0)] = errval; |
||||
} |
||||
} |
||||
|
||||
__kernel void lkDense_C1_D0(image2d_t I, image2d_t J, __global float* u, int uStep, __global float* v, int vStep, __global const float* prevU, int prevUStep, __global const float* prevV, int prevVStep, |
||||
const int rows, const int cols, /*__global float* err, int errStep, int cn,*/ int c_winSize_x, int c_winSize_y, int c_iters, char calcErr) |
||||
{ |
||||
int c_halfWin_x = (c_winSize_x - 1) / 2; |
||||
int c_halfWin_y = (c_winSize_y - 1) / 2; |
||||
|
||||
const int patchWidth = get_local_size(0) + 2 * c_halfWin_x; |
||||
const int patchHeight = get_local_size(1) + 2 * c_halfWin_y; |
||||
|
||||
__local int smem[8192]; |
||||
|
||||
__local int* I_patch = smem; |
||||
__local int* dIdx_patch = I_patch + patchWidth * patchHeight; |
||||
__local int* dIdy_patch = dIdx_patch + patchWidth * patchHeight; |
||||
|
||||
const int xBase = get_group_id(0) * get_local_size(0); |
||||
const int yBase = get_group_id(1) * get_local_size(1); |
||||
|
||||
sampler_t sampleri = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; |
||||
|
||||
for (int i = get_local_id(1); i < patchHeight; i += get_local_size(1)) |
||||
{ |
||||
for (int j = get_local_id(0); j < patchWidth; j += get_local_size(0)) |
||||
{ |
||||
float x = xBase - c_halfWin_x + j + 0.5f; |
||||
float y = yBase - c_halfWin_y + i + 0.5f; |
||||
|
||||
I_patch[i * patchWidth + j] = read_imagei(I, sampleri, (float2)(x, y)).x; |
||||
|
||||
// Sharr Deriv |
||||
|
||||
dIdx_patch[i * patchWidth + j] = 3 * read_imagei(I, sampleri, (float2)(x+1, y-1)).x + 10 * read_imagei(I, sampleri, (float2)(x+1, y)).x + 3 * read_imagei(I, sampleri, (float2)(x+1, y+1)).x - |
||||
(3 * read_imagei(I, sampleri, (float2)(x-1, y-1)).x + 10 * read_imagei(I, sampleri, (float2)(x-1, y)).x + 3 * read_imagei(I, sampleri, (float2)(x-1, y+1)).x); |
||||
|
||||
dIdy_patch[i * patchWidth + j] = 3 * read_imagei(I, sampleri, (float2)(x-1, y+1)).x + 10 * read_imagei(I, sampleri, (float2)(x, y+1)).x + 3 * read_imagei(I, sampleri, (float2)(x+1, y+1)).x - |
||||
(3 * read_imagei(I, sampleri, (float2)(x-1, y-1)).x + 10 * read_imagei(I, sampleri, (float2)(x, y-1)).x + 3 * read_imagei(I, sampleri, (float2)(x+1, y-1)).x); |
||||
} |
||||
} |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
// extract the patch from the first image, compute covariation matrix of derivatives |
||||
|
||||
const int x = get_global_id(0); |
||||
const int y = get_global_id(1); |
||||
|
||||
if (x >= cols || y >= rows) |
||||
return; |
||||
|
||||
int A11i = 0; |
||||
int A12i = 0; |
||||
int A22i = 0; |
||||
|
||||
for (int i = 0; i < c_winSize_y; ++i) |
||||
{ |
||||
for (int j = 0; j < c_winSize_x; ++j) |
||||
{ |
||||
int dIdx = dIdx_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)]; |
||||
int dIdy = dIdy_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)]; |
||||
|
||||
A11i += dIdx * dIdx; |
||||
A12i += dIdx * dIdy; |
||||
A22i += dIdy * dIdy; |
||||
} |
||||
} |
||||
|
||||
float A11 = A11i; |
||||
float A12 = A12i; |
||||
float A22 = A22i; |
||||
|
||||
float D = A11 * A22 - A12 * A12; |
||||
|
||||
//if (calcErr && GET_MIN_EIGENVALS) |
||||
// (err + y * errStep)[x] = minEig; |
||||
|
||||
if (D < 1.192092896e-07f) |
||||
{ |
||||
//if (calcErr) |
||||
// err(y, x) = 3.402823466e+38f; |
||||
|
||||
return; |
||||
} |
||||
|
||||
D = 1.f / D; |
||||
|
||||
A11 *= D; |
||||
A12 *= D; |
||||
A22 *= D; |
||||
|
||||
float2 nextPt; |
||||
nextPt.x = x + prevU[y/2 * prevUStep / 4 + x/2] * 2.0f; |
||||
nextPt.y = y + prevV[y/2 * prevVStep / 4 + x/2] * 2.0f; |
||||
|
||||
for (int k = 0; k < c_iters; ++k) |
||||
{ |
||||
if (nextPt.x < 0 || nextPt.x >= cols || nextPt.y < 0 || nextPt.y >= rows) |
||||
{ |
||||
//if (calcErr) |
||||
// err(y, x) = 3.402823466e+38f; |
||||
|
||||
return; |
||||
} |
||||
|
||||
int b1 = 0; |
||||
int b2 = 0; |
||||
|
||||
for (int i = 0; i < c_winSize_y; ++i) |
||||
{ |
||||
for (int j = 0; j < c_winSize_x; ++j) |
||||
{ |
||||
int iI = I_patch[(get_local_id(1) + i) * patchWidth + get_local_id(0) + j]; |
||||
int iJ = read_imagei(J, sampler, (float2)(nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f)).x; |
||||
|
||||
int diff = (iJ - iI) * 32; |
||||
|
||||
int dIdx = dIdx_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)]; |
||||
int dIdy = dIdy_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)]; |
||||
|
||||
b1 += diff * dIdx; |
||||
b2 += diff * dIdy; |
||||
} |
||||
} |
||||
|
||||
float2 delta; |
||||
delta.x = A12 * b2 - A22 * b1; |
||||
delta.y = A12 * b1 - A11 * b2; |
||||
|
||||
nextPt.x += delta.x; |
||||
nextPt.y += delta.y; |
||||
|
||||
if (fabs(delta.x) < 0.01f && fabs(delta.y) < 0.01f) |
||||
break; |
||||
} |
||||
|
||||
u[y * uStep / 4 + x] = nextPt.x - x; |
||||
v[y * vStep / 4 + x] = nextPt.y - y; |
||||
|
||||
if (calcErr) |
||||
{ |
||||
int errval = 0; |
||||
|
||||
for (int i = 0; i < c_winSize_y; ++i) |
||||
{ |
||||
for (int j = 0; j < c_winSize_x; ++j) |
||||
{ |
||||
int iI = I_patch[(get_local_id(1) + i) * patchWidth + get_local_id(0) + j]; |
||||
int iJ = read_imagei(J, sampler, (float2)(nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f)).x; |
||||
|
||||
errval += abs(iJ - iI); |
||||
} |
||||
} |
||||
|
||||
//err[y * errStep / 4 + x] = static_cast<float>(errval) / (c_winSize_x * c_winSize_y); |
||||
} |
||||
} |
@ -0,0 +1,597 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other GpuMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or bpied warranties, including, but not limited to, the bpied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp" |
||||
|
||||
using namespace std; |
||||
using namespace cv; |
||||
using namespace cv::ocl; |
||||
|
||||
#if !defined (HAVE_OPENCL) |
||||
|
||||
void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat&, const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat*) { } |
||||
void cv::ocl::PyrLKOpticalFlow::dense(const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat*) { } |
||||
|
||||
#else /* !defined (HAVE_OPENCL) */ |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace ocl |
||||
{ |
||||
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||
extern const char *pyrlk; |
||||
|
||||
} |
||||
} |
||||
|
||||
struct dim3 |
||||
{ |
||||
unsigned int x, y, z; |
||||
}; |
||||
|
||||
struct float2 |
||||
{ |
||||
float x, y; |
||||
}; |
||||
|
||||
struct int2 |
||||
{ |
||||
int x, y; |
||||
}; |
||||
|
||||
void calcSharrDeriv_run(const oclMat& src, oclMat& dx_buf, oclMat& dy_buf, oclMat& dIdx, oclMat& dIdy, int cn) |
||||
{ |
||||
Context *clCxt = src.clCxt; |
||||
|
||||
string kernelName = "calcSharrDeriv_vertical"; |
||||
|
||||
size_t localThreads[3] = { 32, 8, 1 }; |
||||
size_t globalThreads[3] = { src.cols, src.rows, 1}; |
||||
|
||||
vector<pair<size_t , const void *> > args; |
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cn )); |
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dx_buf.data )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.step )); |
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dy_buf.data )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.step )); |
||||
|
||||
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); |
||||
|
||||
kernelName = "calcSharrDeriv_horizontal"; |
||||
|
||||
vector<pair<size_t , const void *> > args2; |
||||
args2.push_back( make_pair( sizeof(cl_int), (void *)&src.rows )); |
||||
args2.push_back( make_pair( sizeof(cl_int), (void *)&src.cols )); |
||||
args2.push_back( make_pair( sizeof(cl_int), (void *)&cn )); |
||||
args2.push_back( make_pair( sizeof(cl_mem), (void *)&dx_buf.data )); |
||||
args2.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.step )); |
||||
args2.push_back( make_pair( sizeof(cl_mem), (void *)&dy_buf.data )); |
||||
args2.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.step )); |
||||
args2.push_back( make_pair( sizeof(cl_mem), (void *)&dIdx.data )); |
||||
args2.push_back( make_pair( sizeof(cl_int), (void *)&dIdx.step )); |
||||
args2.push_back( make_pair( sizeof(cl_mem), (void *)&dIdy.data )); |
||||
args2.push_back( make_pair( sizeof(cl_int), (void *)&dIdy.step )); |
||||
|
||||
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args2, src.channels(), src.depth()); |
||||
} |
||||
|
||||
|
||||
void cv::ocl::PyrLKOpticalFlow::calcSharrDeriv(const oclMat& src, oclMat& dIdx, oclMat& dIdy) |
||||
{ |
||||
CV_Assert(src.rows > 1 && src.cols > 1); |
||||
CV_Assert(src.depth() == CV_8U); |
||||
|
||||
const int cn = src.channels(); |
||||
|
||||
ensureSizeIsEnough(src.size(), CV_MAKETYPE(CV_16S, cn), dx_calcBuf_); |
||||
ensureSizeIsEnough(src.size(), CV_MAKETYPE(CV_16S, cn), dy_calcBuf_); |
||||
|
||||
calcSharrDeriv_run(src, dx_calcBuf_, dy_calcBuf_, dIdx, dIdy, cn); |
||||
} |
||||
|
||||
void cv::ocl::PyrLKOpticalFlow::buildImagePyramid(const oclMat& img0, vector<oclMat>& pyr, bool withBorder) |
||||
{ |
||||
pyr.resize(maxLevel + 1); |
||||
|
||||
Size sz = img0.size(); |
||||
|
||||
Mat img0Temp; |
||||
img0.download(img0Temp); |
||||
|
||||
Mat pyrTemp; |
||||
oclMat o; |
||||
|
||||
for (int level = 0; level <= maxLevel; ++level) |
||||
{ |
||||
oclMat temp; |
||||
|
||||
if (withBorder) |
||||
{ |
||||
temp.create(sz.height + winSize.height * 2, sz.width + winSize.width * 2, img0.type()); |
||||
} |
||||
else |
||||
{ |
||||
ensureSizeIsEnough(sz, img0.type(), pyr[level]); |
||||
} |
||||
|
||||
if (level == 0) |
||||
pyr[level] = img0Temp; |
||||
else |
||||
pyrDown(pyr[level - 1], pyr[level]); |
||||
|
||||
if (withBorder) |
||||
copyMakeBorder(pyr[level], temp, winSize.height, winSize.height, winSize.width, winSize.width, BORDER_REFLECT_101); |
||||
|
||||
sz = Size((sz.width + 1) / 2, (sz.height + 1) / 2); |
||||
|
||||
if (sz.width <= winSize.width || sz.height <= winSize.height) |
||||
{ |
||||
maxLevel = level; |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
|
||||
namespace |
||||
{ |
||||
void calcPatchSize(cv::Size winSize, int cn, dim3& block, dim3& patch, bool isDeviceArch11) |
||||
{ |
||||
winSize.width *= cn; |
||||
|
||||
if (winSize.width > 32 && winSize.width > 2 * winSize.height) |
||||
{ |
||||
block.x = isDeviceArch11 ? 16 : 32; |
||||
block.y = 8; |
||||
} |
||||
else |
||||
{ |
||||
block.x = 16; |
||||
block.y = isDeviceArch11 ? 8 : 16; |
||||
} |
||||
|
||||
patch.x = (winSize.width + block.x - 1) / block.x; |
||||
patch.y = (winSize.height + block.y - 1) / block.y; |
||||
|
||||
block.z = patch.z = 1; |
||||
} |
||||
} |
||||
|
||||
struct MultiplyScalar |
||||
{ |
||||
MultiplyScalar(double val_, double scale_) : val(val_), scale(scale_) {} |
||||
double operator ()(double a) const |
||||
{ |
||||
return (scale * a * val); |
||||
} |
||||
const double val; |
||||
const double scale; |
||||
}; |
||||
|
||||
void callF(const oclMat& src, oclMat& dst, MultiplyScalar op, int mask) |
||||
{ |
||||
Mat srcTemp; |
||||
Mat dstTemp; |
||||
src.download(srcTemp); |
||||
dst.download(dstTemp); |
||||
|
||||
int i; |
||||
int j; |
||||
int k; |
||||
for(i = 0; i < srcTemp.rows; i++) |
||||
{ |
||||
for(j = 0; j < srcTemp.cols; j++) |
||||
{ |
||||
for(k = 0; k < srcTemp.channels(); k++) |
||||
{ |
||||
((float*)dstTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k] = (float)op(((float*)srcTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k]); |
||||
} |
||||
} |
||||
} |
||||
|
||||
dst = dstTemp; |
||||
} |
||||
|
||||
static inline bool isAligned(const unsigned char* ptr, size_t size) |
||||
{ |
||||
return reinterpret_cast<size_t>(ptr) % size == 0; |
||||
} |
||||
|
||||
static inline bool isAligned(size_t step, size_t size) |
||||
{ |
||||
return step % size == 0; |
||||
} |
||||
|
||||
void callT(const oclMat& src, oclMat& dst, MultiplyScalar op, int mask) |
||||
{ |
||||
if (!isAligned(src.data, 4 * sizeof(double)) || !isAligned(src.step, 4 * sizeof(double)) ||
|
||||
!isAligned(dst.data, 4 * sizeof(double)) || !isAligned(dst.step, 4 * sizeof(double))) |
||||
{ |
||||
callF(src, dst, op, mask); |
||||
return; |
||||
} |
||||
|
||||
Mat srcTemp; |
||||
Mat dstTemp; |
||||
src.download(srcTemp); |
||||
dst.download(dstTemp); |
||||
|
||||
int x_shifted; |
||||
|
||||
int i; |
||||
int j; |
||||
for(i = 0; i < srcTemp.rows; i++) |
||||
{ |
||||
const double* srcRow = (const double*)srcTemp.data + i * srcTemp.rows; |
||||
double* dstRow = (double*)dstTemp.data + i * dstTemp.rows;; |
||||
|
||||
for(j = 0; j < srcTemp.cols; j++) |
||||
{ |
||||
x_shifted = j * 4; |
||||
|
||||
if(x_shifted + 4 - 1 < srcTemp.cols) |
||||
{ |
||||
dstRow[x_shifted ] = op(srcRow[x_shifted ]); |
||||
dstRow[x_shifted + 1] = op(srcRow[x_shifted + 1]); |
||||
dstRow[x_shifted + 2] = op(srcRow[x_shifted + 2]); |
||||
dstRow[x_shifted + 3] = op(srcRow[x_shifted + 3]); |
||||
} |
||||
else |
||||
{ |
||||
for (int real_x = x_shifted; real_x < srcTemp.cols; ++real_x) |
||||
{ |
||||
((float*)dstTemp.data)[i * srcTemp.rows + real_x] = op(((float*)srcTemp.data)[i * srcTemp.rows + real_x]); |
||||
} |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
void multiply(const oclMat& src1, double val, oclMat& dst, double scale = 1.0f); |
||||
void multiply(const oclMat& src1, double val, oclMat& dst, double scale) |
||||
{ |
||||
MultiplyScalar op(val, scale); |
||||
//if(src1.channels() == 1 && dst.channels() == 1)
|
||||
//{
|
||||
// callT(src1, dst, op, 0);
|
||||
//}
|
||||
//else
|
||||
//{
|
||||
callF(src1, dst, op, 0); |
||||
//}
|
||||
} |
||||
|
||||
cl_mem bindTexture(const oclMat& mat, int depth, int channels) |
||||
{ |
||||
cl_mem texture; |
||||
cl_image_format format; |
||||
int err; |
||||
if(depth == 0) |
||||
{ |
||||
format.image_channel_data_type = CL_UNSIGNED_INT8; |
||||
} |
||||
else if(depth == 5) |
||||
{ |
||||
format.image_channel_data_type = CL_FLOAT; |
||||
} |
||||
if(channels == 1) |
||||
{ |
||||
format.image_channel_order = CL_R; |
||||
} |
||||
else if(channels == 3) |
||||
{ |
||||
format.image_channel_order = CL_RGB; |
||||
} |
||||
else if(channels == 4) |
||||
{ |
||||
format.image_channel_order = CL_RGBA; |
||||
} |
||||
#if CL_VERSION_1_2 |
||||
cl_image_desc desc; |
||||
desc.image_type = CL_MEM_OBJECT_IMAGE2D; |
||||
desc.image_width = mat.cols; |
||||
desc.image_height = mat.rows; |
||||
desc.image_depth = NULL; |
||||
desc.image_array_size = 1; |
||||
desc.image_row_pitch = 0; |
||||
desc.image_slice_pitch= 0; |
||||
desc.buffer = NULL; |
||||
desc.num_mip_levels = 0; |
||||
desc.num_samples = 0; |
||||
texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
|
||||
#else |
||||
texture = clCreateImage2D( |
||||
mat.clCxt->impl->clContext,
|
||||
CL_MEM_READ_WRITE,
|
||||
&format,
|
||||
mat.cols,
|
||||
mat.rows,
|
||||
0,
|
||||
NULL,
|
||||
&err); |
||||
#endif |
||||
size_t origin[] = { 0, 0, 0 };
|
||||
size_t region[] = { mat.cols, mat.rows, 1 };
|
||||
clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, texture, 0, origin, region, 0, NULL, 0); |
||||
openCLSafeCall(err); |
||||
|
||||
return texture; |
||||
} |
||||
|
||||
void lkSparse_run(oclMat& I, oclMat& J, |
||||
const oclMat& prevPts, oclMat& nextPts, oclMat& status, oclMat* err, bool GET_MIN_EIGENVALS, int ptcount,
|
||||
int level, dim3 block, dim3 patch, Size winSize, int iters) |
||||
{ |
||||
Context *clCxt = I.clCxt; |
||||
|
||||
string kernelName = "lkSparse"; |
||||
|
||||
size_t localThreads[3] = { 16, 16, 1 }; |
||||
size_t globalThreads[3] = { 16 * ptcount, 16, 1}; |
||||
|
||||
int cn = I.channels(); |
||||
|
||||
bool calcErr; |
||||
if (err) |
||||
{ |
||||
calcErr = true; |
||||
} |
||||
else |
||||
{ |
||||
calcErr = false; |
||||
} |
||||
calcErr = true; |
||||
|
||||
cl_mem ITex = bindTexture(I, I.depth(), cn); |
||||
cl_mem JTex = bindTexture(J, J.depth(), cn); |
||||
|
||||
vector<pair<size_t , const void *> > args; |
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&ITex )); |
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&JTex )); |
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&prevPts.data )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&prevPts.step )); |
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&nextPts.data )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&nextPts.step )); |
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&status.data )); |
||||
//args.push_back( make_pair( sizeof(cl_mem), (void *)&(err->data) ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&level )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&I.rows )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&patch.x )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&patch.y )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cn )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.width )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.height )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&iters )); |
||||
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr )); |
||||
args.push_back( make_pair( sizeof(cl_char), (void *)&GET_MIN_EIGENVALS )); |
||||
|
||||
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.channels(), I.depth()); |
||||
} |
||||
|
||||
void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& nextImg, const oclMat& prevPts, oclMat& nextPts, oclMat& status, oclMat* err) |
||||
{ |
||||
if (prevPts.empty()) |
||||
{ |
||||
nextPts.release(); |
||||
status.release(); |
||||
if (err) err->release(); |
||||
return; |
||||
} |
||||
|
||||
derivLambda = std::min(std::max(derivLambda, 0.0), 1.0); |
||||
|
||||
iters = std::min(std::max(iters, 0), 100); |
||||
|
||||
const int cn = prevImg.channels(); |
||||
|
||||
dim3 block, patch; |
||||
calcPatchSize(winSize, cn, block, patch, isDeviceArch11_);
|
||||
|
||||
CV_Assert(derivLambda >= 0); |
||||
CV_Assert(maxLevel >= 0 && winSize.width > 2 && winSize.height > 2); |
||||
CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type()); |
||||
CV_Assert(patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6); |
||||
CV_Assert(prevPts.rows == 1 && prevPts.type() == CV_32FC2); |
||||
|
||||
if (useInitialFlow) |
||||
CV_Assert(nextPts.size() == prevPts.size() && nextPts.type() == CV_32FC2); |
||||
else |
||||
ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts); |
||||
|
||||
oclMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1); |
||||
oclMat temp2 = nextPts.reshape(1); |
||||
//oclMat scalar(temp1.rows, temp1.cols, temp1.type(), Scalar(1.0f / (1 << maxLevel) / 2.0f));
|
||||
//ocl::multiply(temp1, scalar, temp2);
|
||||
::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2); |
||||
|
||||
ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status); |
||||
status.setTo(Scalar::all(1)); |
||||
|
||||
if (err) |
||||
ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err); |
||||
|
||||
// build the image pyramids.
|
||||
|
||||
prevPyr_.resize(maxLevel + 1); |
||||
nextPyr_.resize(maxLevel + 1); |
||||
|
||||
if (cn == 1 || cn == 4) |
||||
{ |
||||
prevImg.convertTo(prevPyr_[0], CV_32F); |
||||
nextImg.convertTo(nextPyr_[0], CV_32F); |
||||
} |
||||
else |
||||
{ |
||||
oclMat buf_; |
||||
cvtColor(prevImg, buf_, COLOR_BGR2BGRA); |
||||
buf_.convertTo(prevPyr_[0], CV_32F); |
||||
|
||||
cvtColor(nextImg, buf_, COLOR_BGR2BGRA); |
||||
buf_.convertTo(nextPyr_[0], CV_32F); |
||||
} |
||||
|
||||
for (int level = 1; level <= maxLevel; ++level) |
||||
{ |
||||
pyrDown(prevPyr_[level - 1], prevPyr_[level]); |
||||
pyrDown(nextPyr_[level - 1], nextPyr_[level]); |
||||
} |
||||
|
||||
// dI/dx ~ Ix, dI/dy ~ Iy
|
||||
|
||||
for (int level = maxLevel; level >= 0; level--) |
||||
{ |
||||
lkSparse_run(prevPyr_[level], nextPyr_[level],
|
||||
prevPts, nextPts, status, level == 0 && err ? err : 0, getMinEigenVals, prevPts.cols, |
||||
level, block, patch, winSize, iters); |
||||
} |
||||
} |
||||
|
||||
void lkDense_run(oclMat& I, oclMat& J, oclMat& u, oclMat& v,
|
||||
oclMat& prevU, oclMat& prevV, oclMat* err, Size winSize, int iters) |
||||
{ |
||||
Context *clCxt = I.clCxt; |
||||
|
||||
string kernelName = "lkDense"; |
||||
|
||||
size_t localThreads[3] = { 16, 16, 1 }; |
||||
size_t globalThreads[3] = { I.cols, I.rows, 1}; |
||||
|
||||
int cn = I.channels(); |
||||
|
||||
bool calcErr; |
||||
if (err) |
||||
{ |
||||
calcErr = true; |
||||
} |
||||
else |
||||
{ |
||||
calcErr = false; |
||||
} |
||||
|
||||
cl_mem ITex = bindTexture(I, I.depth(), cn); |
||||
cl_mem JTex = bindTexture(J, J.depth(), cn); |
||||
|
||||
int2 halfWin = {(winSize.width - 1) / 2, (winSize.height - 1) / 2}; |
||||
const int patchWidth = 16 + 2 * halfWin.x; |
||||
const int patchHeight = 16 + 2 * halfWin.y; |
||||
size_t smem_size = 3 * patchWidth * patchHeight * sizeof(int); |
||||
|
||||
vector<pair<size_t , const void *> > args; |
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&ITex )); |
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&JTex )); |
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&u.data )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&u.step )); |
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&v.data )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&v.step )); |
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&prevU.data )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&prevU.step )); |
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&prevV.data )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&prevV.step )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&I.rows )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols )); |
||||
//args.push_back( make_pair( sizeof(cl_mem), (void *)&(*err).data ));
|
||||
//args.push_back( make_pair( sizeof(cl_int), (void *)&(*err).step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.width )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.height )); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&iters )); |
||||
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr )); |
||||
|
||||
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.channels(), I.depth()); |
||||
} |
||||
|
||||
void cv::ocl::PyrLKOpticalFlow::dense(const oclMat& prevImg, const oclMat& nextImg, oclMat& u, oclMat& v, oclMat* err) |
||||
{ |
||||
CV_Assert(prevImg.type() == CV_8UC1); |
||||
CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type()); |
||||
CV_Assert(maxLevel >= 0); |
||||
CV_Assert(winSize.width > 2 && winSize.height > 2); |
||||
|
||||
if (err) |
||||
err->create(prevImg.size(), CV_32FC1); |
||||
|
||||
prevPyr_.resize(maxLevel + 1); |
||||
nextPyr_.resize(maxLevel + 1); |
||||
|
||||
prevPyr_[0] = prevImg; |
||||
nextImg.convertTo(nextPyr_[0], CV_32F); |
||||
|
||||
for (int level = 1; level <= maxLevel; ++level) |
||||
{ |
||||
pyrDown(prevPyr_[level - 1], prevPyr_[level]); |
||||
pyrDown(nextPyr_[level - 1], nextPyr_[level]); |
||||
} |
||||
|
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[0]); |
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]); |
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[1]); |
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[1]); |
||||
uPyr_[1].setTo(Scalar::all(0)); |
||||
vPyr_[1].setTo(Scalar::all(0)); |
||||
|
||||
Size winSize2i(winSize.width, winSize.height); |
||||
|
||||
int idx = 0; |
||||
|
||||
for (int level = maxLevel; level >= 0; level--) |
||||
{ |
||||
int idx2 = (idx + 1) & 1; |
||||
|
||||
lkDense_run(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2], |
||||
level == 0 ? err : 0, winSize2i, iters); |
||||
|
||||
if (level > 0) |
||||
idx = idx2; |
||||
} |
||||
|
||||
uPyr_[idx].copyTo(u); |
||||
vPyr_[idx].copyTo(v); |
||||
} |
||||
|
||||
#endif /* !defined (HAVE_CUDA) */ |
@ -0,0 +1,167 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// Intel License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of Intel Corporation may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp" |
||||
#include <iomanip> |
||||
|
||||
#ifdef HAVE_OPENCL |
||||
|
||||
using namespace cv; |
||||
using namespace cv::ocl; |
||||
using namespace cvtest; |
||||
using namespace testing; |
||||
using namespace std; |
||||
|
||||
//#define DUMP
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// BroxOpticalFlow
|
||||
|
||||
#define BROX_OPTICAL_FLOW_DUMP_FILE "opticalflow/brox_optical_flow.bin" |
||||
#define BROX_OPTICAL_FLOW_DUMP_FILE_CC20 "opticalflow/brox_optical_flow_cc20.bin" |
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// PyrLKOpticalFlow
|
||||
|
||||
//IMPLEMENT_PARAM_CLASS(UseGray, bool)
|
||||
|
||||
PARAM_TEST_CASE(Sparse, bool, bool) |
||||
{ |
||||
bool useGray; |
||||
bool UseSmart; |
||||
|
||||
virtual void SetUp() |
||||
{ |
||||
UseSmart = GET_PARAM(0); |
||||
useGray = GET_PARAM(0); |
||||
} |
||||
}; |
||||
|
||||
TEST_P(Sparse, Mat) |
||||
{ |
||||
cv::Mat frame0 = readImage("../../../samples/gpu/rubberwhale1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR); |
||||
ASSERT_FALSE(frame0.empty()); |
||||
|
||||
cv::Mat frame1 = readImage("../../../samples/gpu/rubberwhale2.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR); |
||||
ASSERT_FALSE(frame1.empty()); |
||||
|
||||
cv::Mat gray_frame; |
||||
if (useGray) |
||||
gray_frame = frame0; |
||||
else |
||||
cv::cvtColor(frame0, gray_frame, cv::COLOR_BGR2GRAY); |
||||
|
||||
std::vector<cv::Point2f> pts; |
||||
cv::goodFeaturesToTrack(gray_frame, pts, 1000, 0.01, 0.0); |
||||
|
||||
cv::ocl::oclMat d_pts; |
||||
cv::Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void*)&pts[0]); |
||||
d_pts.upload(pts_mat); |
||||
|
||||
cv::ocl::PyrLKOpticalFlow pyrLK; |
||||
|
||||
cv::ocl::oclMat oclFrame0; |
||||
cv::ocl::oclMat oclFrame1; |
||||
cv::ocl::oclMat d_nextPts; |
||||
cv::ocl::oclMat d_status; |
||||
cv::ocl::oclMat d_err; |
||||
|
||||
oclFrame0 = frame0; |
||||
oclFrame1 = frame1; |
||||
|
||||
pyrLK.sparse(oclFrame0, oclFrame1, d_pts, d_nextPts, d_status, &d_err); |
||||
|
||||
std::vector<cv::Point2f> nextPts(d_nextPts.cols); |
||||
cv::Mat nextPts_mat(1, d_nextPts.cols, CV_32FC2, (void*)&nextPts[0]); |
||||
d_nextPts.download(nextPts_mat); |
||||
|
||||
std::vector<unsigned char> status(d_status.cols); |
||||
cv::Mat status_mat(1, d_status.cols, CV_8UC1, (void*)&status[0]); |
||||
d_status.download(status_mat); |
||||
|
||||
std::vector<float> err(d_err.cols); |
||||
cv::Mat err_mat(1, d_err.cols, CV_32FC1, (void*)&err[0]); |
||||
d_err.download(err_mat); |
||||
|
||||
std::vector<cv::Point2f> nextPts_gold; |
||||
std::vector<unsigned char> status_gold; |
||||
std::vector<float> err_gold; |
||||
cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts_gold, status_gold, err_gold); |
||||
|
||||
ASSERT_EQ(nextPts_gold.size(), nextPts.size()); |
||||
ASSERT_EQ(status_gold.size(), status.size()); |
||||
|
||||
size_t mistmatch = 0; |
||||
for (size_t i = 0; i < nextPts.size(); ++i) |
||||
{ |
||||
if (status[i] != status_gold[i]) |
||||
{ |
||||
++mistmatch; |
||||
continue; |
||||
} |
||||
|
||||
if (status[i]) |
||||
{ |
||||
cv::Point2i a = nextPts[i]; |
||||
cv::Point2i b = nextPts_gold[i]; |
||||
|
||||
bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1; |
||||
//float errdiff = std::abs(err[i] - err_gold[i]);
|
||||
float errdiff = 0.0f; |
||||
|
||||
if (!eq || errdiff > 1e-1) |
||||
++mistmatch; |
||||
} |
||||
} |
||||
|
||||
double bad_ratio = static_cast<double>(mistmatch) / (nextPts.size() * 2); |
||||
|
||||
ASSERT_LE(bad_ratio, 0.05f); |
||||
|
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(Video, Sparse, Combine( |
||||
Values(false, true),
|
||||
Values(false))); |
||||
|
||||
#endif // HAVE_OPENCL
|
||||
|
Loading…
Reference in new issue