|
|
|
@ -40,7 +40,7 @@ |
|
|
|
|
// |
|
|
|
|
// Copyright (c) 2010, Paul Furgale, Chi Hay Tong |
|
|
|
|
// |
|
|
|
|
// The original code was written by Paul Furgale and Chi Hay Tong |
|
|
|
|
// The original code was written by Paul Furgale and Chi Hay Tong |
|
|
|
|
// and later optimized and prepared for integration into OpenCV by Itseez. |
|
|
|
|
// |
|
|
|
|
//M*/ |
|
|
|
@ -52,9 +52,9 @@ |
|
|
|
|
#include "opencv2/gpu/device/functional.hpp" |
|
|
|
|
#include "opencv2/gpu/device/filters.hpp" |
|
|
|
|
|
|
|
|
|
namespace cv { namespace gpu { namespace device |
|
|
|
|
namespace cv { namespace gpu { namespace device |
|
|
|
|
{ |
|
|
|
|
namespace surf |
|
|
|
|
namespace surf |
|
|
|
|
{ |
|
|
|
|
//////////////////////////////////////////////////////////////////////// |
|
|
|
|
// Global parameters |
|
|
|
@ -123,7 +123,7 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
float ratio = (float)newSize / oldSize; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
real_t d = 0; |
|
|
|
|
|
|
|
|
|
#pragma unroll |
|
|
|
@ -225,7 +225,7 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
static __device__ bool check(int sum_i, int sum_j, int size) |
|
|
|
|
{ |
|
|
|
|
float ratio = (float)size / 9.0f; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
float d = 0; |
|
|
|
|
|
|
|
|
|
int dx1 = __float2int_rn(ratio * c_DM[0]); |
|
|
|
@ -423,12 +423,12 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
if (::fabs(x[0]) <= 1.f && ::fabs(x[1]) <= 1.f && ::fabs(x[2]) <= 1.f) |
|
|
|
|
{ |
|
|
|
|
// if the step is within the interpolation region, perform it |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const int size = calcSize(c_octave, maxPos.z); |
|
|
|
|
|
|
|
|
|
const int sum_i = (maxPos.y - ((size >> 1) >> c_octave)) << c_octave; |
|
|
|
|
const int sum_j = (maxPos.x - ((size >> 1) >> c_octave)) << c_octave; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const float center_i = sum_i + (float)(size - 1) / 2; |
|
|
|
|
const float center_j = sum_j + (float)(size - 1) / 2; |
|
|
|
|
|
|
|
|
@ -471,8 +471,8 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter, |
|
|
|
|
float* featureX, float* featureY, int* featureLaplacian, int* featureOctave, float* featureSize, float* featureHessian, |
|
|
|
|
void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter, |
|
|
|
|
float* featureX, float* featureY, int* featureLaplacian, int* featureOctave, float* featureSize, float* featureHessian, |
|
|
|
|
unsigned int* featureCounter) |
|
|
|
|
{ |
|
|
|
|
dim3 threads; |
|
|
|
@ -509,7 +509,8 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
__shared__ float s_Y[128]; |
|
|
|
|
__shared__ float s_angle[128]; |
|
|
|
|
|
|
|
|
|
__shared__ float s_sum[32 * 4]; |
|
|
|
|
__shared__ float s_sumx[32 * 4]; |
|
|
|
|
__shared__ float s_sumy[32 * 4]; |
|
|
|
|
|
|
|
|
|
/* The sampling intervals and wavelet sized for selecting an orientation |
|
|
|
|
and building the keypoint descriptor are defined relative to 's' */ |
|
|
|
@ -522,126 +523,109 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
const int grad_wav_size = 2 * __float2int_rn(2.0f * s); |
|
|
|
|
|
|
|
|
|
// check when grad_wav_size is too big |
|
|
|
|
if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size) |
|
|
|
|
{ |
|
|
|
|
// Calc X, Y, angle and store it to shared memory |
|
|
|
|
const int tid = threadIdx.y * blockDim.x + threadIdx.x; |
|
|
|
|
|
|
|
|
|
float X = 0.0f, Y = 0.0f, angle = 0.0f; |
|
|
|
|
if ((c_img_rows + 1) < grad_wav_size || (c_img_cols + 1) < grad_wav_size) |
|
|
|
|
return; |
|
|
|
|
|
|
|
|
|
if (tid < ORI_SAMPLES) |
|
|
|
|
{ |
|
|
|
|
const float margin = (float)(grad_wav_size - 1) / 2.0f; |
|
|
|
|
const int x = __float2int_rn(featureX[blockIdx.x] + c_aptX[tid] * s - margin); |
|
|
|
|
const int y = __float2int_rn(featureY[blockIdx.x] + c_aptY[tid] * s - margin); |
|
|
|
|
// Calc X, Y, angle and store it to shared memory |
|
|
|
|
const int tid = threadIdx.y * blockDim.x + threadIdx.x; |
|
|
|
|
|
|
|
|
|
if ((unsigned)y < (unsigned)((c_img_rows + 1) - grad_wav_size) && (unsigned)x < (unsigned)((c_img_cols + 1) - grad_wav_size)) |
|
|
|
|
{ |
|
|
|
|
X = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NX, 4, grad_wav_size, y, x); |
|
|
|
|
Y = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NY, 4, grad_wav_size, y, x); |
|
|
|
|
|
|
|
|
|
angle = atan2f(Y, X); |
|
|
|
|
if (angle < 0) |
|
|
|
|
angle += 2.0f * CV_PI_F; |
|
|
|
|
angle *= 180.0f / CV_PI_F; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
s_X[tid] = X; |
|
|
|
|
s_Y[tid] = Y; |
|
|
|
|
s_angle[tid] = angle; |
|
|
|
|
__syncthreads(); |
|
|
|
|
float X = 0.0f, Y = 0.0f, angle = 0.0f; |
|
|
|
|
|
|
|
|
|
float bestx = 0, besty = 0, best_mod = 0; |
|
|
|
|
if (tid < ORI_SAMPLES) |
|
|
|
|
{ |
|
|
|
|
const float margin = (float)(grad_wav_size - 1) / 2.0f; |
|
|
|
|
const int x = __float2int_rn(featureX[blockIdx.x] + c_aptX[tid] * s - margin); |
|
|
|
|
const int y = __float2int_rn(featureY[blockIdx.x] + c_aptY[tid] * s - margin); |
|
|
|
|
|
|
|
|
|
#pragma unroll |
|
|
|
|
for (int i = 0; i < 18; ++i) |
|
|
|
|
if (y >= 0 && y < (c_img_rows + 1) - grad_wav_size && |
|
|
|
|
x >= 0 && x < (c_img_cols + 1) - grad_wav_size) |
|
|
|
|
{ |
|
|
|
|
const int dir = (i * 4 + threadIdx.y) * ORI_SEARCH_INC; |
|
|
|
|
X = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NX, 4, grad_wav_size, y, x); |
|
|
|
|
Y = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NY, 4, grad_wav_size, y, x); |
|
|
|
|
|
|
|
|
|
float sumx = 0.0f, sumy = 0.0f; |
|
|
|
|
int d = ::abs(__float2int_rn(s_angle[threadIdx.x]) - dir); |
|
|
|
|
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) |
|
|
|
|
{ |
|
|
|
|
sumx = s_X[threadIdx.x]; |
|
|
|
|
sumy = s_Y[threadIdx.x]; |
|
|
|
|
} |
|
|
|
|
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 32]) - dir); |
|
|
|
|
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) |
|
|
|
|
{ |
|
|
|
|
sumx += s_X[threadIdx.x + 32]; |
|
|
|
|
sumy += s_Y[threadIdx.x + 32]; |
|
|
|
|
} |
|
|
|
|
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 64]) - dir); |
|
|
|
|
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) |
|
|
|
|
{ |
|
|
|
|
sumx += s_X[threadIdx.x + 64]; |
|
|
|
|
sumy += s_Y[threadIdx.x + 64]; |
|
|
|
|
} |
|
|
|
|
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 96]) - dir); |
|
|
|
|
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) |
|
|
|
|
{ |
|
|
|
|
sumx += s_X[threadIdx.x + 96]; |
|
|
|
|
sumy += s_Y[threadIdx.x + 96]; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
float* s_sum_row = s_sum + threadIdx.y * 32; |
|
|
|
|
angle = atan2f(Y, X); |
|
|
|
|
if (angle < 0) |
|
|
|
|
angle += 2.0f * CV_PI_F; |
|
|
|
|
angle *= 180.0f / CV_PI_F; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
s_X[tid] = X; |
|
|
|
|
s_Y[tid] = Y; |
|
|
|
|
s_angle[tid] = angle; |
|
|
|
|
__syncthreads(); |
|
|
|
|
|
|
|
|
|
device::reduce<32>(s_sum_row, sumx, threadIdx.x, plus<volatile float>()); |
|
|
|
|
device::reduce<32>(s_sum_row, sumy, threadIdx.x, plus<volatile float>()); |
|
|
|
|
float bestx = 0, besty = 0, best_mod = 0; |
|
|
|
|
|
|
|
|
|
const float temp_mod = sumx * sumx + sumy * sumy; |
|
|
|
|
if (temp_mod > best_mod) |
|
|
|
|
{ |
|
|
|
|
best_mod = temp_mod; |
|
|
|
|
bestx = sumx; |
|
|
|
|
besty = sumy; |
|
|
|
|
} |
|
|
|
|
#pragma unroll |
|
|
|
|
for (int i = 0; i < 18; ++i) |
|
|
|
|
{ |
|
|
|
|
const int dir = (i * 4 + threadIdx.y) * ORI_SEARCH_INC; |
|
|
|
|
|
|
|
|
|
__syncthreads(); |
|
|
|
|
float sumx = 0.0f, sumy = 0.0f; |
|
|
|
|
int d = ::abs(__float2int_rn(s_angle[threadIdx.x]) - dir); |
|
|
|
|
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) |
|
|
|
|
{ |
|
|
|
|
sumx = s_X[threadIdx.x]; |
|
|
|
|
sumy = s_Y[threadIdx.x]; |
|
|
|
|
} |
|
|
|
|
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 32]) - dir); |
|
|
|
|
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) |
|
|
|
|
{ |
|
|
|
|
sumx += s_X[threadIdx.x + 32]; |
|
|
|
|
sumy += s_Y[threadIdx.x + 32]; |
|
|
|
|
} |
|
|
|
|
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 64]) - dir); |
|
|
|
|
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) |
|
|
|
|
{ |
|
|
|
|
sumx += s_X[threadIdx.x + 64]; |
|
|
|
|
sumy += s_Y[threadIdx.x + 64]; |
|
|
|
|
} |
|
|
|
|
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 96]) - dir); |
|
|
|
|
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) |
|
|
|
|
{ |
|
|
|
|
sumx += s_X[threadIdx.x + 96]; |
|
|
|
|
sumy += s_Y[threadIdx.x + 96]; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (threadIdx.x == 0) |
|
|
|
|
device::reduce<32>(s_sumx + threadIdx.y * 32, sumx, threadIdx.x, plus<volatile float>()); |
|
|
|
|
device::reduce<32>(s_sumy + threadIdx.y * 32, sumy, threadIdx.x, plus<volatile float>()); |
|
|
|
|
|
|
|
|
|
const float temp_mod = sumx * sumx + sumy * sumy; |
|
|
|
|
if (temp_mod > best_mod) |
|
|
|
|
{ |
|
|
|
|
s_X[threadIdx.y] = bestx; |
|
|
|
|
s_Y[threadIdx.y] = besty; |
|
|
|
|
s_angle[threadIdx.y] = best_mod; |
|
|
|
|
best_mod = temp_mod; |
|
|
|
|
bestx = sumx; |
|
|
|
|
besty = sumy; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__syncthreads(); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (threadIdx.x < 2 && threadIdx.y == 0) |
|
|
|
|
{ |
|
|
|
|
volatile float* v_x = s_X; |
|
|
|
|
volatile float* v_y = s_Y; |
|
|
|
|
volatile float* v_mod = s_angle; |
|
|
|
|
if (threadIdx.x == 0) |
|
|
|
|
{ |
|
|
|
|
s_X[threadIdx.y] = bestx; |
|
|
|
|
s_Y[threadIdx.y] = besty; |
|
|
|
|
s_angle[threadIdx.y] = best_mod; |
|
|
|
|
} |
|
|
|
|
__syncthreads(); |
|
|
|
|
|
|
|
|
|
bestx = v_x[threadIdx.x]; |
|
|
|
|
besty = v_y[threadIdx.x]; |
|
|
|
|
best_mod = v_mod[threadIdx.x]; |
|
|
|
|
if (threadIdx.x == 0 && threadIdx.y == 0) |
|
|
|
|
{ |
|
|
|
|
int bestIdx = 0; |
|
|
|
|
|
|
|
|
|
float temp_mod = v_mod[threadIdx.x + 2]; |
|
|
|
|
if (temp_mod > best_mod) |
|
|
|
|
{ |
|
|
|
|
v_x[threadIdx.x] = bestx = v_x[threadIdx.x + 2]; |
|
|
|
|
v_y[threadIdx.x] = besty = v_y[threadIdx.x + 2]; |
|
|
|
|
v_mod[threadIdx.x] = best_mod = temp_mod; |
|
|
|
|
} |
|
|
|
|
temp_mod = v_mod[threadIdx.x + 1]; |
|
|
|
|
if (temp_mod > best_mod) |
|
|
|
|
{ |
|
|
|
|
v_x[threadIdx.x] = bestx = v_x[threadIdx.x + 1]; |
|
|
|
|
v_y[threadIdx.x] = besty = v_y[threadIdx.x + 1]; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
if (s_angle[1] > s_angle[bestIdx]) |
|
|
|
|
bestIdx = 1; |
|
|
|
|
if (s_angle[2] > s_angle[bestIdx]) |
|
|
|
|
bestIdx = 2; |
|
|
|
|
if (s_angle[3] > s_angle[bestIdx]) |
|
|
|
|
bestIdx = 3; |
|
|
|
|
|
|
|
|
|
if (threadIdx.x == 0 && threadIdx.y == 0 && best_mod != 0) |
|
|
|
|
{ |
|
|
|
|
float kp_dir = atan2f(besty, bestx); |
|
|
|
|
if (kp_dir < 0) |
|
|
|
|
kp_dir += 2.0f * CV_PI_F; |
|
|
|
|
kp_dir *= 180.0f / CV_PI_F; |
|
|
|
|
float kp_dir = atan2f(s_Y[bestIdx], s_X[bestIdx]); |
|
|
|
|
if (kp_dir < 0) |
|
|
|
|
kp_dir += 2.0f * CV_PI_F; |
|
|
|
|
kp_dir *= 180.0f / CV_PI_F; |
|
|
|
|
|
|
|
|
|
featureDir[blockIdx.x] = kp_dir; |
|
|
|
|
} |
|
|
|
|
featureDir[blockIdx.x] = kp_dir; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -649,7 +633,7 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
#undef ORI_WIN |
|
|
|
|
#undef ORI_SAMPLES |
|
|
|
|
|
|
|
|
|
void icvCalcOrientation_gpu(const float* featureX, const float* featureY, const float* featureSize, float* featureDir, int nFeatures) |
|
|
|
|
void icvCalcOrientation_gpu(const float* featureX, const float* featureY, const float* featureSize, float* featureDir, int nFeatures) |
|
|
|
|
{ |
|
|
|
|
dim3 threads; |
|
|
|
|
threads.x = 32; |
|
|
|
@ -669,27 +653,27 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
|
|
|
|
|
#define PATCH_SZ 20 |
|
|
|
|
|
|
|
|
|
__constant__ float c_DW[PATCH_SZ * PATCH_SZ] = |
|
|
|
|
__constant__ float c_DW[PATCH_SZ * PATCH_SZ] = |
|
|
|
|
{ |
|
|
|
|
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f, |
|
|
|
|
8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f, |
|
|
|
|
1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f, |
|
|
|
|
3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f, |
|
|
|
|
5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f, |
|
|
|
|
9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f, |
|
|
|
|
0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f, |
|
|
|
|
0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f, |
|
|
|
|
0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f, |
|
|
|
|
0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f, |
|
|
|
|
0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f, |
|
|
|
|
0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f, |
|
|
|
|
0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f, |
|
|
|
|
0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f, |
|
|
|
|
9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f, |
|
|
|
|
5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f, |
|
|
|
|
3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f, |
|
|
|
|
1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f, |
|
|
|
|
8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f, |
|
|
|
|
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f, |
|
|
|
|
8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f, |
|
|
|
|
1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f, |
|
|
|
|
3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f, |
|
|
|
|
5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f, |
|
|
|
|
9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f, |
|
|
|
|
0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f, |
|
|
|
|
0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f, |
|
|
|
|
0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f, |
|
|
|
|
0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f, |
|
|
|
|
0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f, |
|
|
|
|
0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f, |
|
|
|
|
0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f, |
|
|
|
|
0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f, |
|
|
|
|
9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f, |
|
|
|
|
5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f, |
|
|
|
|
3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f, |
|
|
|
|
1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f, |
|
|
|
|
8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f, |
|
|
|
|
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
@ -697,7 +681,7 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
{ |
|
|
|
|
typedef uchar elem_type; |
|
|
|
|
|
|
|
|
|
__device__ __forceinline__ WinReader(float centerX_, float centerY_, float win_offset_, float cos_dir_, float sin_dir_) : |
|
|
|
|
__device__ __forceinline__ WinReader(float centerX_, float centerY_, float win_offset_, float cos_dir_, float sin_dir_) : |
|
|
|
|
centerX(centerX_), centerY(centerY_), win_offset(win_offset_), cos_dir(cos_dir_), sin_dir(sin_dir_) |
|
|
|
|
{ |
|
|
|
|
} |
|
|
|
@ -710,14 +694,14 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
return tex2D(imgTex, pixel_x, pixel_y); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
float centerX; |
|
|
|
|
float centerX; |
|
|
|
|
float centerY; |
|
|
|
|
float win_offset; |
|
|
|
|
float cos_dir; |
|
|
|
|
float win_offset; |
|
|
|
|
float cos_dir; |
|
|
|
|
float sin_dir; |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
__device__ void calc_dx_dy(float s_dx_bin[25], float s_dy_bin[25], |
|
|
|
|
__device__ void calc_dx_dy(float s_dx_bin[25], float s_dy_bin[25], |
|
|
|
|
const float* featureX, const float* featureY, const float* featureSize, const float* featureDir) |
|
|
|
|
{ |
|
|
|
|
__shared__ float s_PATCH[6][6]; |
|
|
|
@ -739,7 +723,7 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
sincosf(descriptor_dir, &sin_dir, &cos_dir); |
|
|
|
|
|
|
|
|
|
/* Nearest neighbour version (faster) */ |
|
|
|
|
const float win_offset = -(float)(win_size - 1) / 2; |
|
|
|
|
const float win_offset = -(float)(win_size - 1) / 2; |
|
|
|
|
|
|
|
|
|
// Compute sampling points |
|
|
|
|
// since grids are 2D, need to compute xBlock and yBlock indices |
|
|
|
@ -966,11 +950,11 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
descriptor_base[threadIdx.x] = lookup / len; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void compute_descriptors_gpu(const DevMem2Df& descriptors, |
|
|
|
|
void compute_descriptors_gpu(const DevMem2Df& descriptors, |
|
|
|
|
const float* featureX, const float* featureY, const float* featureSize, const float* featureDir, int nFeatures) |
|
|
|
|
{ |
|
|
|
|
// compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (descriptors.cols == 64) |
|
|
|
|
{ |
|
|
|
|
compute_descriptors64<<<dim3(nFeatures, 16, 1), dim3(6, 6, 1)>>>(descriptors, featureX, featureY, featureSize, featureDir); |
|
|
|
@ -985,12 +969,12 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
compute_descriptors128<<<dim3(nFeatures, 16, 1), dim3(6, 6, 1)>>>(descriptors, featureX, featureY, featureSize, featureDir); |
|
|
|
|
compute_descriptors128<<<dim3(nFeatures, 16, 1), dim3(6, 6, 1)>>>(descriptors, featureX, featureY, featureSize, featureDir); |
|
|
|
|
cudaSafeCall( cudaGetLastError() ); |
|
|
|
|
|
|
|
|
|
cudaSafeCall( cudaDeviceSynchronize() ); |
|
|
|
|
|
|
|
|
|
normalize_descriptors<128><<<dim3(nFeatures, 1, 1), dim3(128, 1, 1)>>>(descriptors); |
|
|
|
|
normalize_descriptors<128><<<dim3(nFeatures, 1, 1), dim3(128, 1, 1)>>>(descriptors); |
|
|
|
|
cudaSafeCall( cudaGetLastError() ); |
|
|
|
|
|
|
|
|
|
cudaSafeCall( cudaDeviceSynchronize() ); |
|
|
|
|