|
|
|
@ -17,6 +17,7 @@ |
|
|
|
|
// @Authors |
|
|
|
|
// Nathan, liujun@multicorewareinc.com |
|
|
|
|
// Peng Xiao, pengxiao@outlook.com |
|
|
|
|
// Baichuan Su, baichuan@multicorewareinc.com |
|
|
|
|
// |
|
|
|
|
// Redistribution and use in source and binary forms, with or without modification, |
|
|
|
|
// are permitted provided that the following conditions are met: |
|
|
|
@ -112,6 +113,24 @@ result_type reduce_block( |
|
|
|
|
return DIST_RES(result); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/*
 * Accumulate the distance contribution of one BLOCK_SIZE-wide tile.
 *
 * For j = 0 .. BLOCK_SIZE - 1 this adds
 *     DIST(s_query[lidy * BLOCK_SIZE + j], s_train[j * BLOCK_SIZE + lidx])
 * to a running sum that starts at zero.
 *
 * s_query, s_train : tiles held in __local memory
 * lidx             : index added to each s_train row offset
 * lidy             : selects the BLOCK_SIZE-long run of s_query that is read
 *
 * Returns the accumulated sum as-is; DIST_RES is NOT applied inside this
 * function.
 */
result_type reduce_block_match(
    __local value_type *s_query,
    __local value_type *s_train,
    int lidx,
    int lidy
)
{
    /* Offset of the first query element read by this call. */
    const int query_base = lidy * BLOCK_SIZE;
    result_type acc = 0;

    #pragma unroll
    for (int k = 0; k < BLOCK_SIZE; ++k)
    {
        acc += DIST(s_query[query_base + k], s_train[k * BLOCK_SIZE + lidx]);
    }

    return acc;
}
|
|
|
|
|
|
|
|
|
result_type reduce_multi_block( |
|
|
|
|
__local value_type *s_query, |
|
|
|
|
__local value_type *s_train, |
|
|
|
@ -128,7 +147,7 @@ result_type reduce_multi_block( |
|
|
|
|
s_query[lidy * MAX_DESC_LEN + block_index * BLOCK_SIZE + j], |
|
|
|
|
s_train[j * BLOCK_SIZE + lidx]); |
|
|
|
|
} |
|
|
|
|
return DIST_RES(result); |
|
|
|
|
return result; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* 2dim launch, global size: dim0 is (query rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, dim1 is BLOCK_SIZE |
|
|
|
@ -187,6 +206,8 @@ __kernel void BruteForceMatch_UnrollMatch( |
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
result = DIST_RES(result); |
|
|
|
|
|
|
|
|
|
int trainIdx = t * BLOCK_SIZE + lidx; |
|
|
|
|
|
|
|
|
|
if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance/* && mask(queryIdx, trainIdx)*/) |
|
|
|
@ -272,11 +293,13 @@ __kernel void BruteForceMatch_Match( |
|
|
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
result += reduce_block(s_query, s_train, lidx, lidy); |
|
|
|
|
result += reduce_block_match(s_query, s_train, lidx, lidy); |
|
|
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
result = DIST_RES(result); |
|
|
|
|
|
|
|
|
|
const int trainIdx = t * BLOCK_SIZE + lidx; |
|
|
|
|
|
|
|
|
|
if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance /*&& mask(queryIdx, trainIdx)*/) |
|
|
|
@ -493,6 +516,8 @@ __kernel void BruteForceMatch_knnUnrollMatch( |
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
result = DIST_RES(result); |
|
|
|
|
|
|
|
|
|
const int trainIdx = t * BLOCK_SIZE + lidx; |
|
|
|
|
|
|
|
|
|
if (queryIdx < query_rows && trainIdx < train_rows) |
|
|
|
@ -631,11 +656,13 @@ __kernel void BruteForceMatch_knnMatch( |
|
|
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
result += reduce_block(s_query, s_train, lidx, lidy); |
|
|
|
|
result += reduce_block_match(s_query, s_train, lidx, lidy); |
|
|
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
result = DIST_RES(result); |
|
|
|
|
|
|
|
|
|
const int trainIdx = t * BLOCK_SIZE + lidx; |
|
|
|
|
|
|
|
|
|
if (queryIdx < query_rows && trainIdx < train_rows /*&& mask(queryIdx, trainIdx)*/) |
|
|
|
|