|
|
|
@ -56,98 +56,6 @@ using namespace std; |
|
|
|
|
|
|
|
|
|
// Device matrix holding the Gaussian / interpolation-weight lookup table.
// It is filled with upload() and later passed to hog::compute_hists, which
// forwards its data to the kernel as a cl_mem argument.
// NOTE(review): declared static at file scope, so it appears to be shared by
// every HOGDescriptor instance -- confirm this is intended.
static oclMat gauss_w_lut; |
|
|
|
|
// Flag tested (`if(hog_device_cpu)`) right before the histogram kernel launch.
// NOTE(review): presumably true when the OpenCL device is a CPU; the place
// where it is assigned is not visible in this excerpt -- confirm.
static bool hog_device_cpu; |
|
|
|
|
/*
 * gaussian_interp_lut: 512 precomputed float weights stored as two
 * consecutive 256-entry tables, each 16 rows x 16 columns in row-major order.
 *
 *   [0, 256)    Gaussian spatial window. The entries equal
 *               exp(-(i*i + j*j) / 32) for row i and column j in [-8, 8),
 *               which is sigma = 4.0f (corner exp(-4) = 0.01831564,
 *               peak 1.0 at i = j = 0).
 *   [256, 512)  Interpolation weights. The entries equal
 *               (8 - fabs(j + 0.5)) * (8 - fabs(i + 0.5)) / 64 for the same
 *               i and j (corner 0.00390625, maximum 0.87890625).
 *
 * The numbers are only valid for sigma == 4.0f; the kernel-name selection in
 * compute_hists picks "compute_hists_lut_kernel" only for that sigma.
 */
/* pre-compute gaussian and interp_weight lookup tables if sigma is 4.0f */ |
|
|
|
|
static const float gaussian_interp_lut[] =
|
|
|
|
|
{ |
|
|
|
|
/* First 256 entries: 16x16 Gaussian window, one table row per 16 values. */
/* gaussian lut */ |
|
|
|
|
0.01831564f, 0.02926831f, 0.04393693f, 0.06196101f, 0.08208500f, 0.10215643f,
|
|
|
|
|
0.11943297f, 0.13117145f, 0.13533528f, 0.13117145f, 0.11943297f, 0.10215643f,
|
|
|
|
|
0.08208500f, 0.06196101f, 0.04393693f, 0.02926831f, 0.02926831f, 0.04677062f,
|
|
|
|
|
0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f, 0.19085334f, 0.20961139f,
|
|
|
|
|
0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f, 0.13117145f, 0.09901341f,
|
|
|
|
|
0.07021102f, 0.04677062f, 0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f,
|
|
|
|
|
0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f, 0.32465246f, 0.31466395f,
|
|
|
|
|
0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f,
|
|
|
|
|
0.06196101f, 0.09901341f, 0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f,
|
|
|
|
|
0.40403652f, 0.44374731f, 0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f,
|
|
|
|
|
0.27768996f, 0.20961139f, 0.14863673f, 0.09901341f, 0.08208500f, 0.13117145f,
|
|
|
|
|
0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f, 0.53526145f, 0.58786964f,
|
|
|
|
|
0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f, 0.36787945f, 0.27768996f,
|
|
|
|
|
0.19691168f, 0.13117145f, 0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f,
|
|
|
|
|
0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f, 0.75483960f, 0.73161560f,
|
|
|
|
|
0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 0.24506053f, 0.16324551f,
|
|
|
|
|
0.11943297f, 0.19085334f, 0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f,
|
|
|
|
|
0.77880079f, 0.85534531f, 0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f,
|
|
|
|
|
0.53526145f, 0.40403652f, 0.28650481f, 0.19085334f, 0.13117145f, 0.20961139f,
|
|
|
|
|
0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f, 0.85534531f, 0.93941307f,
|
|
|
|
|
0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f, 0.58786964f, 0.44374731f,
|
|
|
|
|
0.31466395f, 0.20961139f, 0.13533528f, 0.21626517f, 0.32465246f, 0.45783335f,
|
|
|
|
|
0.60653067f, 0.75483960f, 0.88249689f, 0.96923321f, 1.00000000f, 0.96923321f,
|
|
|
|
|
0.88249689f, 0.75483960f, 0.60653067f, 0.45783335f, 0.32465246f, 0.21626517f,
|
|
|
|
|
0.13117145f, 0.20961139f, 0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f,
|
|
|
|
|
0.85534531f, 0.93941307f, 0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f,
|
|
|
|
|
0.58786964f, 0.44374731f, 0.31466395f, 0.20961139f, 0.11943297f, 0.19085334f,
|
|
|
|
|
0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f, 0.77880079f, 0.85534531f,
|
|
|
|
|
0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f, 0.53526145f, 0.40403652f,
|
|
|
|
|
0.28650481f, 0.19085334f, 0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f,
|
|
|
|
|
0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f, 0.75483960f, 0.73161560f,
|
|
|
|
|
0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 0.24506053f, 0.16324551f,
|
|
|
|
|
0.08208500f, 0.13117145f, 0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f,
|
|
|
|
|
0.53526145f, 0.58786964f, 0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f,
|
|
|
|
|
0.36787945f, 0.27768996f, 0.19691168f, 0.13117145f, 0.06196101f, 0.09901341f,
|
|
|
|
|
0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f, 0.40403652f, 0.44374731f,
|
|
|
|
|
0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f, 0.27768996f, 0.20961139f,
|
|
|
|
|
0.14863673f, 0.09901341f, 0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f,
|
|
|
|
|
0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f, 0.32465246f, 0.31466395f,
|
|
|
|
|
0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f,
|
|
|
|
|
0.02926831f, 0.04677062f, 0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f,
|
|
|
|
|
0.19085334f, 0.20961139f, 0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f,
|
|
|
|
|
0.13117145f, 0.09901341f, 0.07021102f, 0.04677062f,
|
|
|
|
|
/* Last 256 entries: 16x16 interpolation weights, same row-major layout. */
/* interp_weight lut */ |
|
|
|
|
0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f,
|
|
|
|
|
0.05078125f, 0.05859375f, 0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f,
|
|
|
|
|
0.02734375f, 0.01953125f, 0.01171875f, 0.00390625f, 0.01171875f, 0.03515625f,
|
|
|
|
|
0.05859375f, 0.08203125f, 0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f,
|
|
|
|
|
0.17578125f, 0.15234375f, 0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f,
|
|
|
|
|
0.03515625f, 0.01171875f, 0.01953125f, 0.05859375f, 0.09765625f, 0.13671875f,
|
|
|
|
|
0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f, 0.29296875f, 0.25390625f,
|
|
|
|
|
0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f, 0.05859375f, 0.01953125f,
|
|
|
|
|
0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f,
|
|
|
|
|
0.35546875f, 0.41015625f, 0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f,
|
|
|
|
|
0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f, 0.03515625f, 0.10546875f,
|
|
|
|
|
0.17578125f, 0.24609375f, 0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f,
|
|
|
|
|
0.52734375f, 0.45703125f, 0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f,
|
|
|
|
|
0.10546875f, 0.03515625f, 0.04296875f, 0.12890625f, 0.21484375f, 0.30078125f,
|
|
|
|
|
0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f, 0.64453125f, 0.55859375f,
|
|
|
|
|
0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f, 0.12890625f, 0.04296875f,
|
|
|
|
|
0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f,
|
|
|
|
|
0.66015625f, 0.76171875f, 0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f,
|
|
|
|
|
0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f, 0.05859375f, 0.17578125f,
|
|
|
|
|
0.29296875f, 0.41015625f, 0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f,
|
|
|
|
|
0.87890625f, 0.76171875f, 0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f,
|
|
|
|
|
0.17578125f, 0.05859375f, 0.05859375f, 0.17578125f, 0.29296875f, 0.41015625f,
|
|
|
|
|
0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f, 0.87890625f, 0.76171875f,
|
|
|
|
|
0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f, 0.17578125f, 0.05859375f,
|
|
|
|
|
0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f,
|
|
|
|
|
0.66015625f, 0.76171875f, 0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f,
|
|
|
|
|
0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f, 0.04296875f, 0.12890625f,
|
|
|
|
|
0.21484375f, 0.30078125f, 0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f,
|
|
|
|
|
0.64453125f, 0.55859375f, 0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f,
|
|
|
|
|
0.12890625f, 0.04296875f, 0.03515625f, 0.10546875f, 0.17578125f, 0.24609375f,
|
|
|
|
|
0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f, 0.52734375f, 0.45703125f,
|
|
|
|
|
0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f, 0.10546875f, 0.03515625f,
|
|
|
|
|
0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f,
|
|
|
|
|
0.35546875f, 0.41015625f, 0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f,
|
|
|
|
|
0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f, 0.01953125f, 0.05859375f,
|
|
|
|
|
0.09765625f, 0.13671875f, 0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f,
|
|
|
|
|
0.29296875f, 0.25390625f, 0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f,
|
|
|
|
|
0.05859375f, 0.01953125f, 0.01171875f, 0.03515625f, 0.05859375f, 0.08203125f,
|
|
|
|
|
0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f, 0.17578125f, 0.15234375f,
|
|
|
|
|
0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f, 0.03515625f, 0.01171875f,
|
|
|
|
|
0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f,
|
|
|
|
|
0.05078125f, 0.05859375f, 0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f,
|
|
|
|
|
0.02734375f, 0.01953125f, 0.01171875f, 0.00390625f |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
namespace cv |
|
|
|
|
{ |
|
|
|
@ -180,7 +88,7 @@ namespace cv |
|
|
|
|
int nblocks_win_x, int nblocks_win_y); |
|
|
|
|
|
|
|
|
|
// Declaration of the host-side launcher for the block-histogram OpenCL kernel
// (defined further down as cv::ocl::device::hog::compute_hists).
//   block_stride_x / _y  block stride; the definition divides by block_stride_x
//                        when computing the number of blocks per image row.
//   height, width        extent of the processed region.
//   grad, qangle         input device matrices, passed to the kernel as cl_mem.
//   gauss_w_lut          device lookup table, passed to the kernel as cl_mem.
//   block_hists          output device matrix receiving the histograms.
// NOTE(review): `blovck_stride_y` is a misspelling of `block_stride_y` (the
// definition spells it correctly); harmless in a prototype but worth fixing.
// NOTE(review): the two consecutive `int height, int width, ...` lines are the
// before/after sides of a change that drops the `float sigma` parameter; only
// one of them belongs in the real declaration.
void compute_hists(int nbins, int block_stride_x, int blovck_stride_y, |
|
|
|
|
int height, int width, float sigma, const cv::ocl::oclMat &grad, |
|
|
|
|
int height, int width, const cv::ocl::oclMat &grad, |
|
|
|
|
const cv::ocl::oclMat &qangle,
|
|
|
|
|
const cv::ocl::oclMat &gauss_w_lut, cv::ocl::oclMat &block_hists); |
|
|
|
|
|
|
|
|
@ -328,10 +236,18 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride) |
|
|
|
|
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); |
|
|
|
|
labels.create(1, wins_per_img.area(), CV_8U); |
|
|
|
|
|
|
|
|
|
vector<float> v_lut = vector<float>(gaussian_interp_lut, gaussian_interp_lut +
|
|
|
|
|
sizeof(gaussian_interp_lut) / sizeof(gaussian_interp_lut[0])); |
|
|
|
|
Mat m_lut(v_lut); |
|
|
|
|
gauss_w_lut.upload(m_lut.reshape(1,1)); |
|
|
|
|
float sigma = getWinSigma(); |
|
|
|
|
float scale = 1.f / (2.f * sigma * sigma); |
|
|
|
|
Mat gaussian_lut(1, 512, CV_32FC1); |
|
|
|
|
int idx = 0; |
|
|
|
|
for(int i=-8; i<8; i++) |
|
|
|
|
for(int j=-8; j<8; j++) |
|
|
|
|
gaussian_lut.at<float>(idx++) = std::exp(-(j * j + i * i) * scale); |
|
|
|
|
for(int i=-8; i<8; i++) |
|
|
|
|
for(int j=-8; j<8; j++) |
|
|
|
|
gaussian_lut.at<float>(idx++) = (8.f - fabs(j + 0.5f)) * (8.f - fabs(i + 0.5f)) / 64.f; |
|
|
|
|
|
|
|
|
|
gauss_w_lut.upload(gaussian_lut); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle) |
|
|
|
@ -358,7 +274,7 @@ void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img) |
|
|
|
|
computeGradient(img, this->grad, this->qangle); |
|
|
|
|
|
|
|
|
|
hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height,
|
|
|
|
|
effect_size.width, (float)getWinSigma(), grad, qangle, gauss_w_lut, block_hists); |
|
|
|
|
effect_size.width, grad, qangle, gauss_w_lut, block_hists); |
|
|
|
|
|
|
|
|
|
hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height,
|
|
|
|
|
effect_size.width, block_hists, (float)threshold_L2hys); |
|
|
|
@ -1708,7 +1624,7 @@ void cv::ocl::device::hog::set_up_constants(int nbins, |
|
|
|
|
|
|
|
|
|
void cv::ocl::device::hog::compute_hists(int nbins,
|
|
|
|
|
int block_stride_x, int block_stride_y, |
|
|
|
|
int height, int width, float sigma,
|
|
|
|
|
int height, int width,
|
|
|
|
|
const cv::ocl::oclMat &grad,
|
|
|
|
|
const cv::ocl::oclMat &qangle,
|
|
|
|
|
const cv::ocl::oclMat &gauss_w_lut,
|
|
|
|
@ -1716,8 +1632,7 @@ void cv::ocl::device::hog::compute_hists(int nbins, |
|
|
|
|
{ |
|
|
|
|
Context *clCxt = Context::getContext(); |
|
|
|
|
vector< pair<size_t, const void *> > args; |
|
|
|
|
string kernelName = (sigma == 4.0f) ? "compute_hists_lut_kernel" :
|
|
|
|
|
"compute_hists_kernel"; |
|
|
|
|
string kernelName = "compute_hists_lut_kernel"; |
|
|
|
|
|
|
|
|
|
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x)
|
|
|
|
|
/ block_stride_x; |
|
|
|
@ -1728,9 +1643,6 @@ void cv::ocl::device::hog::compute_hists(int nbins, |
|
|
|
|
int grad_quadstep = grad.step >> 2; |
|
|
|
|
int qangle_step = qangle.step; |
|
|
|
|
|
|
|
|
|
// Precompute gaussian spatial window parameter
|
|
|
|
|
float scale = 1.f / (2.f * sigma * sigma); |
|
|
|
|
|
|
|
|
|
int blocks_in_group = 4; |
|
|
|
|
size_t localThreads[3] = { blocks_in_group * 24, 2, 1 }; |
|
|
|
|
size_t globalThreads[3] = {
|
|
|
|
@ -1751,14 +1663,10 @@ void cv::ocl::device::hog::compute_hists(int nbins, |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&qangle.data)); |
|
|
|
|
if (kernelName.compare("compute_hists_lut_kernel") == 0) |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&gauss_w_lut.data)); |
|
|
|
|
else |
|
|
|
|
args.push_back( make_pair( sizeof(cl_float), (void *)&scale)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&gauss_w_lut.data)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); |
|
|
|
|
args.push_back( make_pair( smem, (void *)NULL)); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if(hog_device_cpu) |
|
|
|
|
{ |
|
|
|
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
|
|
|
|