|
|
|
@ -15,7 +15,7 @@ |
|
|
|
|
// Third party copyrights are property of their respective owners.
|
|
|
|
|
//
|
|
|
|
|
// @Authors
|
|
|
|
|
// Wenju He, wenju@multicorewareinc.com
|
|
|
|
|
// Wenju He, wenju@multicorewareinc.com
|
|
|
|
|
//
|
|
|
|
|
// Redistribution and use in source and binary forms, with or without modification,
|
|
|
|
|
// are permitted provided that the following conditions are met:
|
|
|
|
@ -48,13 +48,107 @@ using namespace cv; |
|
|
|
|
using namespace cv::ocl; |
|
|
|
|
using namespace std; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define CELL_WIDTH 8 |
|
|
|
|
#define CELL_HEIGHT 8 |
|
|
|
|
#define CELLS_PER_BLOCK_X 2 |
|
|
|
|
#define CELLS_PER_BLOCK_Y 2 |
|
|
|
|
#define NTHREADS 256 |
|
|
|
|
|
|
|
|
|
static oclMat gauss_w_lut; |
|
|
|
|
static bool hog_device_cpu; |
|
|
|
|
/* pre-compute gaussian and interp_weight lookup tables if sigma is 4.0f */ |
|
|
|
|
static const float gaussian_interp_lut[] =
|
|
|
|
|
{ |
|
|
|
|
/* gaussian lut */ |
|
|
|
|
0.01831564f, 0.02926831f, 0.04393693f, 0.06196101f, 0.08208500f, 0.10215643f,
|
|
|
|
|
0.11943297f, 0.13117145f, 0.13533528f, 0.13117145f, 0.11943297f, 0.10215643f,
|
|
|
|
|
0.08208500f, 0.06196101f, 0.04393693f, 0.02926831f, 0.02926831f, 0.04677062f,
|
|
|
|
|
0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f, 0.19085334f, 0.20961139f,
|
|
|
|
|
0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f, 0.13117145f, 0.09901341f,
|
|
|
|
|
0.07021102f, 0.04677062f, 0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f,
|
|
|
|
|
0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f, 0.32465246f, 0.31466395f,
|
|
|
|
|
0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f,
|
|
|
|
|
0.06196101f, 0.09901341f, 0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f,
|
|
|
|
|
0.40403652f, 0.44374731f, 0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f,
|
|
|
|
|
0.27768996f, 0.20961139f, 0.14863673f, 0.09901341f, 0.08208500f, 0.13117145f,
|
|
|
|
|
0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f, 0.53526145f, 0.58786964f,
|
|
|
|
|
0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f, 0.36787945f, 0.27768996f,
|
|
|
|
|
0.19691168f, 0.13117145f, 0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f,
|
|
|
|
|
0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f, 0.75483960f, 0.73161560f,
|
|
|
|
|
0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 0.24506053f, 0.16324551f,
|
|
|
|
|
0.11943297f, 0.19085334f, 0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f,
|
|
|
|
|
0.77880079f, 0.85534531f, 0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f,
|
|
|
|
|
0.53526145f, 0.40403652f, 0.28650481f, 0.19085334f, 0.13117145f, 0.20961139f,
|
|
|
|
|
0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f, 0.85534531f, 0.93941307f,
|
|
|
|
|
0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f, 0.58786964f, 0.44374731f,
|
|
|
|
|
0.31466395f, 0.20961139f, 0.13533528f, 0.21626517f, 0.32465246f, 0.45783335f,
|
|
|
|
|
0.60653067f, 0.75483960f, 0.88249689f, 0.96923321f, 1.00000000f, 0.96923321f,
|
|
|
|
|
0.88249689f, 0.75483960f, 0.60653067f, 0.45783335f, 0.32465246f, 0.21626517f,
|
|
|
|
|
0.13117145f, 0.20961139f, 0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f,
|
|
|
|
|
0.85534531f, 0.93941307f, 0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f,
|
|
|
|
|
0.58786964f, 0.44374731f, 0.31466395f, 0.20961139f, 0.11943297f, 0.19085334f,
|
|
|
|
|
0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f, 0.77880079f, 0.85534531f,
|
|
|
|
|
0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f, 0.53526145f, 0.40403652f,
|
|
|
|
|
0.28650481f, 0.19085334f, 0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f,
|
|
|
|
|
0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f, 0.75483960f, 0.73161560f,
|
|
|
|
|
0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 0.24506053f, 0.16324551f,
|
|
|
|
|
0.08208500f, 0.13117145f, 0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f,
|
|
|
|
|
0.53526145f, 0.58786964f, 0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f,
|
|
|
|
|
0.36787945f, 0.27768996f, 0.19691168f, 0.13117145f, 0.06196101f, 0.09901341f,
|
|
|
|
|
0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f, 0.40403652f, 0.44374731f,
|
|
|
|
|
0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f, 0.27768996f, 0.20961139f,
|
|
|
|
|
0.14863673f, 0.09901341f, 0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f,
|
|
|
|
|
0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f, 0.32465246f, 0.31466395f,
|
|
|
|
|
0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f,
|
|
|
|
|
0.02926831f, 0.04677062f, 0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f,
|
|
|
|
|
0.19085334f, 0.20961139f, 0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f,
|
|
|
|
|
0.13117145f, 0.09901341f, 0.07021102f, 0.04677062f,
|
|
|
|
|
/* interp_weight lut */ |
|
|
|
|
0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f,
|
|
|
|
|
0.05078125f, 0.05859375f, 0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f,
|
|
|
|
|
0.02734375f, 0.01953125f, 0.01171875f, 0.00390625f, 0.01171875f, 0.03515625f,
|
|
|
|
|
0.05859375f, 0.08203125f, 0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f,
|
|
|
|
|
0.17578125f, 0.15234375f, 0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f,
|
|
|
|
|
0.03515625f, 0.01171875f, 0.01953125f, 0.05859375f, 0.09765625f, 0.13671875f,
|
|
|
|
|
0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f, 0.29296875f, 0.25390625f,
|
|
|
|
|
0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f, 0.05859375f, 0.01953125f,
|
|
|
|
|
0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f,
|
|
|
|
|
0.35546875f, 0.41015625f, 0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f,
|
|
|
|
|
0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f, 0.03515625f, 0.10546875f,
|
|
|
|
|
0.17578125f, 0.24609375f, 0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f,
|
|
|
|
|
0.52734375f, 0.45703125f, 0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f,
|
|
|
|
|
0.10546875f, 0.03515625f, 0.04296875f, 0.12890625f, 0.21484375f, 0.30078125f,
|
|
|
|
|
0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f, 0.64453125f, 0.55859375f,
|
|
|
|
|
0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f, 0.12890625f, 0.04296875f,
|
|
|
|
|
0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f,
|
|
|
|
|
0.66015625f, 0.76171875f, 0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f,
|
|
|
|
|
0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f, 0.05859375f, 0.17578125f,
|
|
|
|
|
0.29296875f, 0.41015625f, 0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f,
|
|
|
|
|
0.87890625f, 0.76171875f, 0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f,
|
|
|
|
|
0.17578125f, 0.05859375f, 0.05859375f, 0.17578125f, 0.29296875f, 0.41015625f,
|
|
|
|
|
0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f, 0.87890625f, 0.76171875f,
|
|
|
|
|
0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f, 0.17578125f, 0.05859375f,
|
|
|
|
|
0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f,
|
|
|
|
|
0.66015625f, 0.76171875f, 0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f,
|
|
|
|
|
0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f, 0.04296875f, 0.12890625f,
|
|
|
|
|
0.21484375f, 0.30078125f, 0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f,
|
|
|
|
|
0.64453125f, 0.55859375f, 0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f,
|
|
|
|
|
0.12890625f, 0.04296875f, 0.03515625f, 0.10546875f, 0.17578125f, 0.24609375f,
|
|
|
|
|
0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f, 0.52734375f, 0.45703125f,
|
|
|
|
|
0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f, 0.10546875f, 0.03515625f,
|
|
|
|
|
0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f,
|
|
|
|
|
0.35546875f, 0.41015625f, 0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f,
|
|
|
|
|
0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f, 0.01953125f, 0.05859375f,
|
|
|
|
|
0.09765625f, 0.13671875f, 0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f,
|
|
|
|
|
0.29296875f, 0.25390625f, 0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f,
|
|
|
|
|
0.05859375f, 0.01953125f, 0.01171875f, 0.03515625f, 0.05859375f, 0.08203125f,
|
|
|
|
|
0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f, 0.17578125f, 0.15234375f,
|
|
|
|
|
0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f, 0.03515625f, 0.01171875f,
|
|
|
|
|
0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f,
|
|
|
|
|
0.05078125f, 0.05859375f, 0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f,
|
|
|
|
|
0.02734375f, 0.01953125f, 0.01171875f, 0.00390625f |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
namespace cv |
|
|
|
|
{ |
|
|
|
|
namespace ocl |
|
|
|
@ -78,38 +172,43 @@ namespace cv |
|
|
|
|
int cnblocks_win_x; |
|
|
|
|
int cnblocks_win_y; |
|
|
|
|
int cblock_hist_size; |
|
|
|
|
int cblock_hist_size_2up; |
|
|
|
|
int cdescr_size; |
|
|
|
|
int cdescr_width; |
|
|
|
|
int cdescr_height; |
|
|
|
|
|
|
|
|
|
void set_up_constants(int nbins, int block_stride_x, int block_stride_y, |
|
|
|
|
int nblocks_win_x, int nblocks_win_y); |
|
|
|
|
|
|
|
|
|
void compute_hists(int nbins, int block_stride_x, int blovck_stride_y, |
|
|
|
|
int height, int width, const cv::ocl::oclMat &grad, |
|
|
|
|
const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists); |
|
|
|
|
int height, int width, float sigma, const cv::ocl::oclMat &grad, |
|
|
|
|
const cv::ocl::oclMat &qangle,
|
|
|
|
|
const cv::ocl::oclMat &gauss_w_lut, cv::ocl::oclMat &block_hists); |
|
|
|
|
|
|
|
|
|
void normalize_hists(int nbins, int block_stride_x, int block_stride_y, |
|
|
|
|
int height, int width, cv::ocl::oclMat &block_hists, float threshold); |
|
|
|
|
int height, int width, cv::ocl::oclMat &block_hists,
|
|
|
|
|
float threshold); |
|
|
|
|
|
|
|
|
|
void classify_hists(int win_height, int win_width, int block_stride_y, |
|
|
|
|
int block_stride_x, int win_stride_y, int win_stride_x, int height, |
|
|
|
|
int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef, |
|
|
|
|
int block_stride_x, int win_stride_y, int win_stride_x,
|
|
|
|
|
int height, int width, const cv::ocl::oclMat &block_hists,
|
|
|
|
|
const cv::ocl::oclMat &coefs, float free_coef, |
|
|
|
|
float threshold, cv::ocl::oclMat &labels); |
|
|
|
|
|
|
|
|
|
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, |
|
|
|
|
int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists, |
|
|
|
|
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y,
|
|
|
|
|
int block_stride_x, int win_stride_y, int win_stride_x,
|
|
|
|
|
int height, int width, const cv::ocl::oclMat &block_hists, |
|
|
|
|
cv::ocl::oclMat &descriptors); |
|
|
|
|
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, |
|
|
|
|
int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists, |
|
|
|
|
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y,
|
|
|
|
|
int block_stride_x, int win_stride_y, int win_stride_x,
|
|
|
|
|
int height, int width, const cv::ocl::oclMat &block_hists, |
|
|
|
|
cv::ocl::oclMat &descriptors); |
|
|
|
|
|
|
|
|
|
void compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img, |
|
|
|
|
float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma); |
|
|
|
|
float angle_scale, cv::ocl::oclMat &grad,
|
|
|
|
|
cv::ocl::oclMat &qangle, bool correct_gamma); |
|
|
|
|
void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img, |
|
|
|
|
float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma); |
|
|
|
|
|
|
|
|
|
void resize( const oclMat &src, oclMat &dst, const Size sz); |
|
|
|
|
float angle_scale, cv::ocl::oclMat &grad,
|
|
|
|
|
cv::ocl::oclMat &qangle, bool correct_gamma); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
@ -117,8 +216,14 @@ namespace cv |
|
|
|
|
|
|
|
|
|
using namespace ::cv::ocl::device; |
|
|
|
|
|
|
|
|
|
cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, Size cell_size_, |
|
|
|
|
int nbins_, double win_sigma_, double threshold_L2hys_, bool gamma_correction_, int nlevels_) |
|
|
|
|
static inline int divUp(int total, int grain) |
|
|
|
|
{ |
|
|
|
|
return (total + grain - 1) / grain; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_,
|
|
|
|
|
Size cell_size_, int nbins_, double win_sigma_,
|
|
|
|
|
double threshold_L2hys_, bool gamma_correction_, int nlevels_) |
|
|
|
|
: win_size(win_size_), |
|
|
|
|
block_size(block_size_), |
|
|
|
|
block_stride(block_stride_), |
|
|
|
@ -132,19 +237,27 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo |
|
|
|
|
CV_Assert((win_size.width - block_size.width ) % block_stride.width == 0 && |
|
|
|
|
(win_size.height - block_size.height) % block_stride.height == 0); |
|
|
|
|
|
|
|
|
|
CV_Assert(block_size.width % cell_size.width == 0 && block_size.height % cell_size.height == 0); |
|
|
|
|
CV_Assert(block_size.width % cell_size.width == 0 &&
|
|
|
|
|
block_size.height % cell_size.height == 0); |
|
|
|
|
|
|
|
|
|
CV_Assert(block_stride == cell_size); |
|
|
|
|
|
|
|
|
|
CV_Assert(cell_size == Size(8, 8)); |
|
|
|
|
|
|
|
|
|
Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height); |
|
|
|
|
Size cells_per_block(block_size.width / cell_size.width,
|
|
|
|
|
block_size.height / cell_size.height); |
|
|
|
|
CV_Assert(cells_per_block == Size(2, 2)); |
|
|
|
|
|
|
|
|
|
cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); |
|
|
|
|
hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height); |
|
|
|
|
hog::set_up_constants(nbins, block_stride.width, block_stride.height,
|
|
|
|
|
blocks_per_win.width, blocks_per_win.height); |
|
|
|
|
|
|
|
|
|
effect_size = Size(0, 0); |
|
|
|
|
|
|
|
|
|
if (queryDeviceInfo<IS_CPU_DEVICE, bool>()) |
|
|
|
|
hog_device_cpu = true; |
|
|
|
|
else |
|
|
|
|
hog_device_cpu = false; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
size_t cv::ocl::HOGDescriptor::getDescriptorSize() const |
|
|
|
@ -154,7 +267,8 @@ size_t cv::ocl::HOGDescriptor::getDescriptorSize() const |
|
|
|
|
|
|
|
|
|
size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const |
|
|
|
|
{ |
|
|
|
|
Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height); |
|
|
|
|
Size cells_per_block = Size(block_size.width / cell_size.width,
|
|
|
|
|
block_size.height / cell_size.height); |
|
|
|
|
return (size_t)(nbins * cells_per_block.area()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -167,7 +281,8 @@ bool cv::ocl::HOGDescriptor::checkDetectorSize() const |
|
|
|
|
{ |
|
|
|
|
size_t detector_size = detector.rows * detector.cols; |
|
|
|
|
size_t descriptor_size = getDescriptorSize(); |
|
|
|
|
return detector_size == 0 || detector_size == descriptor_size || detector_size == descriptor_size + 1; |
|
|
|
|
return detector_size == 0 || detector_size == descriptor_size ||
|
|
|
|
|
detector_size == descriptor_size + 1; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void cv::ocl::HOGDescriptor::setSVMDetector(const vector<float> &_detector) |
|
|
|
@ -207,10 +322,16 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride) |
|
|
|
|
|
|
|
|
|
const size_t block_hist_size = getBlockHistogramSize(); |
|
|
|
|
const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride); |
|
|
|
|
block_hists.create(1, static_cast<int>(block_hist_size * blocks_per_img.area()), CV_32F); |
|
|
|
|
block_hists.create(1,
|
|
|
|
|
static_cast<int>(block_hist_size * blocks_per_img.area()) + 256, CV_32F); |
|
|
|
|
|
|
|
|
|
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); |
|
|
|
|
labels.create(1, wins_per_img.area(), CV_8U); |
|
|
|
|
|
|
|
|
|
vector<float> v_lut = vector<float>(gaussian_interp_lut, gaussian_interp_lut +
|
|
|
|
|
sizeof(gaussian_interp_lut) / sizeof(gaussian_interp_lut[0])); |
|
|
|
|
Mat m_lut(v_lut); |
|
|
|
|
gauss_w_lut.upload(m_lut.reshape(1,1)); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle) |
|
|
|
@ -221,29 +342,34 @@ void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oc |
|
|
|
|
switch (img.type()) |
|
|
|
|
{ |
|
|
|
|
case CV_8UC1: |
|
|
|
|
hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction); |
|
|
|
|
hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img,
|
|
|
|
|
angleScale, grad, qangle, gamma_correction); |
|
|
|
|
break; |
|
|
|
|
case CV_8UC4: |
|
|
|
|
hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction); |
|
|
|
|
hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img,
|
|
|
|
|
angleScale, grad, qangle, gamma_correction); |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img) |
|
|
|
|
{ |
|
|
|
|
computeGradient(img, grad, qangle); |
|
|
|
|
computeGradient(img, this->grad, this->qangle); |
|
|
|
|
|
|
|
|
|
hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width, |
|
|
|
|
grad, qangle, (float)getWinSigma(), block_hists); |
|
|
|
|
hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height,
|
|
|
|
|
effect_size.width, (float)getWinSigma(), grad, qangle, gauss_w_lut, block_hists); |
|
|
|
|
|
|
|
|
|
hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width, |
|
|
|
|
block_hists, (float)threshold_L2hys); |
|
|
|
|
hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height,
|
|
|
|
|
effect_size.width, block_hists, (float)threshold_L2hys); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, oclMat &descriptors, int descr_format) |
|
|
|
|
void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride,
|
|
|
|
|
oclMat &descriptors, int descr_format) |
|
|
|
|
{ |
|
|
|
|
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); |
|
|
|
|
CV_Assert(win_stride.width % block_stride.width == 0 &&
|
|
|
|
|
win_stride.height % block_stride.height == 0); |
|
|
|
|
|
|
|
|
|
init_buffer(img, win_stride); |
|
|
|
|
|
|
|
|
@ -253,17 +379,20 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, |
|
|
|
|
Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); |
|
|
|
|
Size wins_per_img = numPartsWithin(effect_size, win_size, win_stride); |
|
|
|
|
|
|
|
|
|
descriptors.create(wins_per_img.area(), static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F); |
|
|
|
|
descriptors.create(wins_per_img.area(),
|
|
|
|
|
static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F); |
|
|
|
|
|
|
|
|
|
switch (descr_format) |
|
|
|
|
{ |
|
|
|
|
case DESCR_FORMAT_ROW_BY_ROW: |
|
|
|
|
hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width, |
|
|
|
|
win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors); |
|
|
|
|
hog::extract_descrs_by_rows(win_size.height, win_size.width,
|
|
|
|
|
block_stride.height, block_stride.width, win_stride.height, win_stride.width,
|
|
|
|
|
effect_size.height, effect_size.width, block_hists, descriptors); |
|
|
|
|
break; |
|
|
|
|
case DESCR_FORMAT_COL_BY_COL: |
|
|
|
|
hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width, |
|
|
|
|
win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors); |
|
|
|
|
hog::extract_descrs_by_cols(win_size.height, win_size.width,
|
|
|
|
|
block_stride.height, block_stride.width, win_stride.height, win_stride.width,
|
|
|
|
|
effect_size.height, effect_size.width, block_hists, descriptors); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(CV_StsBadArg, "Unknown descriptor format"); |
|
|
|
@ -271,7 +400,8 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void cv::ocl::HOGDescriptor::detect(const oclMat &img, vector<Point> &hits, double hit_threshold, Size win_stride, Size padding) |
|
|
|
|
void cv::ocl::HOGDescriptor::detect(const oclMat &img, vector<Point> &hits,
|
|
|
|
|
double hit_threshold, Size win_stride, Size padding) |
|
|
|
|
{ |
|
|
|
|
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); |
|
|
|
|
CV_Assert(padding == Size(0, 0)); |
|
|
|
@ -283,14 +413,16 @@ void cv::ocl::HOGDescriptor::detect(const oclMat &img, vector<Point> &hits, doub |
|
|
|
|
if (win_stride == Size()) |
|
|
|
|
win_stride = block_stride; |
|
|
|
|
else |
|
|
|
|
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); |
|
|
|
|
CV_Assert(win_stride.width % block_stride.width == 0 &&
|
|
|
|
|
win_stride.height % block_stride.height == 0); |
|
|
|
|
init_buffer(img, win_stride); |
|
|
|
|
|
|
|
|
|
computeBlockHistograms(img); |
|
|
|
|
|
|
|
|
|
hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width, |
|
|
|
|
win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, |
|
|
|
|
detector, (float)free_coef, (float)hit_threshold, labels); |
|
|
|
|
hog::classify_hists(win_size.height, win_size.width, block_stride.height,
|
|
|
|
|
block_stride.width, win_stride.height, win_stride.width,
|
|
|
|
|
effect_size.height, effect_size.width, block_hists, detector,
|
|
|
|
|
(float)free_coef, (float)hit_threshold, labels); |
|
|
|
|
|
|
|
|
|
labels.download(labels_host); |
|
|
|
|
unsigned char *vec = labels_host.ptr(); |
|
|
|
@ -306,8 +438,9 @@ void cv::ocl::HOGDescriptor::detect(const oclMat &img, vector<Point> &hits, doub |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector<Rect> &found_locations, double hit_threshold, |
|
|
|
|
Size win_stride, Size padding, double scale0, int group_threshold) |
|
|
|
|
void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector<Rect> &found_locations,
|
|
|
|
|
double hit_threshold, Size win_stride, Size padding,
|
|
|
|
|
double scale0, int group_threshold) |
|
|
|
|
{ |
|
|
|
|
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); |
|
|
|
|
CV_Assert(scale0 > 1); |
|
|
|
@ -333,7 +466,8 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector<Rect> &f |
|
|
|
|
if (win_stride == Size()) |
|
|
|
|
win_stride = block_stride; |
|
|
|
|
else |
|
|
|
|
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); |
|
|
|
|
CV_Assert(win_stride.width % block_stride.width == 0 &&
|
|
|
|
|
win_stride.height % block_stride.height == 0); |
|
|
|
|
init_buffer(img, win_stride); |
|
|
|
|
image_scale.create(img.size(), img.type()); |
|
|
|
|
|
|
|
|
@ -347,16 +481,18 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector<Rect> &f |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
hog::resize( img, image_scale, effect_size); |
|
|
|
|
resize(img, image_scale, effect_size); |
|
|
|
|
detect(image_scale, locations, hit_threshold, win_stride, padding); |
|
|
|
|
} |
|
|
|
|
Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale)); |
|
|
|
|
Size scaled_win_size(cvRound(win_size.width * scale),
|
|
|
|
|
cvRound(win_size.height * scale)); |
|
|
|
|
for (size_t j = 0; j < locations.size(); j++) |
|
|
|
|
all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, scaled_win_size)); |
|
|
|
|
all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale,
|
|
|
|
|
scaled_win_size)); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
found_locations.assign(all_candidates.begin(), all_candidates.end()); |
|
|
|
|
groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/); |
|
|
|
|
groupRectangles(found_locations, group_threshold, 0.2); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
int cv::ocl::HOGDescriptor::numPartsWithin(int size, int part_size, int stride) |
|
|
|
@ -364,9 +500,11 @@ int cv::ocl::HOGDescriptor::numPartsWithin(int size, int part_size, int stride) |
|
|
|
|
return (size - part_size + stride) / stride; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, cv::Size stride) |
|
|
|
|
cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size,
|
|
|
|
|
cv::Size stride) |
|
|
|
|
{ |
|
|
|
|
return Size(numPartsWithin(size.width, part_size.width, stride.width), numPartsWithin(size.height, part_size.height, stride.height)); |
|
|
|
|
return Size(numPartsWithin(size.width, part_size.width, stride.width),
|
|
|
|
|
numPartsWithin(size.height, part_size.height, stride.height)); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
std::vector<float> cv::ocl::HOGDescriptor::getDefaultPeopleDetector() |
|
|
|
@ -1547,8 +1685,9 @@ static int power_2up(unsigned int n) |
|
|
|
|
return -1; // Input is too big
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int block_stride_y, |
|
|
|
|
int nblocks_win_x, int nblocks_win_y) |
|
|
|
|
void cv::ocl::device::hog::set_up_constants(int nbins,
|
|
|
|
|
int block_stride_x, int block_stride_y,
|
|
|
|
|
int nblocks_win_x, int nblocks_win_y) |
|
|
|
|
{ |
|
|
|
|
cnbins = nbins; |
|
|
|
|
cblock_stride_x = block_stride_x; |
|
|
|
@ -1559,53 +1698,32 @@ void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int b |
|
|
|
|
int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y; |
|
|
|
|
cblock_hist_size = block_hist_size; |
|
|
|
|
|
|
|
|
|
int block_hist_size_2up = power_2up(block_hist_size); |
|
|
|
|
cblock_hist_size_2up = block_hist_size_2up; |
|
|
|
|
|
|
|
|
|
int descr_width = nblocks_win_x * block_hist_size; |
|
|
|
|
cdescr_width = descr_width; |
|
|
|
|
cdescr_height = nblocks_win_y; |
|
|
|
|
|
|
|
|
|
int descr_size = descr_width * nblocks_win_y; |
|
|
|
|
cdescr_size = descr_size; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline int divUp(int total, int grain) |
|
|
|
|
{ |
|
|
|
|
return (total + grain - 1) / grain; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void openCLExecuteKernel_hog(Context *clCxt , const char **source, string kernelName,
|
|
|
|
|
size_t globalThreads[3], size_t localThreads[3],
|
|
|
|
|
vector< pair<size_t, const void *> > &args) |
|
|
|
|
{ |
|
|
|
|
cl_kernel kernel = openCLGetKernelFromSource(clCxt, source, kernelName); |
|
|
|
|
size_t wave_size = queryDeviceInfo<WAVEFRONT_SIZE, size_t>(kernel); |
|
|
|
|
openCLSafeCall(clReleaseKernel(kernel)); |
|
|
|
|
if (wave_size <= 16) |
|
|
|
|
{ |
|
|
|
|
char build_options[64]; |
|
|
|
|
sprintf(build_options, (wave_size == 16) ? "-D WAVE_SIZE_16" : "-D WAVE_SIZE_1"); |
|
|
|
|
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1, build_options); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int block_stride_y, |
|
|
|
|
int height, int width, const cv::ocl::oclMat &grad, |
|
|
|
|
const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists) |
|
|
|
|
void cv::ocl::device::hog::compute_hists(int nbins,
|
|
|
|
|
int block_stride_x, int block_stride_y, |
|
|
|
|
int height, int width, float sigma,
|
|
|
|
|
const cv::ocl::oclMat &grad,
|
|
|
|
|
const cv::ocl::oclMat &qangle,
|
|
|
|
|
const cv::ocl::oclMat &gauss_w_lut,
|
|
|
|
|
cv::ocl::oclMat &block_hists) |
|
|
|
|
{ |
|
|
|
|
Context *clCxt = Context::getContext(); |
|
|
|
|
string kernelName = "compute_hists_kernel"; |
|
|
|
|
vector< pair<size_t, const void *> > args; |
|
|
|
|
string kernelName = (sigma == 4.0f) ? "compute_hists_lut_kernel" :
|
|
|
|
|
"compute_hists_kernel"; |
|
|
|
|
|
|
|
|
|
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; |
|
|
|
|
int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y; |
|
|
|
|
|
|
|
|
|
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x)
|
|
|
|
|
/ block_stride_x; |
|
|
|
|
int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y)
|
|
|
|
|
/ block_stride_y; |
|
|
|
|
int blocks_total = img_block_width * img_block_height; |
|
|
|
|
int blocks_in_group = 4; |
|
|
|
|
size_t localThreads[3] = { blocks_in_group * 24, 2, 1 }; |
|
|
|
|
size_t globalThreads[3] = { divUp(blocks_total, blocks_in_group) * localThreads[0], 2, 1 }; |
|
|
|
|
|
|
|
|
|
int grad_quadstep = grad.step >> 2; |
|
|
|
|
int qangle_step = qangle.step; |
|
|
|
@ -1613,6 +1731,11 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc |
|
|
|
|
// Precompute gaussian spatial window parameter
|
|
|
|
|
float scale = 1.f / (2.f * sigma * sigma); |
|
|
|
|
|
|
|
|
|
int blocks_in_group = 4; |
|
|
|
|
size_t localThreads[3] = { blocks_in_group * 24, 2, 1 }; |
|
|
|
|
size_t globalThreads[3] = {
|
|
|
|
|
divUp(img_block_width * img_block_height, blocks_in_group) * localThreads[0], 2, 1 }; |
|
|
|
|
|
|
|
|
|
int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12) * sizeof(float); |
|
|
|
|
int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y) * sizeof(float); |
|
|
|
|
int smem = (hists_size + final_hists_size) * blocks_in_group; |
|
|
|
@ -1628,62 +1751,120 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&qangle.data)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_float), (void *)&scale)); |
|
|
|
|
if (kernelName.compare("compute_hists_lut_kernel") == 0) |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&gauss_w_lut.data)); |
|
|
|
|
else |
|
|
|
|
args.push_back( make_pair( sizeof(cl_float), (void *)&scale)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); |
|
|
|
|
args.push_back( make_pair( smem, (void *)NULL)); |
|
|
|
|
|
|
|
|
|
openCLExecuteKernel_hog(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args); |
|
|
|
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
|
|
|
|
localThreads, args, -1, -1); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int block_stride_y, |
|
|
|
|
int height, int width, cv::ocl::oclMat &block_hists, float threshold) |
|
|
|
|
void cv::ocl::device::hog::normalize_hists(int nbins,
|
|
|
|
|
int block_stride_x, int block_stride_y, |
|
|
|
|
int height, int width,
|
|
|
|
|
cv::ocl::oclMat &block_hists,
|
|
|
|
|
float threshold) |
|
|
|
|
{ |
|
|
|
|
Context *clCxt = Context::getContext(); |
|
|
|
|
string kernelName = "normalize_hists_kernel"; |
|
|
|
|
vector< pair<size_t, const void *> > args; |
|
|
|
|
string kernelName; |
|
|
|
|
|
|
|
|
|
int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y; |
|
|
|
|
int nthreads = power_2up(block_hist_size); |
|
|
|
|
|
|
|
|
|
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; |
|
|
|
|
int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y; |
|
|
|
|
size_t globalThreads[3] = { img_block_width * nthreads, img_block_height, 1 }; |
|
|
|
|
size_t localThreads[3] = { nthreads, 1, 1 }; |
|
|
|
|
|
|
|
|
|
if ((nthreads < 32) || (nthreads > 512) ) |
|
|
|
|
cv::ocl::error("normalize_hists: histogram's size is too small or too big", __FILE__, __LINE__, "normalize_hists"); |
|
|
|
|
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x)
|
|
|
|
|
/ block_stride_x; |
|
|
|
|
int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y)
|
|
|
|
|
/ block_stride_y; |
|
|
|
|
int nthreads; |
|
|
|
|
size_t globalThreads[3] = { 1, 1, 1 }; |
|
|
|
|
size_t localThreads[3] = { 1, 1, 1 }; |
|
|
|
|
|
|
|
|
|
if ( nbins == 9 ) |
|
|
|
|
{ |
|
|
|
|
/* optimized for the case of 9 bins */ |
|
|
|
|
kernelName = "normalize_hists_36_kernel"; |
|
|
|
|
int blocks_in_group = NTHREADS / block_hist_size; |
|
|
|
|
nthreads = blocks_in_group * block_hist_size; |
|
|
|
|
int num_groups = divUp( img_block_width * img_block_height, blocks_in_group); |
|
|
|
|
globalThreads[0] = nthreads * num_groups; |
|
|
|
|
localThreads[0] = nthreads; |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
kernelName = "normalize_hists_kernel"; |
|
|
|
|
nthreads = power_2up(block_hist_size); |
|
|
|
|
globalThreads[0] = img_block_width * nthreads; |
|
|
|
|
globalThreads[1] = img_block_height; |
|
|
|
|
localThreads[0] = nthreads; |
|
|
|
|
|
|
|
|
|
if ((nthreads < 32) || (nthreads > 512) ) |
|
|
|
|
cv::ocl::error("normalize_hists: histogram's size is too small or too big",
|
|
|
|
|
__FILE__, __LINE__, "normalize_hists"); |
|
|
|
|
|
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&nthreads)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&block_hist_size)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&nthreads)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&block_hist_size)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_float), (void *)&threshold)); |
|
|
|
|
args.push_back( make_pair( nthreads * sizeof(float), (void *)NULL)); |
|
|
|
|
|
|
|
|
|
openCLExecuteKernel_hog(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args); |
|
|
|
|
if(hog_device_cpu) |
|
|
|
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
|
|
|
|
localThreads, args, -1, -1, "-D CPU"); |
|
|
|
|
else |
|
|
|
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
|
|
|
|
localThreads, args, -1, -1); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int block_stride_y, |
|
|
|
|
int block_stride_x, int win_stride_y, int win_stride_x, int height, |
|
|
|
|
int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef, |
|
|
|
|
float threshold, cv::ocl::oclMat &labels) |
|
|
|
|
void cv::ocl::device::hog::classify_hists(int win_height, int win_width,
|
|
|
|
|
int block_stride_y, int block_stride_x,
|
|
|
|
|
int win_stride_y, int win_stride_x,
|
|
|
|
|
int height, int width,
|
|
|
|
|
const cv::ocl::oclMat &block_hists,
|
|
|
|
|
const cv::ocl::oclMat &coefs,
|
|
|
|
|
float free_coef, float threshold,
|
|
|
|
|
cv::ocl::oclMat &labels) |
|
|
|
|
{ |
|
|
|
|
Context *clCxt = Context::getContext(); |
|
|
|
|
string kernelName = "classify_hists_kernel"; |
|
|
|
|
vector< pair<size_t, const void *> > args; |
|
|
|
|
|
|
|
|
|
int nthreads; |
|
|
|
|
string kernelName; |
|
|
|
|
switch (cdescr_width) |
|
|
|
|
{ |
|
|
|
|
case 180: |
|
|
|
|
nthreads = 180; |
|
|
|
|
kernelName = "classify_hists_180_kernel"; |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_height)); |
|
|
|
|
break; |
|
|
|
|
case 252: |
|
|
|
|
nthreads = 256; |
|
|
|
|
kernelName = "classify_hists_252_kernel"; |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_height)); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
nthreads = 256; |
|
|
|
|
kernelName = "classify_hists_kernel"; |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_size)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
int win_block_stride_x = win_stride_x / block_stride_x; |
|
|
|
|
int win_block_stride_y = win_stride_y / block_stride_y; |
|
|
|
|
int img_win_width = (width - win_width + win_stride_x) / win_stride_x; |
|
|
|
|
int img_win_height = (height - win_height + win_stride_y) / win_stride_y; |
|
|
|
|
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; |
|
|
|
|
|
|
|
|
|
size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; |
|
|
|
|
size_t localThreads[3] = { NTHREADS, 1, 1 }; |
|
|
|
|
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
|
|
|
|
|
block_stride_x; |
|
|
|
|
|
|
|
|
|
size_t globalThreads[3] = { img_win_width * nthreads, img_win_height, 1 }; |
|
|
|
|
size_t localThreads[3] = { nthreads, 1, 1 }; |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_size)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&img_win_width)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_x)); |
|
|
|
@ -1694,12 +1875,20 @@ void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int blo |
|
|
|
|
args.push_back( make_pair( sizeof(cl_float), (void *)&threshold)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&labels.data)); |
|
|
|
|
|
|
|
|
|
openCLExecuteKernel_hog(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args); |
|
|
|
|
if(hog_device_cpu) |
|
|
|
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
|
|
|
|
localThreads, args, -1, -1, "-D CPU"); |
|
|
|
|
else |
|
|
|
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
|
|
|
|
localThreads, args, -1, -1); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, |
|
|
|
|
int win_stride_y, int win_stride_x, int height, int width, |
|
|
|
|
const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors) |
|
|
|
|
void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width,
|
|
|
|
|
int block_stride_y, int block_stride_x, |
|
|
|
|
int win_stride_y, int win_stride_x,
|
|
|
|
|
int height, int width, |
|
|
|
|
const cv::ocl::oclMat &block_hists,
|
|
|
|
|
cv::ocl::oclMat &descriptors) |
|
|
|
|
{ |
|
|
|
|
Context *clCxt = Context::getContext(); |
|
|
|
|
string kernelName = "extract_descrs_by_rows_kernel"; |
|
|
|
@ -1709,7 +1898,8 @@ void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, |
|
|
|
|
int win_block_stride_y = win_stride_y / block_stride_y; |
|
|
|
|
int img_win_width = (width - win_width + win_stride_x) / win_stride_x; |
|
|
|
|
int img_win_height = (height - win_height + win_stride_y) / win_stride_y; |
|
|
|
|
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; |
|
|
|
|
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
|
|
|
|
|
block_stride_x; |
|
|
|
|
int descriptors_quadstep = descriptors.step >> 2; |
|
|
|
|
|
|
|
|
|
size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; |
|
|
|
@ -1725,12 +1915,16 @@ void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); |
|
|
|
|
|
|
|
|
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); |
|
|
|
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
|
|
|
|
localThreads, args, -1, -1); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, |
|
|
|
|
int win_stride_y, int win_stride_x, int height, int width, |
|
|
|
|
const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors) |
|
|
|
|
void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width,
|
|
|
|
|
int block_stride_y, int block_stride_x, |
|
|
|
|
int win_stride_y, int win_stride_x,
|
|
|
|
|
int height, int width, |
|
|
|
|
const cv::ocl::oclMat &block_hists,
|
|
|
|
|
cv::ocl::oclMat &descriptors) |
|
|
|
|
{ |
|
|
|
|
Context *clCxt = Context::getContext(); |
|
|
|
|
string kernelName = "extract_descrs_by_cols_kernel"; |
|
|
|
@ -1740,7 +1934,8 @@ void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, |
|
|
|
|
int win_block_stride_y = win_stride_y / block_stride_y; |
|
|
|
|
int img_win_width = (width - win_width + win_stride_x) / win_stride_x; |
|
|
|
|
int img_win_height = (height - win_height + win_stride_y) / win_stride_y; |
|
|
|
|
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; |
|
|
|
|
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
|
|
|
|
|
block_stride_x; |
|
|
|
|
int descriptors_quadstep = descriptors.step >> 2; |
|
|
|
|
|
|
|
|
|
size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; |
|
|
|
@ -1757,11 +1952,16 @@ void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); |
|
|
|
|
|
|
|
|
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); |
|
|
|
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
|
|
|
|
localThreads, args, -1, -1); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img, |
|
|
|
|
float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma) |
|
|
|
|
void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width,
|
|
|
|
|
const cv::ocl::oclMat &img, |
|
|
|
|
float angle_scale,
|
|
|
|
|
cv::ocl::oclMat &grad,
|
|
|
|
|
cv::ocl::oclMat &qangle,
|
|
|
|
|
bool correct_gamma) |
|
|
|
|
{ |
|
|
|
|
Context *clCxt = Context::getContext(); |
|
|
|
|
string kernelName = "compute_gradients_8UC1_kernel"; |
|
|
|
@ -1786,11 +1986,16 @@ void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const c |
|
|
|
|
args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins)); |
|
|
|
|
|
|
|
|
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); |
|
|
|
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
|
|
|
|
localThreads, args, -1, -1); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img, |
|
|
|
|
float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma) |
|
|
|
|
void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width,
|
|
|
|
|
const cv::ocl::oclMat &img, |
|
|
|
|
float angle_scale,
|
|
|
|
|
cv::ocl::oclMat &grad,
|
|
|
|
|
cv::ocl::oclMat &qangle,
|
|
|
|
|
bool correct_gamma) |
|
|
|
|
{ |
|
|
|
|
Context *clCxt = Context::getContext(); |
|
|
|
|
string kernelName = "compute_gradients_8UC4_kernel"; |
|
|
|
@ -1816,39 +2021,6 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const c |
|
|
|
|
args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins)); |
|
|
|
|
|
|
|
|
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz) |
|
|
|
|
{ |
|
|
|
|
CV_Assert( (src.channels() == dst.channels()) ); |
|
|
|
|
Context *clCxt = Context::getContext(); |
|
|
|
|
|
|
|
|
|
string kernelName = (src.type() == CV_8UC1) ? "resize_8UC1_kernel" : "resize_8UC4_kernel"; |
|
|
|
|
size_t blkSizeX = 16, blkSizeY = 16; |
|
|
|
|
size_t glbSizeX = sz.width % blkSizeX == 0 ? sz.width : (sz.width / blkSizeX + 1) * blkSizeX; |
|
|
|
|
size_t glbSizeY = sz.height % blkSizeY == 0 ? sz.height : (sz.height / blkSizeY + 1) * blkSizeY; |
|
|
|
|
size_t globalThreads[3] = {glbSizeX, glbSizeY, 1}; |
|
|
|
|
size_t localThreads[3] = {blkSizeX, blkSizeY, 1}; |
|
|
|
|
|
|
|
|
|
float ifx = (float)src.cols / sz.width; |
|
|
|
|
float ify = (float)src.rows / sz.height; |
|
|
|
|
int src_step = static_cast<int>(src.step); |
|
|
|
|
int dst_step = static_cast<int>(dst.step); |
|
|
|
|
|
|
|
|
|
vector< pair<size_t, const void *> > args; |
|
|
|
|
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&src_step)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&sz.width)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&sz.height)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_float), (void *)&ifx)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_float), (void *)&ify)); |
|
|
|
|
|
|
|
|
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); |
|
|
|
|
} |
|
|
|
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
|
|
|
|
localThreads, args, -1, -1); |
|
|
|
|
} |