diff --git a/modules/gpu/src/cuda/hog.cu b/modules/gpu/src/cuda/hog.cu index 3dbaca037c..523bde5945 100644 --- a/modules/gpu/src/cuda/hog.cu +++ b/modules/gpu/src/cuda/hog.cu @@ -198,8 +198,8 @@ __global__ void compute_hists_kernel_many_blocks(const int img_block_width, cons void compute_hists(int nbins, int block_stride_x, int block_stride_y, - int height, int width, const DevMem2Df& grad, - const DevMem2D& qangle, float sigma, float* block_hists) + int height, int width, const DevMem2Df& grad, + const DevMem2D& qangle, float sigma, float* block_hists) { const int nblocks = 1; @@ -300,7 +300,7 @@ __global__ void normalize_hists_kernel_many_blocks(const int block_hist_size, void normalize_hists(int nbins, int block_stride_x, int block_stride_y, - int height, int width, float* block_hists, float threshold) + int height, int width, float* block_hists, float threshold) { const int nblocks = 1; @@ -336,6 +336,7 @@ void normalize_hists(int nbins, int block_stride_x, int block_stride_y, template // Number of histogram block processed by single GPU thread block __global__ void classify_hists_kernel_many_blocks(const int img_win_width, const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, const float* block_hists, const float* coefs, float free_coef, float threshold, unsigned char* labels) { @@ -343,8 +344,8 @@ __global__ void classify_hists_kernel_many_blocks(const int img_win_width, const if (blockIdx.x * blockDim.z + win_x >= img_win_width) return; - const float* hist = block_hists + (blockIdx.y * img_block_width + - blockIdx.x * blockDim.z + win_x) * + const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width + + blockIdx.x * win_block_stride_x * blockDim.z + win_x) * cblock_hist_size; float product = 0.f; @@ -397,15 +398,18 @@ __global__ void classify_hists_kernel_many_blocks(const int img_win_width, const // We only support win_stride_x == block_stride_x, win_stride_y == block_stride_y -void classify_hists(int win_height, int win_width, int block_stride_x, int block_stride_y, - int height, int width, float* block_hists, float* coefs, - float free_coef, float threshold, unsigned char* labels) +void classify_hists(int win_height, int win_width, int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, + int height, int width, float* block_hists, float* coefs, + float free_coef, float threshold, unsigned char* labels) { const int nthreads = 256; const int nblocks = 1; - int img_win_width = (width - win_width + block_stride_x) / block_stride_x; - int img_win_height = (height - win_height + block_stride_y) / block_stride_y; + int win_block_stride_x = win_stride_x / block_stride_x; + int win_block_stride_y = win_stride_y / block_stride_y; + int img_win_width = (width - win_width + win_stride_x) / win_stride_x; + int img_win_height = (height - win_height + win_stride_y) / win_stride_y; dim3 threads(nthreads, 1, nblocks); dim3 grid(div_up(img_win_width, nblocks), img_win_height); @@ -416,7 +420,8 @@ void classify_hists(int win_height, int win_width, int block_stride_x, int block int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; classify_hists_kernel_many_blocks<<>>( - img_win_width, img_block_width, block_hists, coefs, free_coef, threshold, labels); + img_win_width, img_block_width, win_block_stride_x, win_block_stride_y, + block_hists, coefs, free_coef, threshold, labels); cudaSafeCall(cudaThreadSynchronize()); } @@ -524,7 +529,7 @@ __global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrEl void compute_gradients_8UC4(int nbins, int height, int width, const DevMem2D& img, - float angle_scale, DevMem2Df grad, DevMem2D qangle) + float angle_scale, DevMem2Df grad, DevMem2D qangle) { const int nthreads = 256; @@ -580,7 +585,7 @@ __global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrEl void compute_gradients_8UC1(int nbins, int height, int width, const DevMem2D& img, - float angle_scale, DevMem2Df grad, DevMem2D qangle) + float angle_scale, DevMem2Df grad, DevMem2D qangle) { const int nthreads = 256; diff --git a/modules/gpu/src/hog.cpp b/modules/gpu/src/hog.cpp index 80247471d7..40c3541312 100644 --- a/modules/gpu/src/hog.cpp +++ b/modules/gpu/src/hog.cpp @@ -73,9 +73,10 @@ void compute_hists(int nbins, int block_stride_x, int blovck_stride_y, void normalize_hists(int nbins, int block_stride_x, int block_stride_y, int height, int width, float* block_hists, float threshold); -void classify_hists(int win_height, int win_width, int block_stride_x, - int block_stride_y, int height, int width, float* block_hists, - float* coefs, float free_coef, float threshold, unsigned char* labels); +void classify_hists(int win_height, int win_width, int block_stride_y, + int block_stride_x, int win_stride_y, int win_stride_x, int height, + int width, float* block_hists, float* coefs, float free_coef, + float threshold, unsigned char* labels); void compute_gradients_8UC1(int nbins, int height, int width, const cv::gpu::DevMem2D& img, float angle_scale, cv::gpu::DevMem2Df grad, cv::gpu::DevMem2D qangle); @@ -209,7 +210,8 @@ void cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector& hits, doub if (win_stride == Size()) win_stride = block_stride; else - CV_Assert(win_stride == block_stride); + CV_Assert(win_stride.width % block_stride.width == 0 && + win_stride.height % block_stride.height == 0); CV_Assert(padding == Size(0, 0)); @@ -229,8 +231,8 @@ void cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector& hits, doub block_hists.ptr(), (float)threshold_L2hys); hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width, - img.rows, img.cols, block_hists.ptr(), detector.ptr(), - (float)free_coef, (float)hit_threshold, labels.ptr()); + win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr(), + detector.ptr(), (float)free_coef, (float)hit_threshold, labels.ptr()); labels.download(labels_host); unsigned char* vec = labels_host.ptr(); diff --git a/samples/gpu/gpu_hog.cpp b/samples/gpu/gpu_hog.cpp index d1e1a3207b..53370ba1b4 100644 --- a/samples/gpu/gpu_hog.cpp +++ b/samples/gpu/gpu_hog.cpp @@ -31,6 +31,8 @@ public: int gr_threshold; double hit_threshold; int win_width; + int win_stride_width; + int win_stride_height; }; @@ -94,6 +96,8 @@ int main(int argc, char** argv) << " [-scale ] # HOG window scale factor\n" << " [-nlevels ] # max number of HOG window scales\n" << " [-win_width ] # width of the window (48 or 64)\n" + << " [-win_stride_width ] # distance by OX axis between neighbour wins\n" + << " [-win_stride_height ] # distance by OY axis between neighbour wins\n" << " [-gr_threshold ] # merging similar rects constant\n"; return 1; } @@ -118,6 +122,8 @@ Settings::Settings() gr_threshold = 8; hit_threshold = 1.4; win_width = 48; + win_stride_width = 8; + win_stride_height = 8; } @@ -139,6 +145,8 @@ Settings Settings::Read(int argc, char** argv) else if (key == "-scale") settings.scale = atof(val.c_str()); else if (key == "-nlevels") settings.nlevels = atoi(val.c_str()); else if (key == "-win_width") settings.win_width = atoi(val.c_str()); + else if (key == "-win_stride_width") settings.win_stride_width = atoi(val.c_str()); + else if (key == "-win_stride_height") settings.win_stride_height = atoi(val.c_str()); else if (key == "-gr_threshold") settings.gr_threshold = atoi(val.c_str()); else throw exception((string("Unknown key: ") + key).c_str()); } @@ -152,13 +160,13 @@ App::App(const Settings &s) { settings = s; cout << "\nControls:\n" - << "ESC - exit\n" - << "m - change mode GPU <-> CPU\n" - << "g - convert image to gray or not\n" - << "1/q - increase/decrease HOG scale\n" - << "2/w - increase/decrease levels count\n" - << "3/e - increase/decrease HOG group threshold\n" - << "4/r - increase/decrease hit threshold\n" + << "\tESC - exit\n" + << "\tm - change mode GPU <-> CPU\n" + << "\tg - convert image to gray or not\n" + << "\t1/q - increase/decrease HOG scale\n" + << "\t2/w - increase/decrease levels count\n" + << "\t3/e - increase/decrease HOG group threshold\n" + << "\t4/r - increase/decrease hit threshold\n" << endl; use_gpu = true; @@ -171,10 +179,11 @@ App::App(const Settings &s) if (settings.win_width != 64 && settings.win_width != 48) settings.win_width = 64; - cout << endl << "Scale: " << scale << endl; + cout << "Scale: " << scale << endl; cout << "Group threshold: " << gr_threshold << endl; cout << "Levels number: " << nlevels << endl; cout << "Win width: " << settings.win_width << endl; + cout << "Win stride: (" << settings.win_stride_width << ", " << settings.win_stride_height << ")\n"; cout << "Hit threshold: " << hit_threshold << endl; cout << endl; } @@ -185,10 +194,11 @@ void App::RunOpencvGui() running = true; Size win_size(settings.win_width, settings.win_width * 2); //(64, 128) or (48, 96) + Size win_stride(settings.win_stride_width, settings.win_stride_height); vector detector; - if (win_size == Size(64,128)) + if (win_size == Size(64, 128)) detector = cv::gpu::HOGDescriptor::getPeopleDetector_64x128(); else detector = cv::gpu::HOGDescriptor::getPeopleDetector_48x96(); @@ -198,7 +208,7 @@ void App::RunOpencvGui() gpu_hog.setSVMDetector(detector); // CPU's HOG classifier - cv::HOGDescriptor cpu_hog(win_size, Size(16,16), Size(8,8), Size(8,8), 9, 1, -1, HOGDescriptor::L2Hys, 0.2, true, HOGDescriptor::DEFAULT_NLEVELS); + cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1, HOGDescriptor::L2Hys, 0.2, true, HOGDescriptor::DEFAULT_NLEVELS); cpu_hog.setSVMDetector(detector); // Make endless cycle from video (if src is video) @@ -250,10 +260,10 @@ void App::RunOpencvGui() if (use_gpu) { gpu_img = img; - gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, Size(8, 8), Size(0, 0), scale, gr_threshold); + gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride, Size(0, 0), scale, gr_threshold); } else - cpu_hog.detectMultiScale(img, found, hit_threshold, Size(8, 8), Size(0, 0), scale, gr_threshold); + cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride, Size(0, 0), scale, gr_threshold); HogWorkEnd(); // Draw positive classified windows