added support of different descriptor formats into gpu HOGDescriptor

pull/13383/head
Alexey Spizhevoy 14 years ago
parent faf4d0bc74
commit 6a9d022a9f
  1. 28
      modules/gpu/include/opencv2/gpu/gpu.hpp
  2. 65
      modules/gpu/src/cuda/hog.cu
  3. 33
      modules/gpu/src/hog.cpp
  4. 81
      tests/gpu/src/hog.cpp

@ -1007,11 +1007,15 @@ namespace cv
GpuMat table_space;
};
//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
struct CV_EXPORTS HOGDescriptor
{
public:
enum { DEFAULT_WIN_SIGMA = -1 };
enum { DEFAULT_NLEVELS = 64 };
enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
@ -1029,13 +1033,14 @@ namespace cv
void setSVMDetector(const vector<float>& detector);
bool checkDetectorSize() const;
void computeBlockHistograms(const GpuMat& img);
void detect(const GpuMat& img, vector<Point>& found_locations, double hit_threshold=0,
Size win_stride=Size(), Size padding=Size());
void detectMultiScale(const GpuMat& img, vector<Rect>& found_locations,
double hit_threshold=0, Size win_stride=Size(), Size padding=Size(),
double scale0=1.05, int group_threshold=2);
void getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors);
void getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors,
int descr_format=DESCR_FORMAT_COL_BY_COL);
Size win_size;
Size block_size;
@ -1044,9 +1049,17 @@ namespace cv
int nbins;
double win_sigma;
double threshold_L2hys;
bool gamma_correction;
int nlevels;
protected:
void computeBlockHistograms(const GpuMat& img);
void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
static int numPartsWithin(int size, int part_size, int stride);
static Size numPartsWithin(Size size, Size part_size, Size stride);
bool gamma_correction;
// Coefficients of the separating plane
float free_coef;
GpuMat detector;
@ -1058,13 +1071,8 @@ namespace cv
// Results of the last histogram evaluation step
GpuMat block_hists;
private:
static int numPartsWithin(int size, int part_size, int stride);
static Size numPartsWithin(Size size, Size part_size, Size stride);
void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
GpuMat grad, qangle;
// Gradients computation results
GpuMat grad, qangle;
};
}

@ -428,9 +428,9 @@ void classify_hists(int win_height, int win_width, int block_stride_y, int block
template <int nthreads>
__global__ void extract_descriptors_kernel(const int img_win_width, const int img_block_width,
const int win_block_stride_x, const int win_block_stride_y,
const float* block_hists, PtrElemStepf descriptors)
__global__ void extract_descrs_by_rows_kernel(const int img_block_width, const int win_block_stride_x,
const int win_block_stride_y, const float* block_hists,
PtrElemStepf descriptors)
{
// Get left top corner of the window in src
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
@ -449,9 +449,9 @@ __global__ void extract_descriptors_kernel(const int img_win_width, const int im
}
void extract_descriptors(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
DevMem2Df descriptors)
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
DevMem2Df descriptors)
{
const int nthreads = 256;
@ -464,9 +464,56 @@ void extract_descriptors(int win_height, int win_width, int block_stride_y, int
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
block_stride_x;
extract_descriptors_kernel<nthreads><<<grid, threads>>>(
img_win_width, img_block_width, win_block_stride_x, win_block_stride_y,
block_hists, descriptors);
extract_descrs_by_rows_kernel<nthreads><<<grid, threads>>>(
img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
cudaSafeCall(cudaThreadSynchronize());
}
// Copies the block histograms belonging to one detection window into one row
// of the descriptors matrix, storing the window's blocks column by column.
//
// The window handled by a CUDA block is selected by (blockIdx.x, blockIdx.y);
// its output row is blockIdx.y * gridDim.x + blockIdx.x. Threads of the block
// walk the descriptor elements with a step of nthreads, so the kernel must be
// launched with nthreads threads along x.
//
//   img_block_width     - number of histogram blocks per row of block_hists
//   win_block_stride_x  - window step along x, measured in blocks
//   win_block_stride_y  - window step along y, measured in blocks
//   block_hists         - source block histograms
//   descriptors         - destination matrix, one row per window
template <int nthreads>
__global__ void extract_descrs_by_cols_kernel(const int img_block_width, const int win_block_stride_x,
                                              const int win_block_stride_y, const float* block_hists,
                                              PtrElemStepf descriptors)
{
    // Index of the window's top-left block inside block_hists
    const unsigned int first_block = blockIdx.y * win_block_stride_y * img_block_width +
                                     blockIdx.x * win_block_stride_x;

    // Start of the window's histograms in the source
    const float* src = block_hists + first_block * cblock_hist_size;

    // Start of the window's row in the destination
    float* dst = descriptors.ptr(blockIdx.y * gridDim.x + blockIdx.x);

    // Every thread transposes its share of the descriptor elements
    for (int elem = threadIdx.x; elem < cdescr_size; elem += nthreads)
    {
        // Split the linear element index into (block number, bin inside the block)
        const int block_no = elem / cblock_hist_size;
        const int bin = elem - block_no * cblock_hist_size;

        // Row-major position of the block inside the window
        const int row = block_no / cnblocks_win_x;
        const int col = block_no - row * cnblocks_win_x;

        // Source is addressed row by row, destination column by column
        dst[(col * cnblocks_win_y + row) * cblock_hist_size + bin]
            = src[(row * img_block_width + col) * cblock_hist_size + bin];
    }
}
// Host-side launcher for extract_descrs_by_cols_kernel: fills `descriptors`
// with one row per detection window, the window's blocks being stored
// column by column.
//
//   win_height, win_width          - detection window size
//   block_stride_y, block_stride_x - step between neighbouring blocks
//   win_stride_y, win_stride_x     - step between neighbouring windows
//   height, width                  - size of the processed image
//   block_hists                    - block histograms to read from
//   descriptors                    - destination matrix
//
// The launch uses a grid of (windows along x, windows along y) CUDA blocks
// with 256 threads each. If not even one window fits into the image, the
// function returns without launching anything.
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
                            int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
                            DevMem2Df descriptors)
{
    const int nthreads = 256;

    // Window step expressed in blocks
    int win_block_stride_x = win_stride_x / block_stride_x;
    int win_block_stride_y = win_stride_y / block_stride_y;

    // Number of windows along each image dimension
    int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
    int img_win_height = (height - win_height + win_stride_y) / win_stride_y;

    // A zero or negative window count would produce an invalid grid size;
    // there is nothing to extract in that case.
    if (img_win_width <= 0 || img_win_height <= 0)
        return;

    dim3 threads(nthreads, 1);
    dim3 grid(img_win_width, img_win_height);

    // Number of blocks per row of the block histogram buffer
    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
                          block_stride_x;

    extract_descrs_by_cols_kernel<nthreads><<<grid, threads>>>(
        img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);

    // Report launch failures (e.g. a rejected configuration) explicitly
    cudaSafeCall(cudaGetLastError());

    // Wait for the kernel to finish and report execution errors
    cudaSafeCall(cudaThreadSynchronize());
}

@ -50,11 +50,9 @@ size_t cv::gpu::HOGDescriptor::getBlockHistogramSize() const { throw_nogpu(); re
double cv::gpu::HOGDescriptor::getWinSigma() const { throw_nogpu(); return 0; }
bool cv::gpu::HOGDescriptor::checkDetectorSize() const { throw_nogpu(); return false; }
void cv::gpu::HOGDescriptor::setSVMDetector(const vector<float>&) { throw_nogpu(); }
void cv::gpu::HOGDescriptor::computeGradient(const GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::HOGDescriptor::computeBlockHistograms(const GpuMat&) { throw_nogpu(); }
void cv::gpu::HOGDescriptor::detect(const GpuMat&, vector<Point>&, double, Size, Size) { throw_nogpu(); }
void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat&, vector<Rect>&, double, Size, Size, double, int) { throw_nogpu(); }
void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat&, Size, GpuMat&) { throw_nogpu(); }
void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat&, Size, GpuMat&, int) { throw_nogpu(); }
std::vector<float> cv::gpu::HOGDescriptor::getDefaultPeopleDetector() { throw_nogpu(); return std::vector<float>(); }
std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector_48x96() { throw_nogpu(); return std::vector<float>(); }
std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector_64x128() { throw_nogpu(); return std::vector<float>(); }
@ -78,9 +76,12 @@ void classify_hists(int win_height, int win_width, int block_stride_y,
int width, float* block_hists, float* coefs, float free_coef,
float threshold, unsigned char* labels);
void extract_descriptors(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
cv::gpu::DevMem2Df descriptors);
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
cv::gpu::DevMem2Df descriptors);
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
cv::gpu::DevMem2Df descriptors);
void compute_gradients_8UC1(int nbins, int height, int width, const cv::gpu::DevMem2D& img,
float angle_scale, cv::gpu::DevMem2Df grad, cv::gpu::DevMem2D qangle);
@ -218,7 +219,7 @@ void cv::gpu::HOGDescriptor::computeBlockHistograms(const GpuMat& img)
}
void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors)
void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors, int descr_format)
{
CV_Assert(win_stride.width % block_stride.width == 0 &&
win_stride.height % block_stride.height == 0);
@ -231,9 +232,21 @@ void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride,
descriptors.create(wins_per_img.area(), blocks_per_win.area() * block_hist_size, CV_32F);
hog::extract_descriptors(win_size.height, win_size.width, block_stride.height, block_stride.width,
win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(),
descriptors);
switch (descr_format)
{
case DESCR_FORMAT_ROW_BY_ROW:
hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width,
win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(),
descriptors);
break;
case DESCR_FORMAT_COL_BY_COL:
hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width,
win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(),
descriptors);
break;
default:
CV_Error(CV_StsBadArg, "Unknown descriptor format");
}
}

@ -51,9 +51,9 @@ using namespace std;
ts->set_failed_test_info(err); \
return; }
struct CV_GpuHogDetectionTest: public CvTest
struct CV_GpuHogDetectionTest: public CvTest, public cv::gpu::HOGDescriptor
{
CV_GpuHogDetectionTest(): CvTest( "GPU-HOG-detect", "HOGDescriptorDetection" ) {}
CV_GpuHogDetectionTest(): CvTest("GPU-HOG-detect", "HOGDescriptorDetection") {}
void run(int)
{
@ -141,54 +141,53 @@ struct CV_GpuHogDetectionTest: public CvTest
{
cv::gpu::GpuMat d_img(img);
cv::gpu::HOGDescriptor hog;
hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
//cpu detector may be updated soon
//hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector());
std::vector<cv::Point> locations;
// Test detect
hog.detect(d_img, locations, 0);
detect(d_img, locations, 0);
#ifdef DUMP
dump(hog.block_hists, locations);
dump(block_hists, locations);
#else
compare(hog.block_hists, locations);
compare(block_hists, locations);
#endif
// Test detect on smaller image
cv::gpu::GpuMat d_img2;
cv::gpu::resize(d_img, d_img2, cv::Size(d_img.cols / 2, d_img.rows / 2));
hog.detect(d_img2, locations, 0);
detect(d_img2, locations, 0);
#ifdef DUMP
dump(hog.block_hists, locations);
dump(block_hists, locations);
#else
compare(hog.block_hists, locations);
compare(block_hists, locations);
#endif
// Test detect on greater image
cv::gpu::resize(d_img, d_img2, cv::Size(d_img.cols * 2, d_img.rows * 2));
hog.detect(d_img2, locations, 0);
detect(d_img2, locations, 0);
#ifdef DUMP
dump(hog.block_hists, locations);
dump(block_hists, locations);
#else
compare(hog.block_hists, locations);
compare(block_hists, locations);
#endif
// Test detectMultiScale
std::vector<cv::Rect> rects;
size_t nrects;
hog.detectMultiScale(d_img, rects, 0, cv::Size(8, 8), cv::Size(), 1.05, 2);
detectMultiScale(d_img, rects, 0, cv::Size(8, 8), cv::Size(), 1.05, 2);
#ifdef DUMP
nrects = rects.size();
f.write((char*)&nrects, sizeof(nrects));
for (size_t i = 0; i < rects.size(); ++i)
f.write((char*)&rects[i], sizeof(rects[i]));
dump(hog.block_hists, std::vector<cv::Point>());
dump(block_hists, std::vector<cv::Point>());
#else
f.read((char*)&nrects, sizeof(nrects));
CHECK(nrects == rects.size(), CvTS::FAIL_INVALID_OUTPUT)
@ -198,7 +197,7 @@ struct CV_GpuHogDetectionTest: public CvTest
f.read((char*)&rect, sizeof(rect));
CHECK(rect == rects[i], CvTS::FAIL_INVALID_OUTPUT);
}
compare(hog.block_hists, std::vector<cv::Point>());
compare(block_hists, std::vector<cv::Point>());
#endif
}
@ -211,9 +210,10 @@ struct CV_GpuHogDetectionTest: public CvTest
} gpu_hog_detection_test;
struct CV_GpuHogGetDescriptorsTest: public CvTest
struct CV_GpuHogGetDescriptorsTest: public CvTest, public cv::gpu::HOGDescriptor
{
CV_GpuHogGetDescriptorsTest(): CvTest("GPU-HOG-getDescriptors", "HOGDescriptorGetDescriptors") {}
CV_GpuHogGetDescriptorsTest():
CvTest("GPU-HOG-getDescriptors", "HOGDescriptorGetDescriptors"), HOGDescriptor(cv::Size(64, 128)) {}
void run(int)
{
@ -228,12 +228,11 @@ struct CV_GpuHogGetDescriptorsTest: public CvTest
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
cv::gpu::GpuMat d_img(img);
cv::Size win_size(64, 128);
cv::gpu::HOGDescriptor hog(win_size);
// Convert train images into feature vectors (train table)
cv::gpu::GpuMat descriptors;
hog.getDescriptors(d_img, win_size, descriptors);
cv::gpu::GpuMat descriptors, descriptors_by_cols;
getDescriptors(d_img, win_size, descriptors, DESCR_FORMAT_ROW_BY_ROW);
getDescriptors(d_img, win_size, descriptors_by_cols, DESCR_FORMAT_COL_BY_COL);
// Check size of the result train table
wins_per_img_x = 3;
@ -245,6 +244,20 @@ struct CV_GpuHogGetDescriptorsTest: public CvTest
wins_per_img_x * wins_per_img_y);
CHECK(descriptors.size() == descr_size_expected, CvTS::FAIL_INVALID_OUTPUT);
// Check both formats of output descriptors are handled correctly
cv::Mat dr(descriptors);
cv::Mat dc(descriptors_by_cols);
for (int i = 0; i < wins_per_img_x * wins_per_img_y; ++i)
{
const float* l = dr.rowRange(i, i + 1).ptr<float>();
const float* r = dc.rowRange(i, i + 1).ptr<float>();
for (int y = 0; y < blocks_per_win_y; ++y)
for (int x = 0; x < blocks_per_win_x; ++x)
for (int k = 0; k < block_hist_size; ++k)
CHECK(l[(y * blocks_per_win_x + x) * block_hist_size + k] ==
r[(x * blocks_per_win_y + y) * block_hist_size + k], CvTS::FAIL_INVALID_OUTPUT);
}
/* Now we want to extract the same feature vectors, but from single images. NOTE: results will
be different, due to border values interpolation. Using many small images is slower, however we
won't call getDescriptors and will use computeBlockHistograms instead. computeBlockHistograms
@ -253,39 +266,39 @@ struct CV_GpuHogGetDescriptorsTest: public CvTest
img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/positive1.png");
CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA);
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
hog.computeBlockHistograms(cv::gpu::GpuMat(img));
computeBlockHistograms(cv::gpu::GpuMat(img));
// Everything is fine with interpolation for left top subimage
CHECK(cv::norm(hog.block_hists, descriptors.rowRange(0, 1)) == 0.f, CvTS::FAIL_INVALID_OUTPUT);
CHECK(cv::norm(block_hists, descriptors.rowRange(0, 1)) == 0.f, CvTS::FAIL_INVALID_OUTPUT);
img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/positive2.png");
CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA);
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
hog.computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(hog.block_hists, descriptors.rowRange(1, 2));
computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(block_hists, descriptors.rowRange(1, 2));
img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/negative1.png");
CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA);
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
hog.computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(hog.block_hists, descriptors.rowRange(2, 3));
computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(block_hists, descriptors.rowRange(2, 3));
img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/negative2.png");
CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA);
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
hog.computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(hog.block_hists, descriptors.rowRange(3, 4));
computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(block_hists, descriptors.rowRange(3, 4));
img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/positive3.png");
CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA);
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
hog.computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(hog.block_hists, descriptors.rowRange(4, 5));
computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(block_hists, descriptors.rowRange(4, 5));
img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/negative3.png");
CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA);
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
hog.computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(hog.block_hists, descriptors.rowRange(5, 6));
computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(block_hists, descriptors.rowRange(5, 6));
}
catch (const cv::Exception& e)
{

Loading…
Cancel
Save