refactor CUDA HOG algorithm:

use abstract interface with hidden implementation
pull/3600/head
Vladislav Vinogradov 10 years ago
parent 0af7597d36
commit 8257dc3c1e
  1. 189
      modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp
  2. 10
      modules/cudaobjdetect/perf/perf_objdetect.cpp
  3. 2976
      modules/cudaobjdetect/src/hog.cpp
  4. 135
      modules/cudaobjdetect/test/test_objdetect.cpp
  5. 35
      samples/gpu/hog.cpp

@ -65,32 +65,24 @@ namespace cv { namespace cuda {
// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector
//
struct CV_EXPORTS HOGConfidence
{
double scale;
std::vector<Point> locations;
std::vector<double> confidences;
std::vector<double> part_scores[4];
};
/** @brief The class implements Histogram of Oriented Gradients (@cite Dalal2005) object detector.
Interfaces of all methods are kept similar to the CPU HOG descriptor and detector analogues as much
as possible.
@note
- An example applying the HOG descriptor for people detection can be found at
- An example applying the HOG descriptor for people detection can be found at
opencv_source_code/samples/cpp/peopledetect.cpp
- A CUDA example applying the HOG descriptor for people detection can be found at
opencv_source_code/samples/gpu/hog.cpp
- (Python) An example applying the HOG descriptor for people detection can be found at
opencv_source_code/samples/python2/peopledetect.py
*/
struct CV_EXPORTS HOGDescriptor
class CV_EXPORTS HOG : public cv::Algorithm
{
enum { DEFAULT_WIN_SIGMA = -1 };
enum { DEFAULT_NLEVELS = 64 };
enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
public:
enum
{
DESCR_FORMAT_ROW_BY_ROW,
DESCR_FORMAT_COL_BY_COL
};
/** @brief Creates the HOG descriptor and detector.
@ -99,132 +91,105 @@ struct CV_EXPORTS HOGDescriptor
@param block_stride Block stride. It must be a multiple of cell size.
@param cell_size Cell size. Only (8, 8) is supported for now.
@param nbins Number of bins. Only 9 bins per cell are supported for now.
@param win_sigma Gaussian smoothing window parameter.
@param threshold_L2hys L2-Hys normalization method shrinkage.
@param gamma_correction Flag to specify whether the gamma correction preprocessing is required or
not.
@param nlevels Maximum number of detection window increases.
*/
HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
double threshold_L2hys=0.2, bool gamma_correction=true,
int nlevels=DEFAULT_NLEVELS);
static Ptr<HOG> create(Size win_size = Size(64, 128),
Size block_size = Size(16, 16),
Size block_stride = Size(8, 8),
Size cell_size = Size(8, 8),
int nbins = 9);
//! Gaussian smoothing window parameter.
virtual void setWinSigma(double win_sigma) = 0;
virtual double getWinSigma() const = 0;
//! L2-Hys normalization method shrinkage.
virtual void setL2HysThreshold(double threshold_L2hys) = 0;
virtual double getL2HysThreshold() const = 0;
//! Flag to specify whether the gamma correction preprocessing is required or not.
virtual void setGammaCorrection(bool gamma_correction) = 0;
virtual bool getGammaCorrection() const = 0;
//! Maximum number of detection window increases.
virtual void setNumLevels(int nlevels) = 0;
virtual int getNumLevels() const = 0;
//! Threshold for the distance between features and SVM classifying plane.
//! Usually it is 0 and should be specfied in the detector coefficients (as the last free
//! coefficient). But if the free coefficient is omitted (which is allowed), you can specify it
//! manually here.
virtual void setHitThreshold(double hit_threshold) = 0;
virtual double getHitThreshold() const = 0;
//! Window stride. It must be a multiple of block stride.
virtual void setWinStride(Size win_stride) = 0;
virtual Size getWinStride() const = 0;
//! Coefficient of the detection window increase.
virtual void setScaleFactor(double scale0) = 0;
virtual double getScaleFactor() const = 0;
//! Coefficient to regulate the similarity threshold. When detected, some
//! objects can be covered by many rectangles. 0 means not to perform grouping.
//! See groupRectangles.
virtual void setGroupThreshold(int group_threshold) = 0;
virtual int getGroupThreshold() const = 0;
//! Descriptor storage format:
//! - **DESCR_FORMAT_ROW_BY_ROW** - Row-major order.
//! - **DESCR_FORMAT_COL_BY_COL** - Column-major order.
virtual void setDescriptorFormat(int descr_format) = 0;
virtual int getDescriptorFormat() const = 0;
/** @brief Returns the number of coefficients required for the classification.
*/
size_t getDescriptorSize() const;
virtual size_t getDescriptorSize() const = 0;
/** @brief Returns the block histogram size.
*/
size_t getBlockHistogramSize() const;
*/
virtual size_t getBlockHistogramSize() const = 0;
/** @brief Sets coefficients for the linear SVM classifier.
*/
void setSVMDetector(const std::vector<float>& detector);
*/
virtual void setSVMDetector(InputArray detector) = 0;
/** @brief Returns coefficients of the classifier trained for people detection (for default window size).
*/
static std::vector<float> getDefaultPeopleDetector();
/** @brief Returns coefficients of the classifier trained for people detection (for 48x96 windows).
*/
static std::vector<float> getPeopleDetector48x96();
/** @brief Returns coefficients of the classifier trained for people detection (for 64x128 windows).
*/
static std::vector<float> getPeopleDetector64x128();
/** @brief Returns coefficients of the classifier trained for people detection.
*/
virtual Mat getDefaultPeopleDetector() const = 0;
/** @brief Performs object detection without a multi-scale window.
@param img Source image. CV_8UC1 and CV_8UC4 types are supported for now.
@param found_locations Left-top corner points of detected objects boundaries.
@param hit_threshold Threshold for the distance between features and SVM classifying plane.
Usually it is 0 and should be specfied in the detector coefficients (as the last free
coefficient). But if the free coefficient is omitted (which is allowed), you can specify it
manually here.
@param win_stride Window stride. It must be a multiple of block stride.
@param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
@param confidences Optional output array for confidences.
*/
void detect(const GpuMat& img, std::vector<Point>& found_locations,
double hit_threshold=0, Size win_stride=Size(),
Size padding=Size());
virtual void detect(InputArray img,
std::vector<Point>& found_locations,
std::vector<double>* confidences = NULL) = 0;
/** @brief Performs object detection with a multi-scale window.
@param img Source image. See cuda::HOGDescriptor::detect for type limitations.
@param found_locations Detected objects boundaries.
@param confidences Optional output array for confidences.
@param hit_threshold Threshold for the distance between features and SVM classifying plane. See
cuda::HOGDescriptor::detect for details.
@param win_stride Window stride. It must be a multiple of block stride.
@param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
@param scale0 Coefficient of the detection window increase.
@param group_threshold Coefficient to regulate the similarity threshold. When detected, some
objects can be covered by many rectangles. 0 means not to perform grouping. See groupRectangles .
*/
void detectMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
double hit_threshold=0, Size win_stride=Size(),
Size padding=Size(), double scale0=1.05,
int group_threshold=2);
void computeConfidence(const GpuMat& img, std::vector<Point>& hits, double hit_threshold,
Size win_stride, Size padding, std::vector<Point>& locations, std::vector<double>& confidences);
void computeConfidenceMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
double hit_threshold, Size win_stride, Size padding,
std::vector<HOGConfidence> &conf_out, int group_threshold);
virtual void detectMultiScale(InputArray img,
std::vector<Rect>& found_locations,
std::vector<double>* confidences = NULL) = 0;
/** @brief Returns block descriptors computed for the whole image.
@param img Source image. See cuda::HOGDescriptor::detect for type limitations.
@param win_stride Window stride. It must be a multiple of block stride.
@param descriptors 2D array of descriptors.
@param descr_format Descriptor storage format:
- **DESCR_FORMAT_ROW_BY_ROW** - Row-major order.
- **DESCR_FORMAT_COL_BY_COL** - Column-major order.
The function is mainly used to learn the classifier.
@param stream CUDA stream.
*/
void getDescriptors(const GpuMat& img, Size win_stride,
GpuMat& descriptors,
int descr_format=DESCR_FORMAT_COL_BY_COL);
Size win_size;
Size block_size;
Size block_stride;
Size cell_size;
int nbins;
double win_sigma;
double threshold_L2hys;
bool gamma_correction;
int nlevels;
protected:
void computeBlockHistograms(const GpuMat& img);
void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
double getWinSigma() const;
bool checkDetectorSize() const;
static int numPartsWithin(int size, int part_size, int stride);
static Size numPartsWithin(Size size, Size part_size, Size stride);
// Coefficients of the separating plane
float free_coef;
GpuMat detector;
// Results of the last classification step
GpuMat labels, labels_buf;
Mat labels_host;
// Results of the last histogram evaluation step
GpuMat block_hists, block_hists_buf;
// Gradients conputation results
GpuMat grad, qangle, grad_buf, qangle_buf;
// returns subbuffer with required size, reallocates buffer if nessesary.
static GpuMat getBuffer(const Size& sz, int type, GpuMat& buf);
static GpuMat getBuffer(int rows, int cols, int type, GpuMat& buf);
std::vector<GpuMat> image_scales;
virtual void compute(InputArray img,
OutputArray descriptors,
Stream& stream = Stream::Null()) = 0;
};
//

@ -71,10 +71,10 @@ PERF_TEST_P(Image, ObjDetect_HOG,
const cv::cuda::GpuMat d_img(img);
std::vector<cv::Rect> gpu_found_locations;
cv::cuda::HOGDescriptor d_hog;
d_hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
d_hog->setSVMDetector(d_hog->getDefaultPeopleDetector());
TEST_CYCLE() d_hog.detectMultiScale(d_img, gpu_found_locations);
TEST_CYCLE() d_hog->detectMultiScale(d_img, gpu_found_locations);
SANITY_CHECK(gpu_found_locations);
}
@ -82,8 +82,10 @@ PERF_TEST_P(Image, ObjDetect_HOG,
{
std::vector<cv::Rect> cpu_found_locations;
cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
cv::HOGDescriptor hog;
hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
hog.setSVMDetector(d_hog->getDefaultPeopleDetector());
TEST_CYCLE() hog.detectMultiScale(img, cpu_found_locations);

File diff suppressed because it is too large Load Diff

@ -48,9 +48,10 @@ using namespace cvtest;
//#define DUMP
struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescriptor
struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>
{
cv::cuda::DeviceInfo devInfo;
cv::Ptr<cv::cuda::HOG> hog;
#ifdef DUMP
std::ofstream f;
@ -69,23 +70,13 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
devInfo = GetParam();
cv::cuda::setDevice(devInfo.deviceID());
hog = cv::cuda::HOG::create();
}
#ifdef DUMP
void dump(const cv::Mat& blockHists, const std::vector<cv::Point>& locations)
void dump(const std::vector<cv::Point>& locations)
{
f.write((char*)&blockHists.rows, sizeof(blockHists.rows));
f.write((char*)&blockHists.cols, sizeof(blockHists.cols));
for (int i = 0; i < blockHists.rows; ++i)
{
for (int j = 0; j < blockHists.cols; ++j)
{
float val = blockHists.at<float>(i, j);
f.write((char*)&val, sizeof(val));
}
}
int nlocations = locations.size();
f.write((char*)&nlocations, sizeof(nlocations));
@ -93,21 +84,18 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
f.write((char*)&locations[i], sizeof(locations[i]));
}
#else
void compare(const cv::Mat& blockHists, const std::vector<cv::Point>& locations)
void compare(const std::vector<cv::Point>& locations)
{
// skip block_hists check
int rows, cols;
f.read((char*)&rows, sizeof(rows));
f.read((char*)&cols, sizeof(cols));
ASSERT_EQ(rows, blockHists.rows);
ASSERT_EQ(cols, blockHists.cols);
for (int i = 0; i < blockHists.rows; ++i)
for (int i = 0; i < rows; ++i)
{
for (int j = 0; j < blockHists.cols; ++j)
for (int j = 0; j < cols; ++j)
{
float val;
f.read((char*)&val, sizeof(val));
ASSERT_NEAR(val, blockHists.at<float>(i, j), 1e-3);
}
}
@ -126,54 +114,41 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
void testDetect(const cv::Mat& img)
{
gamma_correction = false;
setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
hog->setGammaCorrection(false);
hog->setSVMDetector(hog->getDefaultPeopleDetector());
std::vector<cv::Point> locations;
// Test detect
detect(loadMat(img), locations, 0);
hog->detect(loadMat(img), locations);
#ifdef DUMP
dump(cv::Mat(block_hists), locations);
dump(locations);
#else
compare(cv::Mat(block_hists), locations);
compare(locations);
#endif
// Test detect on smaller image
cv::Mat img2;
cv::resize(img, img2, cv::Size(img.cols / 2, img.rows / 2));
detect(loadMat(img2), locations, 0);
hog->detect(loadMat(img2), locations);
#ifdef DUMP
dump(cv::Mat(block_hists), locations);
dump(locations);
#else
compare(cv::Mat(block_hists), locations);
compare(locations);
#endif
// Test detect on greater image
cv::resize(img, img2, cv::Size(img.cols * 2, img.rows * 2));
detect(loadMat(img2), locations, 0);
hog->detect(loadMat(img2), locations);
#ifdef DUMP
dump(cv::Mat(block_hists), locations);
dump(locations);
#else
compare(cv::Mat(block_hists), locations);
compare(locations);
#endif
}
// Does not compare border value, as interpolation leads to delta
void compare_inner_parts(cv::Mat d1, cv::Mat d2)
{
for (int i = 1; i < blocks_per_win_y - 1; ++i)
for (int j = 1; j < blocks_per_win_x - 1; ++j)
for (int k = 0; k < block_hist_size; ++k)
{
float a = d1.at<float>(0, (i * blocks_per_win_x + j) * block_hist_size);
float b = d2.at<float>(0, (i * blocks_per_win_x + j) * block_hist_size);
ASSERT_FLOAT_EQ(a, b);
}
}
};
// desabled while resize does not fixed
@ -182,13 +157,8 @@ CUDA_TEST_P(HOG, DISABLED_Detect)
cv::Mat img_rgb = readImage("hog/road.png");
ASSERT_FALSE(img_rgb.empty());
#ifdef DUMP
f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
ASSERT_TRUE(f.is_open());
#else
f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
ASSERT_TRUE(f.is_open());
#endif
// Test on color image
cv::Mat img;
@ -198,8 +168,6 @@ CUDA_TEST_P(HOG, DISABLED_Detect)
// Test on gray image
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2GRAY);
testDetect(img);
f.close();
}
CUDA_TEST_P(HOG, GetDescriptors)
@ -216,8 +184,14 @@ CUDA_TEST_P(HOG, GetDescriptors)
// Convert train images into feature vectors (train table)
cv::cuda::GpuMat descriptors, descriptors_by_cols;
getDescriptors(d_img, win_size, descriptors, DESCR_FORMAT_ROW_BY_ROW);
getDescriptors(d_img, win_size, descriptors_by_cols, DESCR_FORMAT_COL_BY_COL);
hog->setWinStride(Size(64, 128));
hog->setDescriptorFormat(cv::cuda::HOG::DESCR_FORMAT_ROW_BY_ROW);
hog->compute(d_img, descriptors);
hog->setDescriptorFormat(cv::cuda::HOG::DESCR_FORMAT_COL_BY_COL);
hog->compute(d_img, descriptors_by_cols);
// Check size of the result train table
wins_per_img_x = 3;
@ -242,48 +216,6 @@ CUDA_TEST_P(HOG, GetDescriptors)
ASSERT_EQ(l[(y * blocks_per_win_x + x) * block_hist_size + k],
r[(x * blocks_per_win_y + y) * block_hist_size + k]);
}
/* Now we want to extract the same feature vectors, but from single images. NOTE: results will
be defferent, due to border values interpolation. Using of many small images is slower, however we
wont't call getDescriptors and will use computeBlockHistograms instead of. computeBlockHistograms
works good, it can be checked in the gpu_hog sample */
img_rgb = readImage("hog/positive1.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
computeBlockHistograms(cv::cuda::GpuMat(img));
// Everything is fine with interpolation for left top subimage
ASSERT_EQ(0.0, cv::norm((cv::Mat)block_hists, (cv::Mat)descriptors.rowRange(0, 1)));
img_rgb = readImage("hog/positive2.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
computeBlockHistograms(cv::cuda::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(1, 2)));
img_rgb = readImage("hog/negative1.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
computeBlockHistograms(cv::cuda::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(2, 3)));
img_rgb = readImage("hog/negative2.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
computeBlockHistograms(cv::cuda::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(3, 4)));
img_rgb = readImage("hog/positive3.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
computeBlockHistograms(cv::cuda::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(4, 5)));
img_rgb = readImage("hog/negative3.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
computeBlockHistograms(cv::cuda::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(5, 6)));
}
INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, HOG, ALL_DEVICES);
@ -310,12 +242,12 @@ CUDA_TEST_P(CalTech, HOG)
cv::cuda::GpuMat d_img(img);
cv::Mat markedImage(img.clone());
cv::cuda::HOGDescriptor d_hog;
d_hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
d_hog.nlevels = d_hog.nlevels + 32;
cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
d_hog->setSVMDetector(d_hog->getDefaultPeopleDetector());
d_hog->setNumLevels(d_hog->getNumLevels() + 32);
std::vector<cv::Rect> found_locations;
d_hog.detectMultiScale(d_img, found_locations);
d_hog->detectMultiScale(d_img, found_locations);
#if defined (LOG_CASCADE_STATISTIC)
for (int i = 0; i < (int)found_locations.size(); i++)
@ -326,7 +258,8 @@ CUDA_TEST_P(CalTech, HOG)
cv::rectangle(markedImage, r , CV_RGB(255, 0, 0));
}
cv::imshow("Res", markedImage); cv::waitKey();
cv::imshow("Res", markedImage);
cv::waitKey();
#endif
}

@ -244,19 +244,13 @@ void App::run()
Size win_size(args.win_width, args.win_width * 2); //(64, 128) or (48, 96)
Size win_stride(args.win_stride_width, args.win_stride_height);
cv::Ptr<cv::cuda::HOG> gpu_hog = cv::cuda::HOG::create(win_size);
cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9);
// Create HOG descriptors and detectors here
vector<float> detector;
if (win_size == Size(64, 128))
detector = cv::cuda::HOGDescriptor::getPeopleDetector64x128();
else
detector = cv::cuda::HOGDescriptor::getPeopleDetector48x96();
cv::cuda::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
cv::cuda::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
cv::cuda::HOGDescriptor::DEFAULT_NLEVELS);
cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
gpu_hog.setSVMDetector(detector);
Mat detector = gpu_hog->getDefaultPeopleDetector();
gpu_hog->setSVMDetector(detector);
cpu_hog.setSVMDetector(detector);
while (running)
@ -307,9 +301,6 @@ void App::run()
else img = img_aux;
img_to_show = img;
gpu_hog.nlevels = nlevels;
cpu_hog.nlevels = nlevels;
vector<Rect> found;
// Perform HOG classification
@ -317,11 +308,19 @@ void App::run()
if (use_gpu)
{
gpu_img.upload(img);
gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride,
Size(0, 0), scale, gr_threshold);
gpu_hog->setNumLevels(nlevels);
gpu_hog->setHitThreshold(hit_threshold);
gpu_hog->setWinStride(win_stride);
gpu_hog->setScaleFactor(scale);
gpu_hog->setGroupThreshold(gr_threshold);
gpu_hog->detectMultiScale(gpu_img, found);
}
else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
else
{
cpu_hog.nlevels = nlevels;
cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
Size(0, 0), scale, gr_threshold);
}
hogWorkEnd();
// Draw positive classified windows

Loading…
Cancel
Save