fixed extra memory allocations.

pull/13383/head
Anatoly Baksheev 14 years ago
parent 05173022bb
commit 8b8ad17f4a
  1. 12
      modules/gpu/include/opencv2/gpu/gpu.hpp
  2. 119
      modules/gpu/src/hog.cpp

@ -1353,14 +1353,20 @@ namespace cv
GpuMat detector; GpuMat detector;
// Results of the last classification step // Results of the last classification step
GpuMat labels; GpuMat labels, labels_buf;
Mat labels_host; Mat labels_host;
// Results of the last histogram evaluation step // Results of the last histogram evaluation step
GpuMat block_hists; GpuMat block_hists, block_hists_buf;
// Gradients computation results // Gradients computation results
GpuMat grad, qangle; GpuMat grad, qangle, grad_buf, qangle_buf;
// returns subbuffer with required size, reallocates buffer if necessary.
static GpuMat getBuffer(const Size& sz, int type, GpuMat& buf);
static GpuMat getBuffer(int rows, int cols, int type, GpuMat& buf);
std::vector<GpuMat> image_scales;
}; };

@ -95,9 +95,8 @@ void resize_8UC4(const cv::gpu::DevMem2D& src, cv::gpu::DevMem2D dst);
}}} }}}
cv::gpu::HOGDescriptor::HOGDescriptor(Size win_size, Size block_size, Size block_stride, cv::gpu::HOGDescriptor::HOGDescriptor(Size win_size, Size block_size, Size block_stride, Size cell_size,
Size cell_size, int nbins, double win_sigma, double threshold_L2hys, int nbins, double win_sigma, double threshold_L2hys, bool gamma_correction, int nlevels)
bool gamma_correction, int nlevels)
: win_size(win_size), : win_size(win_size),
block_size(block_size), block_size(block_size),
block_stride(block_stride), block_stride(block_stride),
@ -108,55 +107,45 @@ cv::gpu::HOGDescriptor::HOGDescriptor(Size win_size, Size block_size, Size block
gamma_correction(gamma_correction), gamma_correction(gamma_correction),
nlevels(nlevels) nlevels(nlevels)
{ {
CV_Assert((win_size.width - block_size.width) % block_stride.width == 0 && CV_Assert((win_size.width - block_size.width ) % block_stride.width == 0 &&
(win_size.height - block_size.height) % block_stride.height == 0); (win_size.height - block_size.height) % block_stride.height == 0);
CV_Assert(block_size.width % cell_size.width == 0 && CV_Assert(block_size.width % cell_size.width == 0 && block_size.height % cell_size.height == 0);
block_size.height % cell_size.height == 0);
CV_Assert(block_stride == cell_size); CV_Assert(block_stride == cell_size);
CV_Assert(cell_size == Size(8, 8)); CV_Assert(cell_size == Size(8, 8));
Size cells_per_block = Size(block_size.width / cell_size.width, Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height);
block_size.height / cell_size.height);
CV_Assert(cells_per_block == Size(2, 2)); CV_Assert(cells_per_block == Size(2, 2));
cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
hog::set_up_constants(nbins, block_stride.width, block_stride.height, hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height);
blocks_per_win.width, blocks_per_win.height);
} }
size_t cv::gpu::HOGDescriptor::getDescriptorSize() const size_t cv::gpu::HOGDescriptor::getDescriptorSize() const
{ {
return numPartsWithin(win_size, block_size, block_stride).area() * return numPartsWithin(win_size, block_size, block_stride).area() * getBlockHistogramSize();
getBlockHistogramSize();
} }
size_t cv::gpu::HOGDescriptor::getBlockHistogramSize() const
size_t cv::gpu::HOGDescriptor::getBlockHistogramSize() const { {
Size cells_per_block = Size(block_size.width / cell_size.width, Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height);
block_size.height / cell_size.height);
return (size_t)(nbins * cells_per_block.area()); return (size_t)(nbins * cells_per_block.area());
} }
double cv::gpu::HOGDescriptor::getWinSigma() const double cv::gpu::HOGDescriptor::getWinSigma() const
{ {
return win_sigma >= 0 ? win_sigma : (block_size.width + block_size.height) / 8.0; return win_sigma >= 0 ? win_sigma : (block_size.width + block_size.height) / 8.0;
} }
bool cv::gpu::HOGDescriptor::checkDetectorSize() const bool cv::gpu::HOGDescriptor::checkDetectorSize() const
{ {
size_t detector_size = detector.rows * detector.cols; size_t detector_size = detector.rows * detector.cols;
size_t descriptor_size = getDescriptorSize(); size_t descriptor_size = getDescriptorSize();
return detector_size == 0 || detector_size == descriptor_size || return detector_size == 0 || detector_size == descriptor_size || detector_size == descriptor_size + 1;
detector_size == descriptor_size + 1;
} }
void cv::gpu::HOGDescriptor::setSVMDetector(const vector<float>& detector) void cv::gpu::HOGDescriptor::setSVMDetector(const vector<float>& detector)
{ {
std::vector<float> detector_reordered(detector.size()); std::vector<float> detector_reordered(detector.size());
@ -181,16 +170,36 @@ void cv::gpu::HOGDescriptor::setSVMDetector(const vector<float>& detector)
CV_Assert(checkDetectorSize()); CV_Assert(checkDetectorSize());
} }
// Returns a sub-matrix of `buf` with exactly the requested size and type,
// anchored at the buffer's top-left corner.
//
// sz   - requested size (width x height) of the returned sub-matrix.
// type - requested element type.
// buf  - backing buffer; it is (re)created when it is empty, has a different
//        type, or is smaller than `sz` in either dimension. When it is grown,
//        the new extent is the per-dimension maximum of the current and the
//        requested size.
cv::gpu::GpuMat cv::gpu::HOGDescriptor::getBuffer(const Size& sz, int type, GpuMat& buf)
{
    if (buf.empty() || buf.type() != type)
        buf.create(sz, type);
    // Each dimension has to be compared against its own requested extent
    // (cols vs. width, rows vs. height); otherwise a buffer with too few rows
    // could be kept and the ROI taken below would exceed it.
    else if (buf.cols < sz.width || buf.rows < sz.height)
        buf.create(std::max(buf.rows, sz.height), std::max(buf.cols, sz.width), type);

    return buf(Rect(Point(0,0), sz));
}
// Convenience overload taking the requested extent as separate row and column
// counts; it packs them into a Size (width = cols, height = rows) and forwards
// to the Size-based getBuffer overload.
cv::gpu::GpuMat cv::gpu::HOGDescriptor::getBuffer(int rows, int cols, int type, GpuMat& buf)
{
    const Size requested(cols, rows);
    return getBuffer(requested, type, buf);
}
void cv::gpu::HOGDescriptor::computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle) void cv::gpu::HOGDescriptor::computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle)
{ {
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
// grad.create(img.size(), CV_32FC2);
grad = getBuffer(img.size(), CV_32FC2, grad_buf);
grad.create(img.size(), CV_32FC2); // qangle.create(img.size(), CV_8UC2);
qangle.create(img.size(), CV_8UC2); qangle = getBuffer(img.size(), CV_8UC2, qangle_buf);
float angleScale = (float)(nbins / CV_PI); float angleScale = (float)(nbins / CV_PI);
switch (img.type()) { switch (img.type())
{
case CV_8UC1: case CV_8UC1:
hog::compute_gradients_8UC1(nbins, img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction); hog::compute_gradients_8UC1(nbins, img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction);
break; break;
@ -207,11 +216,12 @@ void cv::gpu::HOGDescriptor::computeBlockHistograms(const GpuMat& img)
size_t block_hist_size = getBlockHistogramSize(); size_t block_hist_size = getBlockHistogramSize();
Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride); Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
block_hists.create(1, block_hist_size * blocks_per_img.area(), CV_32F);
hog::compute_hists(nbins, block_stride.width, block_stride.height, // block_hists.create(1, block_hist_size * blocks_per_img.area(), CV_32F);
img.rows, img.cols, grad, qangle, (float)getWinSigma(), block_hists = getBuffer(1, block_hist_size * blocks_per_img.area(), CV_32F, block_hists_buf);
block_hists.ptr<float>());
hog::compute_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols,
grad, qangle, (float)getWinSigma(), block_hists.ptr<float>());
hog::normalize_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols, hog::normalize_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols,
block_hists.ptr<float>(), (float)threshold_L2hys); block_hists.ptr<float>(), (float)threshold_L2hys);
@ -220,14 +230,13 @@ void cv::gpu::HOGDescriptor::computeBlockHistograms(const GpuMat& img)
void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors, int descr_format) void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors, int descr_format)
{ {
CV_Assert(win_stride.width % block_stride.width == 0 && CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
win_stride.height % block_stride.height == 0);
computeBlockHistograms(img); computeBlockHistograms(img);
const int block_hist_size = getBlockHistogramSize(); const int block_hist_size = getBlockHistogramSize();
Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
descriptors.create(wins_per_img.area(), blocks_per_win.area() * block_hist_size, CV_32F); descriptors.create(wins_per_img.area(), blocks_per_win.area() * block_hist_size, CV_32F);
@ -235,13 +244,11 @@ void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride,
{ {
case DESCR_FORMAT_ROW_BY_ROW: case DESCR_FORMAT_ROW_BY_ROW:
hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width, hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width,
win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), descriptors);
descriptors);
break; break;
case DESCR_FORMAT_COL_BY_COL: case DESCR_FORMAT_COL_BY_COL:
hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width, hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width,
win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), descriptors);
descriptors);
break; break;
default: default:
CV_Error(CV_StsBadArg, "Unknown descriptor format"); CV_Error(CV_StsBadArg, "Unknown descriptor format");
@ -249,8 +256,7 @@ void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride,
} }
void cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector<Point>& hits, double hit_threshold, void cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector<Point>& hits, double hit_threshold, Size win_stride, Size padding)
Size win_stride, Size padding)
{ {
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
CV_Assert(padding == Size(0, 0)); CV_Assert(padding == Size(0, 0));
@ -264,11 +270,11 @@ void cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector<Point>& hits, doub
if (win_stride == Size()) if (win_stride == Size())
win_stride = block_stride; win_stride = block_stride;
else else
CV_Assert(win_stride.width % block_stride.width == 0 && CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
win_stride.height % block_stride.height == 0);
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
labels.create(1, wins_per_img.area(), CV_8U); // labels.create(1, wins_per_img.area(), CV_8U);
labels = getBuffer(1, wins_per_img.area(), CV_8U, labels_buf);
hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width, hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width,
win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(),
@ -286,11 +292,12 @@ void cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector<Point>& hits, doub
} }
void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat& img, vector<Rect>& found_locations,
double hit_threshold, Size win_stride, Size padding, void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat& img, vector<Rect>& found_locations, double hit_threshold,
double scale0, int group_threshold) Size win_stride, Size padding, double scale0, int group_threshold)
{ {
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
vector<double> level_scale; vector<double> level_scale;
double scale = 1.; double scale = 1.;
@ -306,6 +313,7 @@ void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat& img, vector<Rect>& f
} }
levels = std::max(levels, 1); levels = std::max(levels, 1);
level_scale.resize(levels); level_scale.resize(levels);
image_scales.resize(levels);
std::vector<Rect> all_candidates; std::vector<Rect> all_candidates;
vector<Point> locations; vector<Point> locations;
@ -319,12 +327,14 @@ void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat& img, vector<Rect>& f
if (sz == img.size()) if (sz == img.size())
smaller_img = img; smaller_img = img;
else else
{ {
smaller_img.create(sz, img.type()); image_scales[i].create(sz, img.type());
switch (img.type()) { switch (img.type())
case CV_8UC1: hog::resize_8UC1(img, smaller_img); break; {
case CV_8UC4: hog::resize_8UC4(img, smaller_img); break; case CV_8UC1: hog::resize_8UC1(img, image_scales[i]); break;
case CV_8UC4: hog::resize_8UC4(img, image_scales[i]); break;
} }
smaller_img = image_scales[i];
} }
detect(smaller_img, locations, hit_threshold, win_stride, padding); detect(smaller_img, locations, hit_threshold, win_stride, padding);
@ -337,18 +347,14 @@ void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat& img, vector<Rect>& f
groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/); groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/);
} }
int cv::gpu::HOGDescriptor::numPartsWithin(int size, int part_size, int stride) int cv::gpu::HOGDescriptor::numPartsWithin(int size, int part_size, int stride)
{ {
return (size - part_size + stride) / stride; return (size - part_size + stride) / stride;
} }
cv::Size cv::gpu::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, cv::Size stride)
cv::Size cv::gpu::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size,
cv::Size stride)
{ {
return Size(numPartsWithin(size.width, part_size.width, stride.width), return Size(numPartsWithin(size.width, part_size.width, stride.width), numPartsWithin(size.height, part_size.height, stride.height));
numPartsWithin(size.height, part_size.height, stride.height));
} }
std::vector<float> cv::gpu::HOGDescriptor::getDefaultPeopleDetector() std::vector<float> cv::gpu::HOGDescriptor::getDefaultPeopleDetector()
@ -356,7 +362,6 @@ std::vector<float> cv::gpu::HOGDescriptor::getDefaultPeopleDetector()
return getPeopleDetector64x128(); return getPeopleDetector64x128();
} }
std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector48x96() std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector48x96()
{ {
static const float detector[] = { static const float detector[] = {

Loading…
Cancel
Save