Merge pull request #2480 from vpisarev:ocl_orb

pull/2482/merge
Andrey Pavlenko 11 years ago committed by OpenCV Buildbot
commit 6b434befc9
  1. 4
      modules/features2d/perf/perf_orb.cpp
  2. 98
      modules/features2d/src/fast.cpp
  3. 162
      modules/features2d/src/opencl/fast.cl
  4. 254
      modules/features2d/src/opencl/orb.cl
  5. 779
      modules/features2d/src/orb.cpp

@ -28,7 +28,7 @@ PERF_TEST_P(orb, detect, testing::Values(ORB_IMAGES))
TEST_CYCLE() detector(frame, mask, points);
sort(points.begin(), points.end(), comparators::KeypointGreater());
SANITY_CHECK_KEYPOINTS(points);
SANITY_CHECK_KEYPOINTS(points, 1e-5);
}
PERF_TEST_P(orb, extract, testing::Values(ORB_IMAGES))
@ -72,6 +72,6 @@ PERF_TEST_P(orb, full, testing::Values(ORB_IMAGES))
TEST_CYCLE() detector(frame, mask, points, descriptors, false);
perf::sort(points, descriptors);
SANITY_CHECK_KEYPOINTS(points);
SANITY_CHECK_KEYPOINTS(points, 1e-5);
SANITY_CHECK(descriptors);
}

@ -43,6 +43,7 @@ The references are:
#include "precomp.hpp"
#include "fast_score.hpp"
#include "opencl_kernels.hpp"
#if defined _MSC_VER
# pragma warning( disable : 4127)
@ -249,8 +250,90 @@ void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bo
}
}
template<typename pt>
struct cmp_pt
{
bool operator ()(const pt& a, const pt& b) const { return a.y < b.y || (a.y == b.y && a.x < b.x); }
};
static bool ocl_FAST( InputArray _img, std::vector<KeyPoint>& keypoints,
int threshold, bool nonmax_suppression, int maxKeypoints )
{
UMat img = _img.getUMat();
if( img.cols < 7 || img.rows < 7 )
return false;
size_t globalsize[] = { img.cols-6, img.rows-6 };
ocl::Kernel fastKptKernel("FAST_findKeypoints", ocl::features2d::fast_oclsrc);
if (fastKptKernel.empty())
return false;
UMat kp1(1, maxKeypoints*2+1, CV_32S);
UMat ucounter1(kp1, Rect(0,0,1,1));
ucounter1.setTo(Scalar::all(0));
if( !fastKptKernel.args(ocl::KernelArg::ReadOnly(img),
ocl::KernelArg::PtrReadWrite(kp1),
maxKeypoints, threshold).run(2, globalsize, 0, true))
return false;
Mat mcounter;
ucounter1.copyTo(mcounter);
int i, counter = mcounter.at<int>(0);
counter = std::min(counter, maxKeypoints);
keypoints.clear();
if( counter == 0 )
return true;
if( !nonmax_suppression )
{
Mat m;
kp1(Rect(0, 0, counter*2+1, 1)).copyTo(m);
const Point* pt = (const Point*)(m.ptr<int>() + 1);
for( i = 0; i < counter; i++ )
keypoints.push_back(KeyPoint((float)pt[i].x, (float)pt[i].y, 7.f, -1, 1.f));
}
else
{
UMat kp2(1, maxKeypoints*3+1, CV_32S);
UMat ucounter2 = kp2(Rect(0,0,1,1));
ucounter2.setTo(Scalar::all(0));
ocl::Kernel fastNMSKernel("FAST_nonmaxSupression", ocl::features2d::fast_oclsrc);
if (fastNMSKernel.empty())
return false;
size_t globalsize_nms[] = { counter };
if( !fastNMSKernel.args(ocl::KernelArg::PtrReadOnly(kp1),
ocl::KernelArg::PtrReadWrite(kp2),
ocl::KernelArg::ReadOnly(img),
counter, counter).run(1, globalsize_nms, 0, true))
return false;
Mat m2;
kp2(Rect(0, 0, counter*3+1, 1)).copyTo(m2);
Point3i* pt2 = (Point3i*)(m2.ptr<int>() + 1);
int newcounter = std::min(m2.at<int>(0), counter);
std::sort(pt2, pt2 + newcounter, cmp_pt<Point3i>());
for( i = 0; i < newcounter; i++ )
keypoints.push_back(KeyPoint((float)pt2[i].x, (float)pt2[i].y, 7.f, -1, (float)pt2[i].z));
}
return true;
}
void FAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression, int type)
{
if( ocl::useOpenCL() && _img.isUMat() && type == FastFeatureDetector::TYPE_9_16 &&
ocl_FAST(_img, keypoints, threshold, nonmax_suppression, 10000))
return;
switch(type) {
case FastFeatureDetector::TYPE_5_8:
FAST_t<8>(_img, keypoints, threshold, nonmax_suppression);
@ -268,6 +351,7 @@ void FAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool
}
}
void FAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression)
{
FAST(_img, keypoints, threshold, nonmax_suppression, FastFeatureDetector::TYPE_9_16);
@ -285,10 +369,16 @@ FastFeatureDetector::FastFeatureDetector( int _threshold, bool _nonmaxSuppressio
void FastFeatureDetector::detectImpl( InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask ) const
{
Mat image = _image.getMat(), mask = _mask.getMat(), grayImage = image;
if( image.type() != CV_8U )
cvtColor( image, grayImage, COLOR_BGR2GRAY );
FAST( grayImage, keypoints, threshold, nonmaxSuppression, type );
Mat mask = _mask.getMat(), grayImage;
UMat ugrayImage;
_InputArray gray = _image;
if( _image.type() != CV_8U )
{
_OutputArray ogray = _image.isUMat() ? _OutputArray(ugrayImage) : _OutputArray(grayImage);
cvtColor( _image, ogray, COLOR_BGR2GRAY );
gray = ogray;
}
FAST( gray, keypoints, threshold, nonmaxSuppression, type );
KeyPointsFilter::runByPixelsMask( keypoints, mask );
}

@ -0,0 +1,162 @@
// OpenCL port of the FAST corner detector.
// Copyright (C) 2014, Itseez Inc. See the license at http://opencv.org
inline int cornerScore(__global const uchar* img, int step)
{
int k, tofs, v = img[0], a0 = 0, b0;
int d[16];
#define LOAD2(idx, ofs) \
tofs = ofs; d[idx] = (short)(v - img[tofs]); d[idx+8] = (short)(v - img[-tofs])
LOAD2(0, 3);
LOAD2(1, -step+3);
LOAD2(2, -step*2+2);
LOAD2(3, -step*3+1);
LOAD2(4, -step*3);
LOAD2(5, -step*3-1);
LOAD2(6, -step*2-2);
LOAD2(7, -step-3);
#pragma unroll
for( k = 0; k < 16; k += 2 )
{
int a = min((int)d[(k+1)&15], (int)d[(k+2)&15]);
a = min(a, (int)d[(k+3)&15]);
a = min(a, (int)d[(k+4)&15]);
a = min(a, (int)d[(k+5)&15]);
a = min(a, (int)d[(k+6)&15]);
a = min(a, (int)d[(k+7)&15]);
a = min(a, (int)d[(k+8)&15]);
a0 = max(a0, min(a, (int)d[k&15]));
a0 = max(a0, min(a, (int)d[(k+9)&15]));
}
b0 = -a0;
#pragma unroll
for( k = 0; k < 16; k += 2 )
{
int b = max((int)d[(k+1)&15], (int)d[(k+2)&15]);
b = max(b, (int)d[(k+3)&15]);
b = max(b, (int)d[(k+4)&15]);
b = max(b, (int)d[(k+5)&15]);
b = max(b, (int)d[(k+6)&15]);
b = max(b, (int)d[(k+7)&15]);
b = max(b, (int)d[(k+8)&15]);
b0 = min(b0, max(b, (int)d[k]));
b0 = min(b0, max(b, (int)d[(k+9)&15]));
}
return -b0-1;
}
__kernel
void FAST_findKeypoints(
__global const uchar * _img, int step, int img_offset,
int img_rows, int img_cols,
volatile __global int* kp_loc,
int max_keypoints, int threshold )
{
int j = get_global_id(0) + 3;
int i = get_global_id(1) + 3;
if (i < img_rows - 3 && j < img_cols - 3)
{
__global const uchar* img = _img + mad24(i, step, j + img_offset);
int v = img[0], t0 = v - threshold, t1 = v + threshold;
int k, tofs, v0, v1;
int m0 = 0, m1 = 0;
#define UPDATE_MASK(idx, ofs) \
tofs = ofs; v0 = img[tofs]; v1 = img[-tofs]; \
m0 |= ((v0 < t0) << idx) | ((v1 < t0) << (8 + idx)); \
m1 |= ((v0 > t1) << idx) | ((v1 > t1) << (8 + idx))
UPDATE_MASK(0, 3);
if( (m0 | m1) == 0 )
return;
UPDATE_MASK(2, -step*2+2);
UPDATE_MASK(4, -step*3);
UPDATE_MASK(6, -step*2-2);
#define EVEN_MASK (1+4+16+64)
if( ((m0 | (m0 >> 8)) & EVEN_MASK) != EVEN_MASK &&
((m1 | (m1 >> 8)) & EVEN_MASK) != EVEN_MASK )
return;
UPDATE_MASK(1, -step+3);
UPDATE_MASK(3, -step*3+1);
UPDATE_MASK(5, -step*3-1);
UPDATE_MASK(7, -step-3);
if( ((m0 | (m0 >> 8)) & 255) != 255 &&
((m1 | (m1 >> 8)) & 255) != 255 )
return;
m0 |= m0 << 16;
m1 |= m1 << 16;
#define CHECK0(i) ((m0 & (511 << i)) == (511 << i))
#define CHECK1(i) ((m1 & (511 << i)) == (511 << i))
if( CHECK0(0) + CHECK0(1) + CHECK0(2) + CHECK0(3) +
CHECK0(4) + CHECK0(5) + CHECK0(6) + CHECK0(7) +
CHECK0(8) + CHECK0(9) + CHECK0(10) + CHECK0(11) +
CHECK0(12) + CHECK0(13) + CHECK0(14) + CHECK0(15) +
CHECK1(0) + CHECK1(1) + CHECK1(2) + CHECK1(3) +
CHECK1(4) + CHECK1(5) + CHECK1(6) + CHECK1(7) +
CHECK1(8) + CHECK1(9) + CHECK1(10) + CHECK1(11) +
CHECK1(12) + CHECK1(13) + CHECK1(14) + CHECK1(15) == 0 )
return;
{
int idx = atomic_inc(kp_loc);
if( idx < max_keypoints )
{
kp_loc[1 + 2*idx] = j;
kp_loc[2 + 2*idx] = i;
}
}
}
}
///////////////////////////////////////////////////////////////////////////
// nonmaxSupression
__kernel
void FAST_nonmaxSupression(
__global const int* kp_in, volatile __global int* kp_out,
__global const uchar * _img, int step, int img_offset,
int rows, int cols, int counter, int max_keypoints)
{
const int idx = get_global_id(0);
if (idx < counter)
{
int x = kp_in[1 + 2*idx];
int y = kp_in[2 + 2*idx];
__global const uchar* img = _img + mad24(y, step, x + img_offset);
int s = cornerScore(img, step);
if( (x < 4 || s > cornerScore(img-1, step)) +
(y < 4 || s > cornerScore(img-step, step)) != 2 )
return;
if( (x >= cols - 4 || s > cornerScore(img+1, step)) +
(y >= rows - 4 || s > cornerScore(img+step, step)) +
(x < 4 || y < 4 || s > cornerScore(img-step-1, step)) +
(x >= cols - 4 || y < 4 || s > cornerScore(img-step+1, step)) +
(x < 4 || y >= rows - 4 || s > cornerScore(img+step-1, step)) +
(x >= cols - 4 || y >= rows - 4 || s > cornerScore(img+step+1, step)) == 6)
{
int new_idx = atomic_inc(kp_out);
if( new_idx < max_keypoints )
{
kp_out[1 + 3*new_idx] = x;
kp_out[2 + 3*new_idx] = y;
kp_out[3 + 3*new_idx] = s;
}
}
}
}

@ -0,0 +1,254 @@
// OpenCL port of the ORB feature detector and descriptor extractor
// Copyright (C) 2014, Itseez Inc. See the license at http://opencv.org
//
// The original code has been contributed by Peter Andreas Entschev, peter@entschev.com
#define LAYERINFO_SIZE 1
#define LAYERINFO_OFS 0
#define KEYPOINT_SIZE 3
#define ORIENTED_KEYPOINT_SIZE 4
#define KEYPOINT_X 0
#define KEYPOINT_Y 1
#define KEYPOINT_Z 2
#define KEYPOINT_ANGLE 3
/////////////////////////////////////////////////////////////
#ifdef ORB_RESPONSES
__kernel void
ORB_HarrisResponses(__global const uchar* imgbuf, int imgstep, int imgoffset0,
__global const int* layerinfo, __global const int* keypoints,
__global float* responses, int nkeypoints )
{
int idx = get_global_id(0);
if( idx < nkeypoints )
{
__global const int* kpt = keypoints + idx*KEYPOINT_SIZE;
__global const int* layer = layerinfo + kpt[KEYPOINT_Z]*LAYERINFO_SIZE;
__global const uchar* img = imgbuf + imgoffset0 + layer[LAYERINFO_OFS] +
(kpt[KEYPOINT_Y] - blockSize/2)*imgstep + (kpt[KEYPOINT_X] - blockSize/2);
int i, j;
int a = 0, b = 0, c = 0;
for( i = 0; i < blockSize; i++, img += imgstep-blockSize )
{
for( j = 0; j < blockSize; j++, img++ )
{
int Ix = (img[1] - img[-1])*2 + img[-imgstep+1] - img[-imgstep-1] + img[imgstep+1] - img[imgstep-1];
int Iy = (img[imgstep] - img[-imgstep])*2 + img[imgstep-1] - img[-imgstep-1] + img[imgstep+1] - img[-imgstep+1];
a += Ix*Ix;
b += Iy*Iy;
c += Ix*Iy;
}
}
responses[idx] = ((float)a * b - (float)c * c - HARRIS_K * (float)(a + b) * (a + b))*scale_sq_sq;
}
}
#endif
/////////////////////////////////////////////////////////////
#ifdef ORB_ANGLES
#define _DBL_EPSILON 2.2204460492503131e-16f
#define atan2_p1 (0.9997878412794807f*57.29577951308232f)
#define atan2_p3 (-0.3258083974640975f*57.29577951308232f)
#define atan2_p5 (0.1555786518463281f*57.29577951308232f)
#define atan2_p7 (-0.04432655554792128f*57.29577951308232f)
inline float fastAtan2( float y, float x )
{
float ax = fabs(x), ay = fabs(y);
float a, c, c2;
if( ax >= ay )
{
c = ay/(ax + _DBL_EPSILON);
c2 = c*c;
a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
}
else
{
c = ax/(ay + _DBL_EPSILON);
c2 = c*c;
a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
}
if( x < 0 )
a = 180.f - a;
if( y < 0 )
a = 360.f - a;
return a;
}
__kernel void
ORB_ICAngle(__global const uchar* imgbuf, int imgstep, int imgoffset0,
__global const int* layerinfo, __global const int* keypoints,
__global float* responses, const __global int* u_max,
int nkeypoints, int half_k )
{
int idx = get_global_id(0);
if( idx < nkeypoints )
{
__global const int* kpt = keypoints + idx*KEYPOINT_SIZE;
__global const int* layer = layerinfo + kpt[KEYPOINT_Z]*LAYERINFO_SIZE;
__global const uchar* center = imgbuf + imgoffset0 + layer[LAYERINFO_OFS] +
kpt[KEYPOINT_Y]*imgstep + kpt[KEYPOINT_X];
int u, v, m_01 = 0, m_10 = 0;
// Treat the center line differently, v=0
for( u = -half_k; u <= half_k; u++ )
m_10 += u * center[u];
// Go line by line in the circular patch
for( v = 1; v <= half_k; v++ )
{
// Proceed over the two lines
int v_sum = 0;
int d = u_max[v];
for( u = -d; u <= d; u++ )
{
int val_plus = center[u + v*imgstep], val_minus = center[u - v*imgstep];
v_sum += (val_plus - val_minus);
m_10 += u * (val_plus + val_minus);
}
m_01 += v * v_sum;
}
// we do not use OpenCL's atan2 intrinsic,
// because we want to get _exactly_ the same results as the CPU version
responses[idx] = fastAtan2((float)m_01, (float)m_10);
}
}
#endif
/////////////////////////////////////////////////////////////
#ifdef ORB_DESCRIPTORS
__kernel void
ORB_computeDescriptor(__global const uchar* imgbuf, int imgstep, int imgoffset0,
__global const int* layerinfo, __global const int* keypoints,
__global uchar* _desc, const __global int* pattern,
int nkeypoints, int dsize )
{
int idx = get_global_id(0);
if( idx < nkeypoints )
{
int i;
__global const int* kpt = keypoints + idx*ORIENTED_KEYPOINT_SIZE;
__global const int* layer = layerinfo + kpt[KEYPOINT_Z]*LAYERINFO_SIZE;
__global const uchar* center = imgbuf + imgoffset0 + layer[LAYERINFO_OFS] +
kpt[KEYPOINT_Y]*imgstep + kpt[KEYPOINT_X];
float angle = as_float(kpt[KEYPOINT_ANGLE]);
angle *= 0.01745329251994329547f;
float sina = sin(angle);
float cosa = cos(angle);
__global uchar* desc = _desc + idx*dsize;
#define GET_VALUE(idx) \
center[mad24(convert_int_rte(pattern[(idx)*2] * sina + pattern[(idx)*2+1] * cosa), imgstep, \
convert_int_rte(pattern[(idx)*2] * cosa - pattern[(idx)*2+1] * sina))]
for( i = 0; i < dsize; i++ )
{
int val;
#if WTA_K == 2
int t0, t1;
t0 = GET_VALUE(0); t1 = GET_VALUE(1);
val = t0 < t1;
t0 = GET_VALUE(2); t1 = GET_VALUE(3);
val |= (t0 < t1) << 1;
t0 = GET_VALUE(4); t1 = GET_VALUE(5);
val |= (t0 < t1) << 2;
t0 = GET_VALUE(6); t1 = GET_VALUE(7);
val |= (t0 < t1) << 3;
t0 = GET_VALUE(8); t1 = GET_VALUE(9);
val |= (t0 < t1) << 4;
t0 = GET_VALUE(10); t1 = GET_VALUE(11);
val |= (t0 < t1) << 5;
t0 = GET_VALUE(12); t1 = GET_VALUE(13);
val |= (t0 < t1) << 6;
t0 = GET_VALUE(14); t1 = GET_VALUE(15);
val |= (t0 < t1) << 7;
pattern += 16*2;
#elif WTA_K == 3
int t0, t1, t2;
t0 = GET_VALUE(0); t1 = GET_VALUE(1); t2 = GET_VALUE(2);
val = t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0);
t0 = GET_VALUE(3); t1 = GET_VALUE(4); t2 = GET_VALUE(5);
val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 2;
t0 = GET_VALUE(6); t1 = GET_VALUE(7); t2 = GET_VALUE(8);
val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 4;
t0 = GET_VALUE(9); t1 = GET_VALUE(10); t2 = GET_VALUE(11);
val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 6;
pattern += 12*2;
#elif WTA_K == 4
int t0, t1, t2, t3, k, val;
int a, b;
t0 = GET_VALUE(0); t1 = GET_VALUE(1);
t2 = GET_VALUE(2); t3 = GET_VALUE(3);
a = 0, b = 2;
if( t1 > t0 ) t0 = t1, a = 1;
if( t3 > t2 ) t2 = t3, b = 3;
k = t0 > t2 ? a : b;
val = k;
t0 = GET_VALUE(4); t1 = GET_VALUE(5);
t2 = GET_VALUE(6); t3 = GET_VALUE(7);
a = 0, b = 2;
if( t1 > t0 ) t0 = t1, a = 1;
if( t3 > t2 ) t2 = t3, b = 3;
k = t0 > t2 ? a : b;
val |= k << 2;
t0 = GET_VALUE(8); t1 = GET_VALUE(9);
t2 = GET_VALUE(10); t3 = GET_VALUE(11);
a = 0, b = 2;
if( t1 > t0 ) t0 = t1, a = 1;
if( t3 > t2 ) t2 = t3, b = 3;
k = t0 > t2 ? a : b;
val |= k << 4;
t0 = GET_VALUE(12); t1 = GET_VALUE(13);
t2 = GET_VALUE(14); t3 = GET_VALUE(15);
a = 0, b = 2;
if( t1 > t0 ) t0 = t1, a = 1;
if( t3 > t2 ) t2 = t3, b = 3;
k = t0 > t2 ? a : b;
val |= k << 6;
pattern += 16*2;
#else
#error "unknown/undefined WTA_K value; should be 2, 3 or 4"
#endif
desc[i] = (uchar)val;
}
}
}
#endif

@ -35,6 +35,7 @@
/** Authors: Ethan Rublee, Vincent Rabaud, Gary Bradski */
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#include <iterator>
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -43,14 +44,86 @@ namespace cv
{
const float HARRIS_K = 0.04f;
const int DESCRIPTOR_SIZE = 32;
template<typename _Tp> inline void copyVectorToUMat(const std::vector<_Tp>& v, OutputArray um)
{
if(v.empty())
um.release();
Mat(1, (int)(v.size()*sizeof(v[0])), CV_8U, (void*)&v[0]).copyTo(um);
}
static bool
ocl_HarrisResponses(const UMat& imgbuf,
const UMat& layerinfo,
const UMat& keypoints,
UMat& responses,
int nkeypoints, int blockSize, float harris_k)
{
size_t globalSize[] = {nkeypoints};
float scale = 1.f/((1 << 2) * blockSize * 255.f);
float scale_sq_sq = scale * scale * scale * scale;
ocl::Kernel hr_ker("ORB_HarrisResponses", ocl::features2d::orb_oclsrc,
format("-D ORB_RESPONSES -D blockSize=%d -D scale_sq_sq=%.12ef -D HARRIS_K=%.12ff", blockSize, scale_sq_sq, harris_k));
if( hr_ker.empty() )
return false;
return hr_ker.args(ocl::KernelArg::ReadOnlyNoSize(imgbuf),
ocl::KernelArg::PtrReadOnly(layerinfo),
ocl::KernelArg::PtrReadOnly(keypoints),
ocl::KernelArg::PtrWriteOnly(responses),
nkeypoints).run(1, globalSize, 0, true);
}
static bool
ocl_ICAngles(const UMat& imgbuf, const UMat& layerinfo,
const UMat& keypoints, UMat& responses,
const UMat& umax, int nkeypoints, int half_k)
{
size_t globalSize[] = {nkeypoints};
ocl::Kernel icangle_ker("ORB_ICAngle", ocl::features2d::orb_oclsrc, "-D ORB_ANGLES");
if( icangle_ker.empty() )
return false;
return icangle_ker.args(ocl::KernelArg::ReadOnlyNoSize(imgbuf),
ocl::KernelArg::PtrReadOnly(layerinfo),
ocl::KernelArg::PtrReadOnly(keypoints),
ocl::KernelArg::PtrWriteOnly(responses),
ocl::KernelArg::PtrReadOnly(umax),
nkeypoints, half_k).run(1, globalSize, 0, true);
}
static bool
ocl_computeOrbDescriptors(const UMat& imgbuf, const UMat& layerInfo,
const UMat& keypoints, UMat& desc, const UMat& pattern,
int nkeypoints, int dsize, int WTA_K)
{
size_t globalSize[] = {nkeypoints};
ocl::Kernel desc_ker("ORB_computeDescriptor", ocl::features2d::orb_oclsrc,
format("-D ORB_DESCRIPTORS -D WTA_K=%d", WTA_K));
if( desc_ker.empty() )
return false;
return desc_ker.args(ocl::KernelArg::ReadOnlyNoSize(imgbuf),
ocl::KernelArg::PtrReadOnly(layerInfo),
ocl::KernelArg::PtrReadOnly(keypoints),
ocl::KernelArg::PtrWriteOnly(desc),
ocl::KernelArg::PtrReadOnly(pattern),
nkeypoints, dsize).run(1, globalSize, 0, true);
}
/**
* Function that computes the Harris responses in a
* blockSize x blockSize patch at given points in an image
* blockSize x blockSize patch at given points in the image
*/
static void
HarrisResponses(const Mat& img, std::vector<KeyPoint>& pts, int blockSize, float harris_k)
HarrisResponses(const Mat& img, const std::vector<Rect>& layerinfo,
std::vector<KeyPoint>& pts, int blockSize, float harris_k)
{
CV_Assert( img.type() == CV_8UC1 && blockSize*blockSize <= 2048 );
@ -60,8 +133,7 @@ HarrisResponses(const Mat& img, std::vector<KeyPoint>& pts, int blockSize, float
int step = (int)(img.step/img.elemSize1());
int r = blockSize/2;
float scale = (1 << 2) * blockSize * 255.0f;
scale = 1.0f / scale;
float scale = 1.f/((1 << 2) * blockSize * 255.f);
float scale_sq_sq = scale * scale * scale * scale;
AutoBuffer<int> ofsbuf(blockSize*blockSize);
@ -72,10 +144,11 @@ HarrisResponses(const Mat& img, std::vector<KeyPoint>& pts, int blockSize, float
for( ptidx = 0; ptidx < ptsize; ptidx++ )
{
int x0 = cvRound(pts[ptidx].pt.x - r);
int y0 = cvRound(pts[ptidx].pt.y - r);
int x0 = cvRound(pts[ptidx].pt.x);
int y0 = cvRound(pts[ptidx].pt.y);
int z = pts[ptidx].octave;
const uchar* ptr0 = ptr00 + y0*step + x0;
const uchar* ptr0 = ptr00 + (y0 - r + layerinfo[z].y)*step + x0 - r + layerinfo[z].x;
int a = 0, b = 0, c = 0;
for( int k = 0; k < blockSize*blockSize; k++ )
@ -94,158 +167,175 @@ HarrisResponses(const Mat& img, std::vector<KeyPoint>& pts, int blockSize, float
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
static float IC_Angle(const Mat& image, const int half_k, Point2f pt,
const std::vector<int> & u_max)
static void ICAngles(const Mat& img, const std::vector<Rect>& layerinfo,
std::vector<KeyPoint>& pts, const std::vector<int> & u_max, int half_k)
{
int m_01 = 0, m_10 = 0;
int step = (int)img.step1();
size_t ptidx, ptsize = pts.size();
const uchar* center = &image.at<uchar> (cvRound(pt.y), cvRound(pt.x));
for( ptidx = 0; ptidx < ptsize; ptidx++ )
{
const Rect& layer = layerinfo[pts[ptidx].octave];
const uchar* center = &img.at<uchar>(cvRound(pts[ptidx].pt.y) + layer.y, cvRound(pts[ptidx].pt.x) + layer.x);
// Treat the center line differently, v=0
for (int u = -half_k; u <= half_k; ++u)
m_10 += u * center[u];
int m_01 = 0, m_10 = 0;
// Go line by line in the circular patch
int step = (int)image.step1();
for (int v = 1; v <= half_k; ++v)
{
// Proceed over the two lines
int v_sum = 0;
int d = u_max[v];
for (int u = -d; u <= d; ++u)
// Treat the center line differently, v=0
for (int u = -half_k; u <= half_k; ++u)
m_10 += u * center[u];
// Go line by line in the circular patch
for (int v = 1; v <= half_k; ++v)
{
int val_plus = center[u + v*step], val_minus = center[u - v*step];
v_sum += (val_plus - val_minus);
m_10 += u * (val_plus + val_minus);
// Proceed over the two lines
int v_sum = 0;
int d = u_max[v];
for (int u = -d; u <= d; ++u)
{
int val_plus = center[u + v*step], val_minus = center[u - v*step];
v_sum += (val_plus - val_minus);
m_10 += u * (val_plus + val_minus);
}
m_01 += v * v_sum;
}
m_01 += v * v_sum;
}
return fastAtan2((float)m_01, (float)m_10);
pts[ptidx].angle = fastAtan2((float)m_01, (float)m_10);
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
static void computeOrbDescriptor(const KeyPoint& kpt,
const Mat& img, const Point* pattern,
uchar* desc, int dsize, int WTA_K)
static void
computeOrbDescriptors( const Mat& imagePyramid, const std::vector<Rect>& layerInfo,
const std::vector<float>& layerScale, std::vector<KeyPoint>& keypoints,
Mat& descriptors, const std::vector<Point>& _pattern, int dsize, int WTA_K )
{
float angle = kpt.angle;
//angle = cvFloor(angle/12)*12.f;
angle *= (float)(CV_PI/180.f);
float a = (float)cos(angle), b = (float)sin(angle);
const uchar* center = &img.at<uchar>(cvRound(kpt.pt.y), cvRound(kpt.pt.x));
int step = (int)img.step;
float x, y;
int ix, iy;
#if 1
#define GET_VALUE(idx) \
(x = pattern[idx].x*a - pattern[idx].y*b, \
y = pattern[idx].x*b + pattern[idx].y*a, \
ix = cvRound(x), \
iy = cvRound(y), \
*(center + iy*step + ix) )
#else
#define GET_VALUE(idx) \
(x = pattern[idx].x*a - pattern[idx].y*b, \
y = pattern[idx].x*b + pattern[idx].y*a, \
ix = cvFloor(x), iy = cvFloor(y), \
x -= ix, y -= iy, \
cvRound(center[iy*step + ix]*(1-x)*(1-y) + center[(iy+1)*step + ix]*(1-x)*y + \
center[iy*step + ix+1]*x*(1-y) + center[(iy+1)*step + ix+1]*x*y))
#endif
if( WTA_K == 2 )
int step = (int)imagePyramid.step;
int j, i, nkeypoints = (int)keypoints.size();
for( j = 0; j < nkeypoints; j++ )
{
for (int i = 0; i < dsize; ++i, pattern += 16)
const KeyPoint& kpt = keypoints[j];
const Rect& layer = layerInfo[kpt.octave];
float scale = 1.f/layerScale[kpt.octave];
float angle = kpt.angle;
angle *= (float)(CV_PI/180.f);
float a = (float)cos(angle), b = (float)sin(angle);
const uchar* center = &imagePyramid.at<uchar>(cvRound(kpt.pt.y*scale) + layer.y,
cvRound(kpt.pt.x*scale) + layer.x);
float x, y;
int ix, iy;
const Point* pattern = &_pattern[0];
uchar* desc = descriptors.ptr<uchar>(j);
#if 1
#define GET_VALUE(idx) \
(x = pattern[idx].x*a - pattern[idx].y*b, \
y = pattern[idx].x*b + pattern[idx].y*a, \
ix = cvRound(x), \
iy = cvRound(y), \
*(center + iy*step + ix) )
#else
#define GET_VALUE(idx) \
(x = pattern[idx].x*a - pattern[idx].y*b, \
y = pattern[idx].x*b + pattern[idx].y*a, \
ix = cvFloor(x), iy = cvFloor(y), \
x -= ix, y -= iy, \
cvRound(center[iy*step + ix]*(1-x)*(1-y) + center[(iy+1)*step + ix]*(1-x)*y + \
center[iy*step + ix+1]*x*(1-y) + center[(iy+1)*step + ix+1]*x*y))
#endif
if( WTA_K == 2 )
{
int t0, t1, val;
t0 = GET_VALUE(0); t1 = GET_VALUE(1);
val = t0 < t1;
t0 = GET_VALUE(2); t1 = GET_VALUE(3);
val |= (t0 < t1) << 1;
t0 = GET_VALUE(4); t1 = GET_VALUE(5);
val |= (t0 < t1) << 2;
t0 = GET_VALUE(6); t1 = GET_VALUE(7);
val |= (t0 < t1) << 3;
t0 = GET_VALUE(8); t1 = GET_VALUE(9);
val |= (t0 < t1) << 4;
t0 = GET_VALUE(10); t1 = GET_VALUE(11);
val |= (t0 < t1) << 5;
t0 = GET_VALUE(12); t1 = GET_VALUE(13);
val |= (t0 < t1) << 6;
t0 = GET_VALUE(14); t1 = GET_VALUE(15);
val |= (t0 < t1) << 7;
desc[i] = (uchar)val;
for (i = 0; i < dsize; ++i, pattern += 16)
{
int t0, t1, val;
t0 = GET_VALUE(0); t1 = GET_VALUE(1);
val = t0 < t1;
t0 = GET_VALUE(2); t1 = GET_VALUE(3);
val |= (t0 < t1) << 1;
t0 = GET_VALUE(4); t1 = GET_VALUE(5);
val |= (t0 < t1) << 2;
t0 = GET_VALUE(6); t1 = GET_VALUE(7);
val |= (t0 < t1) << 3;
t0 = GET_VALUE(8); t1 = GET_VALUE(9);
val |= (t0 < t1) << 4;
t0 = GET_VALUE(10); t1 = GET_VALUE(11);
val |= (t0 < t1) << 5;
t0 = GET_VALUE(12); t1 = GET_VALUE(13);
val |= (t0 < t1) << 6;
t0 = GET_VALUE(14); t1 = GET_VALUE(15);
val |= (t0 < t1) << 7;
desc[i] = (uchar)val;
}
}
}
else if( WTA_K == 3 )
{
for (int i = 0; i < dsize; ++i, pattern += 12)
else if( WTA_K == 3 )
{
int t0, t1, t2, val;
t0 = GET_VALUE(0); t1 = GET_VALUE(1); t2 = GET_VALUE(2);
val = t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0);
for (i = 0; i < dsize; ++i, pattern += 12)
{
int t0, t1, t2, val;
t0 = GET_VALUE(0); t1 = GET_VALUE(1); t2 = GET_VALUE(2);
val = t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0);
t0 = GET_VALUE(3); t1 = GET_VALUE(4); t2 = GET_VALUE(5);
val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 2;
t0 = GET_VALUE(3); t1 = GET_VALUE(4); t2 = GET_VALUE(5);
val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 2;
t0 = GET_VALUE(6); t1 = GET_VALUE(7); t2 = GET_VALUE(8);
val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 4;
t0 = GET_VALUE(6); t1 = GET_VALUE(7); t2 = GET_VALUE(8);
val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 4;
t0 = GET_VALUE(9); t1 = GET_VALUE(10); t2 = GET_VALUE(11);
val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 6;
t0 = GET_VALUE(9); t1 = GET_VALUE(10); t2 = GET_VALUE(11);
val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 6;
desc[i] = (uchar)val;
desc[i] = (uchar)val;
}
}
}
else if( WTA_K == 4 )
{
for (int i = 0; i < dsize; ++i, pattern += 16)
else if( WTA_K == 4 )
{
int t0, t1, t2, t3, u, v, k, val;
t0 = GET_VALUE(0); t1 = GET_VALUE(1);
t2 = GET_VALUE(2); t3 = GET_VALUE(3);
u = 0, v = 2;
if( t1 > t0 ) t0 = t1, u = 1;
if( t3 > t2 ) t2 = t3, v = 3;
k = t0 > t2 ? u : v;
val = k;
t0 = GET_VALUE(4); t1 = GET_VALUE(5);
t2 = GET_VALUE(6); t3 = GET_VALUE(7);
u = 0, v = 2;
if( t1 > t0 ) t0 = t1, u = 1;
if( t3 > t2 ) t2 = t3, v = 3;
k = t0 > t2 ? u : v;
val |= k << 2;
t0 = GET_VALUE(8); t1 = GET_VALUE(9);
t2 = GET_VALUE(10); t3 = GET_VALUE(11);
u = 0, v = 2;
if( t1 > t0 ) t0 = t1, u = 1;
if( t3 > t2 ) t2 = t3, v = 3;
k = t0 > t2 ? u : v;
val |= k << 4;
t0 = GET_VALUE(12); t1 = GET_VALUE(13);
t2 = GET_VALUE(14); t3 = GET_VALUE(15);
u = 0, v = 2;
if( t1 > t0 ) t0 = t1, u = 1;
if( t3 > t2 ) t2 = t3, v = 3;
k = t0 > t2 ? u : v;
val |= k << 6;
desc[i] = (uchar)val;
for (i = 0; i < dsize; ++i, pattern += 16)
{
int t0, t1, t2, t3, u, v, k, val;
t0 = GET_VALUE(0); t1 = GET_VALUE(1);
t2 = GET_VALUE(2); t3 = GET_VALUE(3);
u = 0, v = 2;
if( t1 > t0 ) t0 = t1, u = 1;
if( t3 > t2 ) t2 = t3, v = 3;
k = t0 > t2 ? u : v;
val = k;
t0 = GET_VALUE(4); t1 = GET_VALUE(5);
t2 = GET_VALUE(6); t3 = GET_VALUE(7);
u = 0, v = 2;
if( t1 > t0 ) t0 = t1, u = 1;
if( t3 > t2 ) t2 = t3, v = 3;
k = t0 > t2 ? u : v;
val |= k << 2;
t0 = GET_VALUE(8); t1 = GET_VALUE(9);
t2 = GET_VALUE(10); t3 = GET_VALUE(11);
u = 0, v = 2;
if( t1 > t0 ) t0 = t1, u = 1;
if( t3 > t2 ) t2 = t3, v = 3;
k = t0 > t2 ? u : v;
val |= k << 4;
t0 = GET_VALUE(12); t1 = GET_VALUE(13);
t2 = GET_VALUE(14); t3 = GET_VALUE(15);
u = 0, v = 2;
if( t1 > t0 ) t0 = t1, u = 1;
if( t3 > t2 ) t2 = t3, v = 3;
k = t0 > t2 ? u : v;
val |= k << 6;
desc[i] = (uchar)val;
}
}
else
CV_Error( Error::StsBadSize, "Wrong WTA_K. It can be only 2, 3 or 4." );
#undef GET_VALUE
}
else
CV_Error( Error::StsBadSize, "Wrong WTA_K. It can be only 2, 3 or 4." );
#undef GET_VALUE
}
@ -591,21 +681,37 @@ void ORB::operator()(InputArray image, InputArray mask, std::vector<KeyPoint>& k
}
/** Compute the ORB keypoint orientations
* @param image the image to compute the features and descriptors on
* @param integral_image the integral image of the iamge (can be empty, but the computation will be slower)
* @param scale the scale at which we compute the orientation
* @param keypoints the resulting keypoints
*/
static void computeOrientation(const Mat& image, std::vector<KeyPoint>& keypoints,
int halfPatchSize, const std::vector<int>& umax)
static void uploadORBKeypoints(const std::vector<KeyPoint>& src, std::vector<Vec3i>& buf, OutputArray dst)
{
size_t i, n = src.size();
buf.resize(std::max(buf.size(), n));
for( i = 0; i < n; i++ )
buf[i] = Vec3i(cvRound(src[i].pt.x), cvRound(src[i].pt.y), src[i].octave);
copyVectorToUMat(buf, dst);
}
typedef union if32_t
{
// Process each keypoint
for (std::vector<KeyPoint>::iterator keypoint = keypoints.begin(),
keypointEnd = keypoints.end(); keypoint != keypointEnd; ++keypoint)
int i;
float f;
}
if32_t;
static void uploadORBKeypoints(const std::vector<KeyPoint>& src,
const std::vector<float>& layerScale,
std::vector<Vec4i>& buf, OutputArray dst)
{
size_t i, n = src.size();
buf.resize(std::max(buf.size(), n));
for( i = 0; i < n; i++ )
{
keypoint->angle = IC_Angle(image, halfPatchSize, keypoint->pt, umax);
int z = src[i].octave;
float scale = 1.f/layerScale[z];
if32_t angle;
angle.f = src[i].angle;
buf[i] = Vec4i(cvRound(src[i].pt.x*scale), cvRound(src[i].pt.y*scale), z, angle.i);
}
copyVectorToUMat(buf, dst);
}
@ -614,13 +720,18 @@ static void computeOrientation(const Mat& image, std::vector<KeyPoint>& keypoint
* @param mask_pyramid the masks to apply at every level
* @param keypoints the resulting keypoints, clustered per level
*/
static void computeKeyPoints(const std::vector<Mat>& imagePyramid,
const std::vector<Mat>& maskPyramid,
std::vector<std::vector<KeyPoint> >& allKeypoints,
int nfeatures, int firstLevel, double scaleFactor,
int edgeThreshold, int patchSize, int scoreType )
static void computeKeyPoints(const Mat& imagePyramid,
const UMat& uimagePyramid,
const Mat& maskPyramid,
const std::vector<Rect>& layerInfo,
const UMat& ulayerInfo,
const std::vector<float>& layerScale,
std::vector<KeyPoint>& allKeypoints,
int nfeatures, double scaleFactor,
int edgeThreshold, int patchSize, int scoreType,
bool useOCL )
{
int nlevels = (int)imagePyramid.size();
int i, nkeypoints, level, nlevels = (int)layerInfo.size();
std::vector<int> nfeaturesPerLevel(nlevels);
// fill the extractors and descriptors for the corresponding scales
@ -628,7 +739,7 @@ static void computeKeyPoints(const std::vector<Mat>& imagePyramid,
float ndesiredFeaturesPerScale = nfeatures*(1 - factor)/(1 - (float)std::pow((double)factor, (double)nlevels));
int sumFeatures = 0;
for( int level = 0; level < nlevels-1; level++ )
for( level = 0; level < nlevels-1; level++ )
{
nfeaturesPerLevel[level] = cvRound(ndesiredFeaturesPerScale);
sumFeatures += nfeaturesPerLevel[level];
@ -657,66 +768,116 @@ static void computeKeyPoints(const std::vector<Mat>& imagePyramid,
++v0;
}
allKeypoints.resize(nlevels);
allKeypoints.clear();
std::vector<KeyPoint> keypoints;
std::vector<int> counters(nlevels);
keypoints.reserve(nfeaturesPerLevel[0]*2);
for (int level = 0; level < nlevels; ++level)
for( level = 0; level < nlevels; level++ )
{
int featuresNum = nfeaturesPerLevel[level];
allKeypoints[level].reserve(featuresNum*2);
std::vector<KeyPoint> & keypoints = allKeypoints[level];
Mat img = imagePyramid(layerInfo[level]);
Mat mask = maskPyramid.empty() ? Mat() : maskPyramid(layerInfo[level]);
// Detect FAST features, 20 is a good threshold
FastFeatureDetector fd(20, true);
fd.detect(imagePyramid[level], keypoints, maskPyramid[level]);
fd.detect(img, keypoints, mask);
// Remove keypoints very close to the border
KeyPointsFilter::runByImageBorder(keypoints, imagePyramid[level].size(), edgeThreshold);
KeyPointsFilter::runByImageBorder(keypoints, img.size(), edgeThreshold);
if( scoreType == ORB::HARRIS_SCORE )
{
// Keep more points than necessary as FAST does not give amazing corners
KeyPointsFilter::retainBest(keypoints, 2 * featuresNum);
// Keep more points than necessary as FAST does not give amazing corners
KeyPointsFilter::retainBest(keypoints, scoreType == ORB::HARRIS_SCORE ? 2 * featuresNum : featuresNum);
nkeypoints = (int)keypoints.size();
counters[level] = nkeypoints;
// Compute the Harris cornerness (better scoring than FAST)
HarrisResponses(imagePyramid[level], keypoints, 7, HARRIS_K);
float sf = layerScale[level];
for( i = 0; i < nkeypoints; i++ )
{
keypoints[i].octave = level;
keypoints[i].size = patchSize*sf;
}
//cull to the final desired level, using the new Harris scores or the original FAST scores.
KeyPointsFilter::retainBest(keypoints, featuresNum);
std::copy(keypoints.begin(), keypoints.end(), std::back_inserter(allKeypoints));
}
std::vector<Vec3i> ukeypoints_buf;
float sf = getScale(level, firstLevel, scaleFactor);
nkeypoints = (int)allKeypoints.size();
Mat responses;
UMat ukeypoints, uresponses(1, nkeypoints, CV_32F);
// Set the level of the coordinates
for (std::vector<KeyPoint>::iterator keypoint = keypoints.begin(),
keypointEnd = keypoints.end(); keypoint != keypointEnd; ++keypoint)
// Select best features using the Harris cornerness (better scoring than FAST)
if( scoreType == ORB::HARRIS_SCORE )
{
if( useOCL )
{
keypoint->octave = level;
keypoint->size = patchSize*sf;
uploadORBKeypoints(allKeypoints, ukeypoints_buf, ukeypoints);
useOCL = ocl_HarrisResponses( uimagePyramid, ulayerInfo, ukeypoints,
uresponses, nkeypoints, 7, HARRIS_K );
if( useOCL )
{
uresponses.copyTo(responses);
for( i = 0; i < nkeypoints; i++ )
allKeypoints[i].response = responses.at<float>(i);
}
}
computeOrientation(imagePyramid[level], keypoints, halfPatchSize, umax);
if( !useOCL )
HarrisResponses(imagePyramid, layerInfo, allKeypoints, 7, HARRIS_K);
std::vector<KeyPoint> newAllKeypoints;
newAllKeypoints.reserve(nfeaturesPerLevel[0]*nlevels);
int offset = 0;
for( level = 0; level < nlevels; level++ )
{
int featuresNum = nfeaturesPerLevel[level];
nkeypoints = counters[level];
keypoints.resize(nkeypoints);
std::copy(allKeypoints.begin() + offset,
allKeypoints.begin() + offset + nkeypoints,
keypoints.begin());
offset += nkeypoints;
//cull to the final desired level, using the new Harris scores.
KeyPointsFilter::retainBest(keypoints, featuresNum);
std::copy(keypoints.begin(), keypoints.end(), std::back_inserter(newAllKeypoints));
}
std::swap(allKeypoints, newAllKeypoints);
}
}
nkeypoints = (int)allKeypoints.size();
if( useOCL )
{
UMat uumax;
if( useOCL )
copyVectorToUMat(umax, uumax);
/** Compute the ORB decriptors
* @param image the image to compute the features and descriptors on
* @param integral_image the integral image of the image (can be empty, but the computation will be slower)
* @param level the scale at which we compute the orientation
* @param keypoints the keypoints to use
* @param descriptors the resulting descriptors
*/
static void computeDescriptors(const Mat& image, std::vector<KeyPoint>& keypoints, Mat& descriptors,
const std::vector<Point>& pattern, int dsize, int WTA_K)
{
//convert to grayscale if more than one color
CV_Assert(image.type() == CV_8UC1);
//create the descriptor mat, keypoints.size() rows, BYTES cols
descriptors = Mat::zeros((int)keypoints.size(), dsize, CV_8UC1);
uploadORBKeypoints(allKeypoints, ukeypoints_buf, ukeypoints);
useOCL = ocl_ICAngles(uimagePyramid, ulayerInfo, ukeypoints, uresponses, uumax,
nkeypoints, halfPatchSize);
for (size_t i = 0; i < keypoints.size(); i++)
computeOrbDescriptor(keypoints[i], image, &pattern[0], descriptors.ptr((int)i), dsize, WTA_K);
if( useOCL )
{
uresponses.copyTo(responses);
for( i = 0; i < nkeypoints; i++ )
allKeypoints[i].angle = responses.at<float>(i);
}
}
if( !useOCL )
{
ICAngles(imagePyramid, layerInfo, allKeypoints, umax, halfPatchSize);
}
for( i = 0; i < nkeypoints; i++ )
{
float scale = layerScale[allKeypoints[i].octave];
allKeypoints[i].pt *= scale;
}
}
@ -728,8 +889,8 @@ static void computeDescriptors(const Mat& image, std::vector<KeyPoint>& keypoint
* @param do_keypoints if true, the keypoints are computed, otherwise used as an input
* @param do_descriptors if true, also computes the descriptors
*/
void ORB::operator()( InputArray _image, InputArray _mask, std::vector<KeyPoint>& _keypoints,
OutputArray _descriptors, bool useProvidedKeypoints) const
void ORB::operator()( InputArray _image, InputArray _mask, std::vector<KeyPoint>& keypoints,
OutputArray _descriptors, bool useProvidedKeypoints ) const
{
CV_Assert(patchSize >= 2);
@ -744,11 +905,14 @@ void ORB::operator()( InputArray _image, InputArray _mask, std::vector<KeyPoint>
int halfPatchSize = patchSize / 2;
int border = std::max(edgeThreshold, std::max(halfPatchSize, HARRIS_BLOCK_SIZE/2))+1;
bool useOCL = ocl::useOpenCL();
Mat image = _image.getMat(), mask = _mask.getMat();
if( image.type() != CV_8UC1 )
cvtColor(_image, image, COLOR_BGR2GRAY);
int levelsNum = this->nlevels;
int i, level, nLevels = this->nlevels, nkeypoints = (int)keypoints.size();
bool sortedByLevel = true;
if( !do_keypoints )
{
@ -761,129 +925,145 @@ void ORB::operator()( InputArray _image, InputArray _mask, std::vector<KeyPoint>
//
// In short, ultimately the descriptor should
// ignore octave parameter and deal only with the keypoint size.
levelsNum = 0;
for( size_t i = 0; i < _keypoints.size(); i++ )
levelsNum = std::max(levelsNum, std::max(_keypoints[i].octave, 0));
levelsNum++;
nLevels = 0;
for( i = 0; i < nkeypoints; i++ )
{
level = keypoints[i].octave;
CV_Assert(level >= 0);
if( i > 0 && level < keypoints[i-1].octave )
sortedByLevel = false;
nLevels = std::max(nLevels, level);
}
nLevels++;
}
std::vector<Rect> layerInfo(nLevels);
std::vector<int> layerOfs(nLevels);
std::vector<float> layerScale(nLevels);
Mat imagePyramid, maskPyramid;
UMat uimagePyramid, ulayerInfo;
int level_dy = image.rows + border*2;
Point level_ofs(0,0);
Size bufSize((image.cols + border*2 + 15) & -16, 0);
for( level = 0; level < nLevels; level++ )
{
float scale = getScale(level, firstLevel, scaleFactor);
layerScale[level] = scale;
Size sz(cvRound(image.cols/scale), cvRound(image.rows/scale));
Size wholeSize(sz.width + border*2, sz.height + border*2);
if( level_ofs.x + wholeSize.width > bufSize.width )
{
level_ofs = Point(0, level_ofs.y + level_dy);
level_dy = wholeSize.height;
}
Rect linfo(level_ofs.x + border, level_ofs.y + border, sz.width, sz.height);
layerInfo[level] = linfo;
layerOfs[level] = linfo.y*bufSize.width + linfo.x;
level_ofs.x += wholeSize.width;
}
bufSize.height = level_ofs.y + level_dy;
imagePyramid.create(bufSize, CV_8U);
if( !mask.empty() )
maskPyramid.create(bufSize, CV_8U);
Mat prevImg = image, prevMask = mask;
// Pre-compute the scale pyramids
std::vector<Mat> imagePyramid(levelsNum), maskPyramid(levelsNum);
for (int level = 0; level < levelsNum; ++level)
for (level = 0; level < nLevels; ++level)
{
float scale = 1/getScale(level, firstLevel, scaleFactor);
Size sz(cvRound(image.cols*scale), cvRound(image.rows*scale));
Rect linfo = layerInfo[level];
Size sz(linfo.width, linfo.height);
Size wholeSize(sz.width + border*2, sz.height + border*2);
Mat temp(wholeSize, image.type()), masktemp;
imagePyramid[level] = temp(Rect(border, border, sz.width, sz.height));
Rect wholeLinfo = Rect(linfo.x - border, linfo.y - border, wholeSize.width, wholeSize.height);
Mat extImg = imagePyramid(wholeLinfo), extMask;
Mat currImg = extImg(Rect(border, border, sz.width, sz.height)), currMask;
if( !mask.empty() )
{
masktemp = Mat(wholeSize, mask.type());
maskPyramid[level] = masktemp(Rect(border, border, sz.width, sz.height));
extMask = maskPyramid(wholeLinfo);
currMask = extMask(Rect(border, border, sz.width, sz.height));
}
// Compute the resized image
if( level != firstLevel )
{
if( level < firstLevel )
resize(prevImg, currImg, sz, 0, 0, INTER_LINEAR);
if( !mask.empty() )
{
resize(image, imagePyramid[level], sz, 0, 0, INTER_LINEAR);
if (!mask.empty())
resize(mask, maskPyramid[level], sz, 0, 0, INTER_LINEAR);
}
else
{
resize(imagePyramid[level-1], imagePyramid[level], sz, 0, 0, INTER_LINEAR);
if (!mask.empty())
{
resize(maskPyramid[level-1], maskPyramid[level], sz, 0, 0, INTER_LINEAR);
threshold(maskPyramid[level], maskPyramid[level], 254, 0, THRESH_TOZERO);
}
resize(prevMask, currMask, sz, 0, 0, INTER_LINEAR);
if( level > firstLevel )
threshold(currMask, currMask, 254, 0, THRESH_TOZERO);
}
copyMakeBorder(imagePyramid[level], temp, border, border, border, border,
copyMakeBorder(currImg, extImg, border, border, border, border,
BORDER_REFLECT_101+BORDER_ISOLATED);
if (!mask.empty())
copyMakeBorder(maskPyramid[level], masktemp, border, border, border, border,
copyMakeBorder(currMask, extMask, border, border, border, border,
BORDER_CONSTANT+BORDER_ISOLATED);
}
else
{
copyMakeBorder(image, temp, border, border, border, border,
copyMakeBorder(image, extImg, border, border, border, border,
BORDER_REFLECT_101);
if( !mask.empty() )
copyMakeBorder(mask, masktemp, border, border, border, border,
copyMakeBorder(mask, extMask, border, border, border, border,
BORDER_CONSTANT+BORDER_ISOLATED);
}
prevImg = currImg;
prevMask = currMask;
}
// Pre-compute the keypoints (we keep the best over all scales, so this has to be done beforehand
std::vector < std::vector<KeyPoint> > allKeypoints;
if( useOCL )
copyVectorToUMat(layerOfs, ulayerInfo);
if( do_keypoints )
{
// Get keypoints, those will be far enough from the border that no check will be required for the descriptor
computeKeyPoints(imagePyramid, maskPyramid, allKeypoints,
nfeatures, firstLevel, scaleFactor,
edgeThreshold, patchSize, scoreType);
// make sure we have the right number of keypoints keypoints
/*std::vector<KeyPoint> temp;
for (int level = 0; level < n_levels; ++level)
{
std::vector<KeyPoint>& keypoints = all_keypoints[level];
temp.insert(temp.end(), keypoints.begin(), keypoints.end());
keypoints.clear();
}
if( useOCL )
imagePyramid.copyTo(uimagePyramid);
KeyPoint::retainBest(temp, n_features_);
for (std::vector<KeyPoint>::iterator keypoint = temp.begin(),
keypoint_end = temp.end(); keypoint != keypoint_end; ++keypoint)
all_keypoints[keypoint->octave].push_back(*keypoint);*/
// Get keypoints, those will be far enough from the border that no check will be required for the descriptor
computeKeyPoints(imagePyramid, uimagePyramid, maskPyramid,
layerInfo, ulayerInfo, layerScale, keypoints,
nfeatures, scaleFactor, edgeThreshold, patchSize, scoreType, useOCL);
}
else
{
// Remove keypoints very close to the border
KeyPointsFilter::runByImageBorder(_keypoints, image.size(), edgeThreshold);
// Cluster the input keypoints depending on the level they were computed at
allKeypoints.resize(levelsNum);
for (std::vector<KeyPoint>::iterator keypoint = _keypoints.begin(),
keypointEnd = _keypoints.end(); keypoint != keypointEnd; ++keypoint)
allKeypoints[keypoint->octave].push_back(*keypoint);
KeyPointsFilter::runByImageBorder(keypoints, image.size(), edgeThreshold);
// Make sure we rescale the coordinates
for (int level = 0; level < levelsNum; ++level)
if( !sortedByLevel )
{
if (level == firstLevel)
continue;
std::vector<KeyPoint> & keypoints = allKeypoints[level];
float scale = 1/getScale(level, firstLevel, scaleFactor);
for (std::vector<KeyPoint>::iterator keypoint = keypoints.begin(),
keypointEnd = keypoints.end(); keypoint != keypointEnd; ++keypoint)
keypoint->pt *= scale;
std::vector<std::vector<KeyPoint> > allKeypoints(nLevels);
nkeypoints = (int)keypoints.size();
for( i = 0; i < nkeypoints; i++ )
{
level = keypoints[i].octave;
CV_Assert(0 <= level);
allKeypoints[level].push_back(keypoints[i]);
}
keypoints.clear();
for( level = 0; level < nLevels; level++ )
std::copy(allKeypoints[level].begin(), allKeypoints[level].end(), std::back_inserter(keypoints));
}
}
Mat descriptors;
std::vector<Point> pattern;
if( do_descriptors )
{
int nkeypoints = 0;
for (int level = 0; level < levelsNum; ++level)
nkeypoints += (int)allKeypoints[level].size();
int dsize = descriptorSize();
nkeypoints = (int)keypoints.size();
if( nkeypoints == 0 )
_descriptors.release();
else
{
_descriptors.create(nkeypoints, descriptorSize(), CV_8U);
descriptors = _descriptors.getMat();
_descriptors.release();
return;
}
_descriptors.create(nkeypoints, dsize, CV_8U);
std::vector<Point> pattern;
const int npoints = 512;
Point patternbuf[npoints];
const Point* pattern0 = (const Point*)bit_pattern_31_;
@ -903,43 +1083,36 @@ void ORB::operator()( InputArray _image, InputArray _mask, std::vector<KeyPoint>
int ntuples = descriptorSize()*4;
initializeOrbPattern(pattern0, pattern, ntuples, WTA_K, npoints);
}
}
_keypoints.clear();
int offset = 0;
for (int level = 0; level < levelsNum; ++level)
{
// Get the features and compute their orientation
std::vector<KeyPoint>& keypoints = allKeypoints[level];
int nkeypoints = (int)keypoints.size();
// Compute the descriptors
if (do_descriptors)
for( level = 0; level < nLevels; level++ )
{
Mat desc;
if (!descriptors.empty())
{
desc = descriptors.rowRange(offset, offset + nkeypoints);
}
offset += nkeypoints;
// preprocess the resized image
Mat& workingMat = imagePyramid[level];
Mat workingMat = imagePyramid(layerInfo[level]);
//boxFilter(working_mat, working_mat, working_mat.depth(), Size(5,5), Point(-1,-1), true, BORDER_REFLECT_101);
GaussianBlur(workingMat, workingMat, Size(7, 7), 2, 2, BORDER_REFLECT_101);
computeDescriptors(workingMat, keypoints, desc, pattern, descriptorSize(), WTA_K);
}
// Copy to the output data
if (level != firstLevel)
if( useOCL )
{
imagePyramid.copyTo(uimagePyramid);
std::vector<Vec4i> kptbuf;
UMat ukeypoints, upattern;
copyVectorToUMat(pattern, upattern);
uploadORBKeypoints(keypoints, layerScale, kptbuf, ukeypoints);
UMat udescriptors = _descriptors.getUMat();
useOCL = ocl_computeOrbDescriptors(uimagePyramid, ulayerInfo,
ukeypoints, udescriptors, upattern,
nkeypoints, dsize, WTA_K);
}
if( !useOCL )
{
float scale = getScale(level, firstLevel, scaleFactor);
for (std::vector<KeyPoint>::iterator keypoint = keypoints.begin(),
keypointEnd = keypoints.end(); keypoint != keypointEnd; ++keypoint)
keypoint->pt *= scale;
Mat descriptors = _descriptors.getMat();
computeOrbDescriptors(imagePyramid, layerInfo, layerScale,
keypoints, descriptors, pattern, dsize, WTA_K);
}
// And add the keypoints to the output
_keypoints.insert(_keypoints.end(), keypoints.begin(), keypoints.end());
}
}

Loading…
Cancel
Save