mirror of https://github.com/opencv/opencv.git
Open Source Computer Vision Library
https://opencv.org/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
459 lines
16 KiB
459 lines
16 KiB
/*M///////////////////////////////////////////////////////////////////////////////////////// |
|
// |
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
|
// |
|
// By downloading, copying, installing or using the software you agree to this license. |
|
// If you do not agree to this license, do not download, install, |
|
// copy or use the software. |
|
// |
|
// |
|
// License Agreement |
|
// For Open Source Computer Vision Library |
|
// |
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. |
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. |
|
// Third party copyrights are property of their respective owners. |
|
// |
|
// @Authors |
|
// Peng Xiao, pengxiao@multicorewareinc.com |
|
// |
|
// Redistribution and use in source and binary forms, with or without modification, |
|
// are permitted provided that the following conditions are met: |
|
// |
|
// * Redistribution's of source code must retain the above copyright notice, |
|
// this list of conditions and the following disclaimer. |
|
// |
|
// * Redistribution's in binary form must reproduce the above copyright notice, |
|
// this list of conditions and the following disclaimer in the documentation |
|
// and/or other materials provided with the distribution. |
|
// |
|
// * The name of the copyright holders may not be used to endorse or promote products |
|
// derived from this software without specific prior written permission. |
|
// |
|
// This software is provided by the copyright holders and contributors as is and |
|
// any express or implied warranties, including, but not limited to, the implied |
|
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
|
// In no event shall the Intel Corporation or contributors be liable for any direct, |
|
// indirect, incidental, special, exemplary, or consequential damages |
|
// (including, but not limited to, procurement of substitute goods or services; |
|
// loss of use, data, or profits; or business interruption) however caused |
|
// and on any theory of liability, whether in contract, strict liability, |
|
// or tort (including negligence or otherwise) arising in any way out of |
|
// the use of this software, even if advised of the possibility of such damage. |
|
// |
|
//M*/ |
|
#include "precomp.hpp" |
|
#include "surf.hpp" |
|
|
|
#include <cstdio> |
|
#include <sstream> |
|
#include "opencl_kernels.hpp" |
|
|
|
namespace cv |
|
{ |
|
|
|
// Orientation-search constants. ORI_LOCAL_SIZE is the number of
// ORI_SEARCH_INC-sized steps in 360 (presumably degrees) and is used as the
// work-group width when the orientation kernel is launched.
enum
{
    ORI_SEARCH_INC = 5,
    ORI_LOCAL_SIZE = 360 / ORI_SEARCH_INC
};
|
|
|
/* Returns the Haar wavelet size for the given octave and layer, i.e.
   (9 + 6 * layer) shifted left by `octave` bits. */
static inline int calcSize(int octave, int layer)
{
    /* Wavelet size used by the first layer of the first octave. */
    const int baseSize = 9;

    /* Growth of the wavelet size from one layer to the next. It should stay
       an even number so that all wavelet sizes within an octave share the
       same parity; that keeps the layers above and below a sample correctly
       aligned when its neighbors are looked up. */
    const int layerStep = 6;

    const int sizeInFirstOctave = baseSize + layerStep * layer;
    return sizeInFirstOctave << octave;
}
|
|
|
|
|
// Default constructor: puts the object into the "not probed yet" state.
// setImage() refuses to work until init() has left status positive.
SURF_OCL::SURF_OCL()
{
    // params is otherwise assigned only in init(); give it a defined null
    // value so the `params && ...` assertion in setImage() tests something
    // meaningful instead of reading an indeterminate pointer.
    params = 0;
    img_cols = img_rows = maxCandidates = maxFeatures = 0;
    haveImageSupport = false;
    // Negative status means init() has not examined the OpenCL device yet.
    status = -1;
}
|
|
|
bool SURF_OCL::init(const SURF* p) |
|
{ |
|
params = p; |
|
if(status < 0) |
|
{ |
|
status = 0; |
|
if(ocl::haveOpenCL()) |
|
{ |
|
const ocl::Device& dev = ocl::Device::getDefault(); |
|
if( dev.type() == ocl::Device::TYPE_CPU || dev.doubleFPConfig() == 0 ) |
|
return false; |
|
haveImageSupport = false;//dev.imageSupport(); |
|
kerOpts = haveImageSupport ? "-D HAVE_IMAGE2D -D DOUBLE_SUPPORT" : ""; |
|
// status = 1; |
|
} |
|
} |
|
return status > 0; |
|
} |
|
|
|
|
|
// Prepares all per-image state: validates the image size against the octave
// settings, sizes the feature/candidate limits, resets the counters, converts
// the input to a single-channel 8-bit UMat and computes its integral image.
//
// _img:  input image; must not be empty. Anything other than CV_8UC1 is
//        converted with COLOR_BGR2GRAY.
// _mask: must be empty; masks are not supported by this path.
// Returns false when the object is not initialised (status <= 0) or a mask
// is supplied, true otherwise. Size violations raise via CV_Assert.
bool SURF_OCL::setImage(InputArray _img, InputArray _mask)
{
    if( status <= 0 )
        return false;
    if( !_mask.empty())
        return false;
    int imgtype = _img.type();
    CV_Assert(!_img.empty());
    CV_Assert(params && params->nOctaves > 0 && params->nOctaveLayers > 0);

    // The image must be at least as large as the wavelet of the last octave.
    int min_size = calcSize(params->nOctaves - 1, 0);
    Size sz = _img.size();
    img_cols = sz.width;
    img_rows = sz.height;
    CV_Assert(img_rows >= min_size && img_cols >= min_size);

    // The last octave must still have samples left once the margin is removed.
    const int layer_rows = img_rows >> (params->nOctaves - 1);
    const int layer_cols = img_cols >> (params->nOctaves - 1);
    const int min_margin = ((calcSize((params->nOctaves - 1), 2) >> 1) >> (params->nOctaves - 1)) + 1;
    CV_Assert(layer_rows - 2 * min_margin > 0);
    CV_Assert(layer_cols - 2 * min_margin > 0);

    // Feature limit is 1% of the pixel count, capped at 65535. The pixel
    // count is formed in double (not int) so that very large images cannot
    // overflow a signed int; the cap is applied before converting to int so
    // the conversion itself is always in range.
    const float pixelCount = static_cast<float>(static_cast<double>(img_cols) * img_rows);
    maxFeatures = static_cast<int>(std::min(pixelCount * 0.01f, 65535.0f));
    maxCandidates = std::min(static_cast<int>(1.5 * maxFeatures), 65535);

    CV_Assert(maxFeatures > 0);

    // One counter per octave plus one extra; element 0 is read back as the
    // feature count in detectKeypoints().
    counters.create(1, params->nOctaves + 1, CV_32SC1);
    counters.setTo(Scalar::all(0));

    img.release();
    if(_img.isUMat() && imgtype == CV_8UC1)
        img = _img.getUMat();
    else if( imgtype == CV_8UC1 )
        _img.copyTo(img);
    else
        cvtColor(_img, img, COLOR_BGR2GRAY);

    integral(img, sum);

    if(haveImageSupport)
    {
        imgTex = ocl::Image2D(img);
        sumTex = ocl::Image2D(sum);
    }

    return true;
}
|
|
|
|
|
// Runs the detection stage octave by octave and leaves the result in
// `keypoints` (ROWS_COUNT rows, one column per detected feature).
//
// keypoints: output; reallocated, zeroed, filled by the kernels and finally
//            cropped to the number of features actually found.
// Returns false as soon as any kernel launch fails; otherwise the result of
// the orientation step (setUpRight or calcOrientation).
bool SURF_OCL::detectKeypoints(UMat &keypoints)
{
    // Response buffers: all (nOctaveLayers + 2) layers share one image-sized
    // slot each, stacked along the row axis.
    const int stackedRows = img_rows * (params->nOctaveLayers + 2);
    det.create(stackedRows, img_cols, CV_32F);
    trace.create(stackedRows, img_cols, CV_32FC1);

    maxPosBuffer.create(1, maxCandidates, CV_32SC4);
    keypoints.create(SURF_OCL::ROWS_COUNT, maxFeatures, CV_32F);
    keypoints.setTo(Scalar::all(0));

    for (int octave = 0; octave < params->nOctaves; ++octave)
    {
        const int layer_rows = img_rows >> octave;
        const int layer_cols = img_cols >> octave;

        const bool responsesReady = calcLayerDetAndTrace(octave, layer_rows);
        if( !responsesReady )
            return false;

        // Counter slot 0 is read back as the feature count below, so the
        // per-octave candidate counter lives at index 1 + octave.
        const bool maximaFound = findMaximaInLayer(1 + octave, octave, layer_rows, layer_cols);
        if( !maximaFound )
            return false;

        // Read the candidate count on the host, then drop the mapping before
        // the next kernel touches `counters`.
        Mat hostCounters = counters.getMat(ACCESS_READ);
        const int candidateCount = std::min(hostCounters.at<int>(1 + octave), maxCandidates);
        hostCounters.release();

        if (candidateCount <= 0)
            continue;

        if(!interpolateKeypoint(candidateCount, keypoints, octave, layer_rows, maxFeatures))
            return false;
    }

    Mat hostCounters = counters.getMat(ACCESS_READ);
    const int featureCount = std::min(hostCounters.at<int>(0), maxFeatures);
    hostCounters.release();

    // Keep only the columns that were actually filled.
    const Rect usedColumns(0, 0, featureCount, keypoints.rows);
    keypoints = UMat(keypoints, usedColumns);

    return params->upright ? setUpRight(keypoints) : calcOrientation(keypoints);
}
|
|
|
|
|
// Launches the SURF_setUpRight kernel with one work-item per keypoint column.
//
// keypoints: keypoint matrix, passed to the kernel as read-write.
// Returns true when there is nothing to do or the kernel ran successfully.
bool SURF_OCL::setUpRight(UMat &keypoints)
{
    int nFeatures = keypoints.cols;
    if( nFeatures == 0 )
        return true;

    // Explicit cast: int -> size_t inside a braced initializer is a narrowing
    // conversion and is rejected by C++11 and later.
    size_t globalThreads[3] = {static_cast<size_t>(nFeatures), 1};
    ocl::Kernel kerUpRight("SURF_setUpRight", ocl::nonfree::surf_oclsrc, kerOpts);
    // Local size 0 (null) lets the runtime choose the work-group size.
    return kerUpRight.args(ocl::KernelArg::ReadWrite(keypoints)).run(2, globalThreads, 0, true);
}
|
|
|
bool SURF_OCL::computeDescriptors(const UMat &keypoints, OutputArray _descriptors) |
|
{ |
|
int dsize = params->descriptorSize(); |
|
int nFeatures = keypoints.cols; |
|
if (nFeatures == 0) |
|
{ |
|
_descriptors.release(); |
|
return true; |
|
} |
|
_descriptors.create(nFeatures, dsize, CV_32F); |
|
UMat descriptors; |
|
if( _descriptors.isUMat() ) |
|
descriptors = _descriptors.getUMat(); |
|
else |
|
descriptors.create(nFeatures, dsize, CV_32F); |
|
|
|
ocl::Kernel kerCalcDesc, kerNormDesc; |
|
|
|
if( dsize == 64 ) |
|
{ |
|
kerCalcDesc.create("SURF_computeDescriptors64", ocl::nonfree::surf_oclsrc, kerOpts); |
|
kerNormDesc.create("SURF_normalizeDescriptors64", ocl::nonfree::surf_oclsrc, kerOpts); |
|
} |
|
else |
|
{ |
|
CV_Assert(dsize == 128); |
|
kerCalcDesc.create("SURF_computeDescriptors128", ocl::nonfree::surf_oclsrc, kerOpts); |
|
kerNormDesc.create("SURF_normalizeDescriptors128", ocl::nonfree::surf_oclsrc, kerOpts); |
|
} |
|
|
|
size_t localThreads[] = {6, 6}; |
|
size_t globalThreads[] = {nFeatures*localThreads[0], localThreads[1]}; |
|
|
|
if(haveImageSupport) |
|
{ |
|
kerCalcDesc.args(imgTex, |
|
img_rows, img_cols, |
|
ocl::KernelArg::ReadOnlyNoSize(keypoints), |
|
ocl::KernelArg::WriteOnlyNoSize(descriptors)); |
|
} |
|
else |
|
{ |
|
kerCalcDesc.args(ocl::KernelArg::ReadOnlyNoSize(img), |
|
img_rows, img_cols, |
|
ocl::KernelArg::ReadOnlyNoSize(keypoints), |
|
ocl::KernelArg::WriteOnlyNoSize(descriptors)); |
|
} |
|
|
|
if(!kerCalcDesc.run(2, globalThreads, localThreads, true)) |
|
return false; |
|
|
|
size_t localThreads_n[] = {dsize, 1}; |
|
size_t globalThreads_n[] = {nFeatures*localThreads_n[0], localThreads_n[1]}; |
|
|
|
globalThreads[0] = nFeatures * localThreads[0]; |
|
globalThreads[1] = localThreads[1]; |
|
bool ok = kerNormDesc.args(ocl::KernelArg::ReadWriteNoSize(descriptors)). |
|
run(2, globalThreads_n, localThreads_n, true); |
|
if(ok && !_descriptors.isUMat()) |
|
descriptors.copyTo(_descriptors); |
|
return ok; |
|
} |
|
|
|
|
|
void SURF_OCL::uploadKeypoints(const std::vector<KeyPoint> &keypoints, UMat &keypointsGPU) |
|
{ |
|
if (keypoints.empty()) |
|
keypointsGPU.release(); |
|
else |
|
{ |
|
Mat keypointsCPU(SURF_OCL::ROWS_COUNT, static_cast<int>(keypoints.size()), CV_32FC1); |
|
|
|
float *kp_x = keypointsCPU.ptr<float>(SURF_OCL::X_ROW); |
|
float *kp_y = keypointsCPU.ptr<float>(SURF_OCL::Y_ROW); |
|
int *kp_laplacian = keypointsCPU.ptr<int>(SURF_OCL::LAPLACIAN_ROW); |
|
int *kp_octave = keypointsCPU.ptr<int>(SURF_OCL::OCTAVE_ROW); |
|
float *kp_size = keypointsCPU.ptr<float>(SURF_OCL::SIZE_ROW); |
|
float *kp_dir = keypointsCPU.ptr<float>(SURF_OCL::ANGLE_ROW); |
|
float *kp_hessian = keypointsCPU.ptr<float>(SURF_OCL::HESSIAN_ROW); |
|
|
|
for (size_t i = 0, size = keypoints.size(); i < size; ++i) |
|
{ |
|
const KeyPoint &kp = keypoints[i]; |
|
kp_x[i] = kp.pt.x; |
|
kp_y[i] = kp.pt.y; |
|
kp_octave[i] = kp.octave; |
|
kp_size[i] = kp.size; |
|
kp_dir[i] = kp.angle; |
|
kp_hessian[i] = kp.response; |
|
kp_laplacian[i] = 1; |
|
} |
|
|
|
keypointsCPU.copyTo(keypointsGPU); |
|
} |
|
} |
|
|
|
void SURF_OCL::downloadKeypoints(const UMat &keypointsGPU, std::vector<KeyPoint> &keypoints) |
|
{ |
|
const int nFeatures = keypointsGPU.cols; |
|
|
|
if (nFeatures == 0) |
|
keypoints.clear(); |
|
else |
|
{ |
|
CV_Assert(keypointsGPU.type() == CV_32FC1 && keypointsGPU.rows == ROWS_COUNT); |
|
|
|
Mat keypointsCPU = keypointsGPU.getMat(ACCESS_READ); |
|
keypoints.resize(nFeatures); |
|
|
|
float *kp_x = keypointsCPU.ptr<float>(SURF_OCL::X_ROW); |
|
float *kp_y = keypointsCPU.ptr<float>(SURF_OCL::Y_ROW); |
|
int *kp_laplacian = keypointsCPU.ptr<int>(SURF_OCL::LAPLACIAN_ROW); |
|
int *kp_octave = keypointsCPU.ptr<int>(SURF_OCL::OCTAVE_ROW); |
|
float *kp_size = keypointsCPU.ptr<float>(SURF_OCL::SIZE_ROW); |
|
float *kp_dir = keypointsCPU.ptr<float>(SURF_OCL::ANGLE_ROW); |
|
float *kp_hessian = keypointsCPU.ptr<float>(SURF_OCL::HESSIAN_ROW); |
|
|
|
for (int i = 0; i < nFeatures; ++i) |
|
{ |
|
KeyPoint &kp = keypoints[i]; |
|
kp.pt.x = kp_x[i]; |
|
kp.pt.y = kp_y[i]; |
|
kp.class_id = kp_laplacian[i]; |
|
kp.octave = kp_octave[i]; |
|
kp.size = kp_size[i]; |
|
kp.angle = kp_dir[i]; |
|
kp.response = kp_hessian[i]; |
|
} |
|
} |
|
} |
|
|
|
// Convenience wrapper: prepares the image and, if that succeeds, runs
// keypoint detection. Returns false when either step reports failure.
bool SURF_OCL::detect(InputArray _img, InputArray _mask, UMat& keypoints)
{
    // && short-circuits, so detection is not attempted after a failed setImage.
    return setImage(_img, _mask) && detectKeypoints(keypoints);
}
|
|
|
|
|
// Prepares the image, optionally detects keypoints, then computes descriptors.
//
// useProvidedKeypoints: when true, detection is skipped and `keypoints` is
//                       handed to computeDescriptors() as supplied.
// Returns false if any stage fails.
bool SURF_OCL::detectAndCompute(InputArray _img, InputArray _mask, UMat& keypoints,
                                OutputArray _descriptors, bool useProvidedKeypoints )
{
    const bool imageReady = setImage(_img, _mask);
    if( !imageReady )
        return false;

    if( !useProvidedKeypoints )
    {
        const bool detected = detectKeypoints(keypoints);
        if( !detected )
            return false;
    }

    return computeDescriptors(keypoints, _descriptors);
}
|
|
|
// Integer division of a by b, rounded up (for non-negative a and positive b).
inline int divUp(int a, int b)
{
    const int biased = a + b - 1;
    return biased / b;
}
|
|
|
//////////////////////////// |
|
// kernel caller definitions |
|
bool SURF_OCL::calcLayerDetAndTrace(int octave, int c_layer_rows) |
|
{ |
|
int nOctaveLayers = params->nOctaveLayers; |
|
const int min_size = calcSize(octave, 0); |
|
const int max_samples_i = 1 + ((img_rows - min_size) >> octave); |
|
const int max_samples_j = 1 + ((img_cols - min_size) >> octave); |
|
|
|
size_t localThreads[] = {16, 16}; |
|
size_t globalThreads[] = |
|
{ |
|
divUp(max_samples_j, (int)localThreads[0]) * localThreads[0], |
|
divUp(max_samples_i, (int)localThreads[1]) * localThreads[1] * (nOctaveLayers + 2) |
|
}; |
|
ocl::Kernel kerCalcDetTrace("SURF_calcLayerDetAndTrace", ocl::nonfree::surf_oclsrc, kerOpts); |
|
if(haveImageSupport) |
|
{ |
|
kerCalcDetTrace.args(sumTex, |
|
img_rows, img_cols, nOctaveLayers, |
|
octave, c_layer_rows, |
|
ocl::KernelArg::WriteOnlyNoSize(det), |
|
ocl::KernelArg::WriteOnlyNoSize(trace)); |
|
} |
|
else |
|
{ |
|
kerCalcDetTrace.args(ocl::KernelArg::ReadOnlyNoSize(sum), |
|
img_rows, img_cols, nOctaveLayers, |
|
octave, c_layer_rows, |
|
ocl::KernelArg::WriteOnlyNoSize(det), |
|
ocl::KernelArg::WriteOnlyNoSize(trace)); |
|
} |
|
return kerCalcDetTrace.run(2, globalThreads, localThreads, true); |
|
} |
|
|
|
bool SURF_OCL::findMaximaInLayer(int counterOffset, int octave, |
|
int layer_rows, int layer_cols) |
|
{ |
|
const int min_margin = ((calcSize(octave, 2) >> 1) >> octave) + 1; |
|
int nOctaveLayers = params->nOctaveLayers; |
|
|
|
size_t localThreads[3] = {16, 16}; |
|
size_t globalThreads[3] = |
|
{ |
|
divUp(layer_cols - 2 * min_margin, (int)localThreads[0] - 2) * localThreads[0], |
|
divUp(layer_rows - 2 * min_margin, (int)localThreads[1] - 2) * nOctaveLayers * localThreads[1] |
|
}; |
|
|
|
ocl::Kernel kerFindMaxima("SURF_findMaximaInLayer", ocl::nonfree::surf_oclsrc, kerOpts); |
|
return kerFindMaxima.args(ocl::KernelArg::ReadOnlyNoSize(det), |
|
ocl::KernelArg::ReadOnlyNoSize(trace), |
|
ocl::KernelArg::PtrReadWrite(maxPosBuffer), |
|
ocl::KernelArg::PtrReadWrite(counters), |
|
counterOffset, img_rows, img_cols, |
|
octave, nOctaveLayers, |
|
layer_rows, layer_cols, |
|
maxCandidates, |
|
(float)params->hessianThreshold).run(2, globalThreads, localThreads, true); |
|
} |
|
|
|
bool SURF_OCL::interpolateKeypoint(int maxCounter, UMat &keypoints, int octave, int layer_rows, int max_features) |
|
{ |
|
size_t localThreads[3] = {3, 3, 3}; |
|
size_t globalThreads[3] = {maxCounter*localThreads[0], localThreads[1], 3}; |
|
|
|
ocl::Kernel kerInterp("SURF_interpolateKeypoint", ocl::nonfree::surf_oclsrc, kerOpts); |
|
|
|
return kerInterp.args(ocl::KernelArg::ReadOnlyNoSize(det), |
|
ocl::KernelArg::PtrReadOnly(maxPosBuffer), |
|
ocl::KernelArg::ReadWriteNoSize(keypoints), |
|
ocl::KernelArg::PtrReadWrite(counters), |
|
img_rows, img_cols, octave, layer_rows, max_features). |
|
run(3, globalThreads, localThreads, true); |
|
} |
|
|
|
// Launches SURF_calcOrientation with one work-group of ORI_LOCAL_SIZE
// work-items per keypoint column.
//
// keypoints: keypoint matrix, passed to the kernel as read-write.
// Returns true when there are no keypoints or the kernel ran successfully.
bool SURF_OCL::calcOrientation(UMat &keypoints)
{
    const int featureCount = keypoints.cols;
    if( featureCount == 0 )
        return true;

    size_t localThreads[3] = {ORI_LOCAL_SIZE, 1};
    size_t globalThreads[3] = {featureCount * localThreads[0], 1};

    ocl::Kernel kerOri("SURF_calcOrientation", ocl::nonfree::surf_oclsrc, kerOpts);

    // Only the form in which the integral image is handed over differs.
    if( !haveImageSupport )
    {
        kerOri.args(ocl::KernelArg::ReadOnlyNoSize(sum),
                    img_rows, img_cols,
                    ocl::KernelArg::ReadWriteNoSize(keypoints));
    }
    else
    {
        kerOri.args(sumTex, img_rows, img_cols,
                    ocl::KernelArg::ReadWriteNoSize(keypoints));
    }

    return kerOri.run(2, globalThreads, localThreads, true);
}
|
|
|
}
|
|
|