Open Source Computer Vision Library https://opencv.org/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

459 lines
16 KiB

/*M/////////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include "surf.hpp"
#include <cstdio>
#include <sstream>
#include "opencl_kernels.hpp"
namespace cv
{
// Constants for the orientation stage. ORI_LOCAL_SIZE (360 / ORI_SEARCH_INC = 72)
// is the work-group width used when launching the SURF_calcOrientation kernel.
// NOTE(review): ORI_SEARCH_INC is presumably an angular step in degrees - confirm
// against the kernel source.
enum { ORI_SEARCH_INC=5, ORI_LOCAL_SIZE=(360 / ORI_SEARCH_INC) };
/* Returns the Haar wavelet size used for `layer` of `octave`:
   (9 + 6 * layer), doubled once per octave. */
static inline int calcSize(int octave, int layer)
{
    /* Wavelet size at the first layer of the first octave. */
    static const int kFirstWaveletSize = 9;

    /* Growth of the wavelet size from one layer to the next. Keep this even:
       then every size inside an octave has the same parity, so the layers
       directly above and below a sample stay aligned when its neighbours
       are inspected. */
    static const int kWaveletSizeStep = 6;

    const int sizeInFirstOctave = kFirstWaveletSize + kWaveletSizeStep * layer;
    return sizeInFirstOctave << octave;
}
// Creates a detector with no image attached and the device probe still pending.
SURF_OCL::SURF_OCL()
{
    // A negative status means init() has not probed the OpenCL device yet.
    status = -1;
    haveImageSupport = false;

    // Sizes and capacities are filled in by setImage().
    maxFeatures = 0;
    maxCandidates = 0;
    img_rows = 0;
    img_cols = 0;
}
bool SURF_OCL::init(const SURF* p)
{
params = p;
if(status < 0)
{
status = 0;
if(ocl::haveOpenCL())
{
const ocl::Device& dev = ocl::Device::getDefault();
if( dev.type() == ocl::Device::TYPE_CPU || dev.doubleFPConfig() == 0 )
return false;
haveImageSupport = false;//dev.imageSupport();
kerOpts = haveImageSupport ? "-D HAVE_IMAGE2D -D DOUBLE_SUPPORT" : "";
// status = 1;
}
}
return status > 0;
}
// Prepares the detector for a new input image.
//
// Returns false when init() has not succeeded (status <= 0) or when a
// non-empty mask is supplied. Otherwise validates the image size against the
// octave configuration, sizes the feature/candidate limits and the counter
// buffer, stores an 8-bit single-channel copy of the image in `img`, computes
// its integral image into `sum`, and returns true.
// Raises via CV_Assert on an empty image, invalid parameters or an image that
// is too small for the last octave.
bool SURF_OCL::setImage(InputArray _img, InputArray _mask)
{
    if( status <= 0 || !_mask.empty() )
        return false;

    const int imgtype = _img.type();
    CV_Assert(!_img.empty());
    CV_Assert(params && params->nOctaves > 0 && params->nOctaveLayers > 0);

    // All size checks are made against the last (coarsest) octave.
    const int lastOctave = params->nOctaves - 1;
    int min_size = calcSize(lastOctave, 0);

    const Size sz = _img.size();
    img_rows = sz.height;
    img_cols = sz.width;
    CV_Assert(img_rows >= min_size && img_cols >= min_size);

    const int layer_rows = img_rows >> lastOctave;
    const int layer_cols = img_cols >> lastOctave;
    const int min_margin = ((calcSize(lastOctave, 2) >> 1) >> lastOctave) + 1;
    CV_Assert(layer_rows - 2 * min_margin > 0);
    CV_Assert(layer_cols - 2 * min_margin > 0);

    // Limits: 1% of the pixel count for features and 1.5x that for candidates,
    // both capped at 65535.
    maxFeatures = std::min(static_cast<int>(img_cols*img_rows * 0.01f), 65535);
    maxCandidates = std::min(static_cast<int>(1.5 * maxFeatures), 65535);
    CV_Assert(maxFeatures > 0);

    // Entry 0 is read back as the feature count and entry 1 + octave as the
    // candidate count of that octave (see detectKeypoints).
    counters.create(1, params->nOctaves + 1, CV_32SC1);
    counters.setTo(Scalar::all(0));

    // Obtain the working image as CV_8UC1: convert anything else to gray,
    // share an existing UMat, or upload a copy.
    img.release();
    if( imgtype != CV_8UC1 )
        cvtColor(_img, img, COLOR_BGR2GRAY);
    else if( _img.isUMat() )
        img = _img.getUMat();
    else
        _img.copyTo(img);

    integral(img, sum);

    if( haveImageSupport )
    {
        imgTex = ocl::Image2D(img);
        sumTex = ocl::Image2D(sum);
    }
    return true;
}
// Runs the detection pipeline on the image given to setImage().
//
// For every octave: compute the determinant/trace layers, find maxima, and
// interpolate the candidates into `keypoints`. Afterwards `keypoints` is
// cropped to the number of features found (at most maxFeatures) and handed to
// setUpRight() or calcOrientation(), depending on params->upright.
// Returns false as soon as any kernel launch fails.
bool SURF_OCL::detectKeypoints(UMat &keypoints)
{
    // det and trace stack (nOctaveLayers + 2) layers vertically; every layer
    // gets a full img_rows x img_cols slot regardless of the octave.
    const int stackedRows = img_rows * (params->nOctaveLayers + 2);
    det.create(stackedRows, img_cols, CV_32F);
    trace.create(stackedRows, img_cols, CV_32FC1);

    maxPosBuffer.create(1, maxCandidates, CV_32SC4);
    keypoints.create(SURF_OCL::ROWS_COUNT, maxFeatures, CV_32F);
    keypoints.setTo(Scalar::all(0));

    for (int octave = 0; octave < params->nOctaves; ++octave)
    {
        const int layer_rows = img_rows >> octave;
        const int layer_cols = img_cols >> octave;

        if( !calcLayerDetAndTrace(octave, layer_rows) ||
            !findMaximaInLayer(1 + octave, octave, layer_rows, layer_cols) )
            return false;

        // Read this octave's candidate count; the host mapping is dropped at
        // the end of the scope, before the next kernel is launched.
        int candidateCount = 0;
        {
            Mat hostCounters = counters.getMat(ACCESS_READ);
            candidateCount = std::min(hostCounters.at<int>(1 + octave), maxCandidates);
        }

        if( candidateCount > 0 &&
            !interpolateKeypoint(candidateCount, keypoints, octave, layer_rows, maxFeatures) )
            return false;
    }

    // Counter 0 holds the total number of features written.
    int featureCounter = 0;
    {
        Mat hostCounters = counters.getMat(ACCESS_READ);
        featureCounter = std::min(hostCounters.at<int>(0), maxFeatures);
    }

    // Keep only the columns that were actually filled.
    keypoints = UMat(keypoints, Rect(0, 0, featureCounter, keypoints.rows));

    return params->upright ? setUpRight(keypoints) : calcOrientation(keypoints);
}
// Launches the SURF_setUpRight kernel with one work-item per keypoint column.
// Returns true immediately when there are no keypoints; otherwise returns the
// result of the kernel launch.
bool SURF_OCL::setUpRight(UMat &keypoints)
{
    const int nFeatures = keypoints.cols;
    if( nFeatures == 0 )
        return true;

    // The explicit cast is required: converting a non-constant int to size_t
    // inside a braced initializer is a narrowing conversion.
    size_t globalThreads[3] = {static_cast<size_t>(nFeatures), 1};

    ocl::Kernel kerUpRight("SURF_setUpRight", ocl::nonfree::surf_oclsrc, kerOpts);
    return kerUpRight.args(ocl::KernelArg::ReadWrite(keypoints)).run(2, globalThreads, 0, true);
}
bool SURF_OCL::computeDescriptors(const UMat &keypoints, OutputArray _descriptors)
{
int dsize = params->descriptorSize();
int nFeatures = keypoints.cols;
if (nFeatures == 0)
{
_descriptors.release();
return true;
}
_descriptors.create(nFeatures, dsize, CV_32F);
UMat descriptors;
if( _descriptors.isUMat() )
descriptors = _descriptors.getUMat();
else
descriptors.create(nFeatures, dsize, CV_32F);
ocl::Kernel kerCalcDesc, kerNormDesc;
if( dsize == 64 )
{
kerCalcDesc.create("SURF_computeDescriptors64", ocl::nonfree::surf_oclsrc, kerOpts);
kerNormDesc.create("SURF_normalizeDescriptors64", ocl::nonfree::surf_oclsrc, kerOpts);
}
else
{
CV_Assert(dsize == 128);
kerCalcDesc.create("SURF_computeDescriptors128", ocl::nonfree::surf_oclsrc, kerOpts);
kerNormDesc.create("SURF_normalizeDescriptors128", ocl::nonfree::surf_oclsrc, kerOpts);
}
size_t localThreads[] = {6, 6};
size_t globalThreads[] = {nFeatures*localThreads[0], localThreads[1]};
if(haveImageSupport)
{
kerCalcDesc.args(imgTex,
img_rows, img_cols,
ocl::KernelArg::ReadOnlyNoSize(keypoints),
ocl::KernelArg::WriteOnlyNoSize(descriptors));
}
else
{
kerCalcDesc.args(ocl::KernelArg::ReadOnlyNoSize(img),
img_rows, img_cols,
ocl::KernelArg::ReadOnlyNoSize(keypoints),
ocl::KernelArg::WriteOnlyNoSize(descriptors));
}
if(!kerCalcDesc.run(2, globalThreads, localThreads, true))
return false;
size_t localThreads_n[] = {dsize, 1};
size_t globalThreads_n[] = {nFeatures*localThreads_n[0], localThreads_n[1]};
globalThreads[0] = nFeatures * localThreads[0];
globalThreads[1] = localThreads[1];
bool ok = kerNormDesc.args(ocl::KernelArg::ReadWriteNoSize(descriptors)).
run(2, globalThreads_n, localThreads_n, true);
if(ok && !_descriptors.isUMat())
descriptors.copyTo(_descriptors);
return ok;
}
void SURF_OCL::uploadKeypoints(const std::vector<KeyPoint> &keypoints, UMat &keypointsGPU)
{
if (keypoints.empty())
keypointsGPU.release();
else
{
Mat keypointsCPU(SURF_OCL::ROWS_COUNT, static_cast<int>(keypoints.size()), CV_32FC1);
float *kp_x = keypointsCPU.ptr<float>(SURF_OCL::X_ROW);
float *kp_y = keypointsCPU.ptr<float>(SURF_OCL::Y_ROW);
int *kp_laplacian = keypointsCPU.ptr<int>(SURF_OCL::LAPLACIAN_ROW);
int *kp_octave = keypointsCPU.ptr<int>(SURF_OCL::OCTAVE_ROW);
float *kp_size = keypointsCPU.ptr<float>(SURF_OCL::SIZE_ROW);
float *kp_dir = keypointsCPU.ptr<float>(SURF_OCL::ANGLE_ROW);
float *kp_hessian = keypointsCPU.ptr<float>(SURF_OCL::HESSIAN_ROW);
for (size_t i = 0, size = keypoints.size(); i < size; ++i)
{
const KeyPoint &kp = keypoints[i];
kp_x[i] = kp.pt.x;
kp_y[i] = kp.pt.y;
kp_octave[i] = kp.octave;
kp_size[i] = kp.size;
kp_dir[i] = kp.angle;
kp_hessian[i] = kp.response;
kp_laplacian[i] = 1;
}
keypointsCPU.copyTo(keypointsGPU);
}
}
void SURF_OCL::downloadKeypoints(const UMat &keypointsGPU, std::vector<KeyPoint> &keypoints)
{
const int nFeatures = keypointsGPU.cols;
if (nFeatures == 0)
keypoints.clear();
else
{
CV_Assert(keypointsGPU.type() == CV_32FC1 && keypointsGPU.rows == ROWS_COUNT);
Mat keypointsCPU = keypointsGPU.getMat(ACCESS_READ);
keypoints.resize(nFeatures);
float *kp_x = keypointsCPU.ptr<float>(SURF_OCL::X_ROW);
float *kp_y = keypointsCPU.ptr<float>(SURF_OCL::Y_ROW);
int *kp_laplacian = keypointsCPU.ptr<int>(SURF_OCL::LAPLACIAN_ROW);
int *kp_octave = keypointsCPU.ptr<int>(SURF_OCL::OCTAVE_ROW);
float *kp_size = keypointsCPU.ptr<float>(SURF_OCL::SIZE_ROW);
float *kp_dir = keypointsCPU.ptr<float>(SURF_OCL::ANGLE_ROW);
float *kp_hessian = keypointsCPU.ptr<float>(SURF_OCL::HESSIAN_ROW);
for (int i = 0; i < nFeatures; ++i)
{
KeyPoint &kp = keypoints[i];
kp.pt.x = kp_x[i];
kp.pt.y = kp_y[i];
kp.class_id = kp_laplacian[i];
kp.octave = kp_octave[i];
kp.size = kp_size[i];
kp.angle = kp_dir[i];
kp.response = kp_hessian[i];
}
}
}
// Detects keypoints in _img: prepares the image with setImage() and, if that
// succeeds, runs detectKeypoints(). Returns false if either step fails.
bool SURF_OCL::detect(InputArray _img, InputArray _mask, UMat& keypoints)
{
    return setImage(_img, _mask) && detectKeypoints(keypoints);
}
// Prepares the image, optionally detects keypoints, then computes descriptors.
//
// When useProvidedKeypoints is true the detection step is skipped and
// `keypoints` is used as given. Returns false as soon as any step fails.
bool SURF_OCL::detectAndCompute(InputArray _img, InputArray _mask, UMat& keypoints,
                                OutputArray _descriptors, bool useProvidedKeypoints )
{
    const bool imageReady = setImage(_img, _mask);
    if( !imageReady )
        return false;

    const bool keypointsReady = useProvidedKeypoints || detectKeypoints(keypoints);
    return keypointsReady && computeDescriptors(keypoints, _descriptors);
}
// Integer division rounded up: for a >= 0 and b > 0, the number of b-sized
// chunks needed to cover a.
inline int divUp(int a, int b)
{
    const int padded = a + b - 1;
    return padded / b;
}
////////////////////////////
// kernel caller definitions
// Launches SURF_calcLayerDetAndTrace for one octave, writing into `det` and
// `trace`. Returns the result of the kernel launch.
//
// The grid uses 16x16 work-groups: its width covers the horizontal sample
// count and its height covers the vertical sample count once for each of the
// (nOctaveLayers + 2) layers.
bool SURF_OCL::calcLayerDetAndTrace(int octave, int c_layer_rows)
{
    const int nOctaveLayers = params->nOctaveLayers;

    // Number of sample positions per axis for this octave.
    const int min_size = calcSize(octave, 0);
    const int samplesAcross = 1 + ((img_cols - min_size) >> octave);
    const int samplesDown   = 1 + ((img_rows - min_size) >> octave);

    size_t localThreads[] = {16, 16};
    size_t globalThreads[2];
    globalThreads[0] = divUp(samplesAcross, static_cast<int>(localThreads[0])) * localThreads[0];
    globalThreads[1] = divUp(samplesDown, static_cast<int>(localThreads[1])) * localThreads[1] * (nOctaveLayers + 2);

    ocl::Kernel kerCalcDetTrace("SURF_calcLayerDetAndTrace", ocl::nonfree::surf_oclsrc, kerOpts);

    // Only the first argument differs: buffer or image form of the integral image.
    if( !haveImageSupport )
    {
        kerCalcDetTrace.args(ocl::KernelArg::ReadOnlyNoSize(sum),
                             img_rows, img_cols, nOctaveLayers,
                             octave, c_layer_rows,
                             ocl::KernelArg::WriteOnlyNoSize(det),
                             ocl::KernelArg::WriteOnlyNoSize(trace));
    }
    else
    {
        kerCalcDetTrace.args(sumTex,
                             img_rows, img_cols, nOctaveLayers,
                             octave, c_layer_rows,
                             ocl::KernelArg::WriteOnlyNoSize(det),
                             ocl::KernelArg::WriteOnlyNoSize(trace));
    }

    return kerCalcDetTrace.run(2, globalThreads, localThreads, true);
}
// Launches SURF_findMaximaInLayer for one octave. The kernel reads `det` and
// `trace` and is given `maxPosBuffer` and `counters` as read-write buffers,
// with counterOffset selecting this octave's counter.
// Returns the result of the kernel launch.
bool SURF_OCL::findMaximaInLayer(int counterOffset, int octave,
                                 int layer_rows, int layer_cols)
{
    const int nOctaveLayers = params->nOctaveLayers;
    const int min_margin = ((calcSize(octave, 2) >> 1) >> octave) + 1;

    // The margin is excluded on both sides of each axis.
    const int innerCols = layer_cols - 2 * min_margin;
    const int innerRows = layer_rows - 2 * min_margin;

    // Work-groups are 16x16 but the grid is sized as if each covered 14x14
    // positions (local size minus 2).
    // NOTE(review): presumably a one-sample border per group - confirm in the kernel.
    size_t localThreads[3] = {16, 16};
    size_t globalThreads[3] = {0, 0, 0};
    globalThreads[0] = divUp(innerCols, static_cast<int>(localThreads[0]) - 2) * localThreads[0];
    globalThreads[1] = divUp(innerRows, static_cast<int>(localThreads[1]) - 2) * nOctaveLayers * localThreads[1];

    ocl::Kernel kerFindMaxima("SURF_findMaximaInLayer", ocl::nonfree::surf_oclsrc, kerOpts);
    kerFindMaxima.args(ocl::KernelArg::ReadOnlyNoSize(det),
                       ocl::KernelArg::ReadOnlyNoSize(trace),
                       ocl::KernelArg::PtrReadWrite(maxPosBuffer),
                       ocl::KernelArg::PtrReadWrite(counters),
                       counterOffset, img_rows, img_cols,
                       octave, nOctaveLayers,
                       layer_rows, layer_cols,
                       maxCandidates,
                       static_cast<float>(params->hessianThreshold));

    return kerFindMaxima.run(2, globalThreads, localThreads, true);
}
// Launches SURF_interpolateKeypoint with one 3x3x3 work-group per candidate
// (maxCounter groups along the first dimension). The kernel reads `det` and
// `maxPosBuffer` and is given `keypoints` and `counters` as read-write buffers.
// Returns the result of the kernel launch.
bool SURF_OCL::interpolateKeypoint(int maxCounter, UMat &keypoints, int octave, int layer_rows, int max_features)
{
    size_t localThreads[3] = {3, 3, 3};

    size_t globalThreads[3];
    globalThreads[0] = maxCounter * localThreads[0];
    globalThreads[1] = localThreads[1];
    globalThreads[2] = 3;

    ocl::Kernel kerInterp("SURF_interpolateKeypoint", ocl::nonfree::surf_oclsrc, kerOpts);
    kerInterp.args(ocl::KernelArg::ReadOnlyNoSize(det),
                   ocl::KernelArg::PtrReadOnly(maxPosBuffer),
                   ocl::KernelArg::ReadWriteNoSize(keypoints),
                   ocl::KernelArg::PtrReadWrite(counters),
                   img_rows, img_cols, octave, layer_rows, max_features);

    return kerInterp.run(3, globalThreads, localThreads, true);
}
// Launches SURF_calcOrientation with one work-group of ORI_LOCAL_SIZE
// work-items per keypoint column. The kernel receives the integral image
// (buffer or image form) and `keypoints` as a read-write buffer.
// Returns true immediately when there are no keypoints; otherwise returns the
// result of the kernel launch.
bool SURF_OCL::calcOrientation(UMat &keypoints)
{
    const int nFeatures = keypoints.cols;
    if( nFeatures == 0 )
        return true;

    ocl::Kernel kerOri("SURF_calcOrientation", ocl::nonfree::surf_oclsrc, kerOpts);

    if( !haveImageSupport )
    {
        kerOri.args(ocl::KernelArg::ReadOnlyNoSize(sum),
                    img_rows, img_cols,
                    ocl::KernelArg::ReadWriteNoSize(keypoints));
    }
    else
    {
        kerOri.args(sumTex, img_rows, img_cols,
                    ocl::KernelArg::ReadWriteNoSize(keypoints));
    }

    size_t localThreads[3] = {ORI_LOCAL_SIZE, 1};
    size_t globalThreads[3] = {0, 1, 0};
    globalThreads[0] = nFeatures * localThreads[0];

    return kerOri.run(2, globalThreads, localThreads, true);
}
}