Merge pull request #102 from NikoKJ/ocl4_2.43

pull/112/merge
Andrey Kamaev 12 years ago
commit f3a158c862
  1. 9
      modules/ocl/include/opencv2/ocl/ocl.hpp
  2. 2
      modules/ocl/perf/perf_haar.cpp
  3. 6
      modules/ocl/perf/perf_hog.cpp
  4. 4
      modules/ocl/perf/perf_surf.cpp
  5. 4
      modules/ocl/perf/precomp.hpp
  6. 2
      modules/ocl/perf/utility.hpp
  7. 32
      modules/ocl/src/arithm.cpp
  8. 14
      modules/ocl/src/brute_force_matcher.cpp
  9. 6
      modules/ocl/src/build_warps.cpp
  10. 2
      modules/ocl/src/color.cpp
  11. 2
      modules/ocl/src/columnsum.cpp
  12. 4
      modules/ocl/src/filtering.cpp
  13. 6
      modules/ocl/src/haar.cpp
  14. 2
      modules/ocl/src/initialization.cpp
  15. 2
      modules/ocl/src/interpolate_frames.cpp
  16. 2
      modules/ocl/src/mssegmentation.cpp
  17. 2
      modules/ocl/src/precomp.hpp
  18. 116
      modules/ocl/src/pyrlk.cpp
  19. 20
      modules/ocl/src/surf.cpp
  20. 4
      modules/ocl/test/precomp.hpp
  21. 4
      modules/ocl/test/test_hog.cpp
  22. 4
      modules/ocl/test/test_imgproc.cpp

@ -378,7 +378,7 @@ namespace cv
// disabled until fix crash
// support all types
CV_EXPORTS Scalar sum(const oclMat &m);
CV_EXPORTS Scalar absSum(const oclMat &m);
CV_EXPORTS Scalar sqrSum(const oclMat &m);
//! finds global minimum and maximum array elements and returns their values
@ -1738,5 +1738,12 @@ namespace cv
}
}
#if _MSC_VER >= 1200
#pragma warning( push)
#pragma warning( disable: 4267)
#endif
#include "opencv2/ocl/matrix_operations.hpp"
#if _MSC_VER >= 1200
#pragma warning( pop)
#endif
#endif /* __OPENCV_GPU_HPP__ */

@ -137,7 +137,7 @@ TEST_F(Haar, FaceDetect)
printf( "cpudetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 1000.) );
cv::ocl::oclMat image;
CvSeq *_objects;
CvSeq *_objects=NULL;
t = (double)cvGetTickCount();
for(int k = 0; k < LOOP_TIMES; k++)
{

@ -103,10 +103,10 @@ TEST_P(HOG, Performance)
ASSERT_FALSE(img.empty());
// define HOG related arguments
float scale = 1.05;
float scale = 1.05f;
//int nlevels = 13;
float gr_threshold = 8;
float hit_threshold = 1.4;
int gr_threshold = 8;
float hit_threshold = 1.4f;
//bool hit_threshold_auto = true;
int win_width = is48 ? 48 : 64;

@ -54,11 +54,11 @@ using namespace cvtest;
using namespace testing;
using namespace std;
extern std::string workdir;
#define FILTER_IMAGE "../../../samples/gpu/road.png"
TEST(SURF, Performance)
{
cv::Mat img = readImage(workdir+"lena.jpg", cv::IMREAD_GRAYSCALE);
cv::Mat img = readImage(FILTER_IMAGE, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
ocl::SURF_OCL d_surf;

@ -55,13 +55,13 @@
#include "cvconfig.h"
#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/calib3d/calib3d.hpp"
//#include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/video/video.hpp"
#include "opencv2/ts/ts.hpp"
#include "opencv2/ts/ts_perf.hpp"
#include "opencv2/ocl/ocl.hpp"
#include "opencv2/nonfree/nonfree.hpp"
//#include "opencv2/nonfree/nonfree.hpp"
#include "utility.hpp"
#include "interpolation.hpp"

@ -45,7 +45,7 @@
#ifdef PRINT_KERNEL_RUN_TIME
#define LOOP_TIMES 1
#else
#define LOOP_TIMES 100
#define LOOP_TIMES 1
#endif
#define MWIDTH 1920
#define MHEIGHT 1080

@ -813,6 +813,22 @@ Scalar cv::ocl::sum(const oclMat &src)
return func(src, 0);
}
Scalar cv::ocl::absSum(const oclMat &src)
{
if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
static sumFunc functab[2] =
{
arithmetic_sum<float>,
arithmetic_sum<double>
};
sumFunc func;
func = functab[src.clCxt->impl->double_support];
return func(src, 1);
}
Scalar cv::ocl::sqrSum(const oclMat &src)
{
@ -945,13 +961,17 @@ template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal,
T *p = new T[groupnum * vlen * 2];
memset(p, 0, dbsize);
openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize);
for(int i = 0; i < vlen * (int)groupnum; i++)
{
*minVal = *minVal < p[i] ? *minVal : p[i];
if(minVal != NULL){
for(int i = 0; i < vlen * (int)groupnum; i++)
{
*minVal = *minVal < p[i] ? *minVal : p[i];
}
}
for(int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++)
{
*maxVal = *maxVal > p[i] ? *maxVal : p[i];
if(maxVal != NULL){
for(int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++)
{
*maxVal = *maxVal > p[i] ? *maxVal : p[i];
}
}
delete[] p;
openCLFree(dstBuffer);

@ -1548,8 +1548,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, vector<
temp.reserve(2 * k);
matches.resize(query.rows);
for(size_t queryIdx = 0; queryIdx < matches.size(); queryIdx++ )
matches[queryIdx].reserve(k);
for_each(matches.begin(), matches.end(), bind2nd(mem_fun_ref(&vector<DMatch>::reserve), k));
for (size_t imgIdx = 0, size = trainDescCollection.size(); imgIdx < size; ++imgIdx)
{
@ -1573,15 +1572,8 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, vector<
if (compactResult)
{
size_t i, j = 0;
for( i = 0; i < matches.size(); i++ )
if( !matches[i].empty() )
{
if( i > j )
matches[j] = matches[i];
j++;
}
matches.resize(j);
vector< vector<DMatch> >::iterator new_end = remove_if(matches.begin(), matches.end(), mem_fun_ref(&vector<DMatch>::empty));
matches.erase(new_end, matches.end());
}
}
}

@ -76,7 +76,7 @@ namespace cv
//////////////////////////////////////////////////////////////////////////////
// buildWarpPlaneMaps
void cv::ocl::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T,
void cv::ocl::buildWarpPlaneMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T,
float scale, oclMat &map_x, oclMat &map_y)
{
CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
@ -121,7 +121,7 @@ void cv::ocl::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, cons
//////////////////////////////////////////////////////////////////////////////
// buildWarpCylyndricalMaps
void cv::ocl::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale,
void cv::ocl::buildWarpCylindricalMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, float scale,
oclMat &map_x, oclMat &map_y)
{
CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
@ -160,7 +160,7 @@ void cv::ocl::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K
//////////////////////////////////////////////////////////////////////////////
// buildWarpSphericalMaps
void cv::ocl::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale,
void cv::ocl::buildWarpSphericalMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, float scale,
oclMat &map_x, oclMat &map_y)
{
CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);

@ -96,7 +96,7 @@ namespace
size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1};
openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2Gray", gt, lt, args, -1, -1, build_options);
}
void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int /*dcn*/)
{
Size sz = src.size();
int scn = src.oclchannels(), depth = src.depth(), bidx;

@ -88,7 +88,7 @@ void cv::ocl::columnSum(const oclMat &src, oclMat &dst)
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
size_t globalThreads[3] = {dst.cols, 1, 1};
size_t localThreads[3] = {16, 16, 1};
size_t localThreads[3] = {256, 1, 1};
openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());

@ -1235,7 +1235,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel
openCLExecuteKernel(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option);
}
Ptr<BaseRowFilter_GPU> cv::ocl::getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel, int anchor, int bordertype)
Ptr<BaseRowFilter_GPU> cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufType*/, const Mat &rowKernel, int anchor, int bordertype)
{
static const gpuFilter1D_t gpuFilter1D_callers[6] =
{
@ -1391,7 +1391,7 @@ void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_ker
openCLExecuteKernel(clCxt, &filter_sep_col, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
}
Ptr<BaseColumnFilter_GPU> cv::ocl::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel, int anchor, int bordertype, double delta)
Ptr<BaseColumnFilter_GPU> cv::ocl::getLinearColumnFilter_GPU(int /*bufType*/, int dstType, const Mat &columnKernel, int anchor, int bordertype, double /*delta*/)
{
static const gpuFilter1D_t gpuFilter1D_callers[6] =
{

@ -162,7 +162,7 @@ typedef _ALIGNED_ON(128) struct GpuHidHaarFeature
_ALIGNED_ON(4) int p3 ;
_ALIGNED_ON(4) float weight ;
}
_ALIGNED_ON(32) rect[CV_HAAR_FEATURE_MAX] ;
/*_ALIGNED_ON(32)*/ rect[CV_HAAR_FEATURE_MAX] ;
}
GpuHidHaarFeature;
@ -867,8 +867,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
std::vector<cv::Rect> rectList;
std::vector<int> rweights;
double factor;
int datasize;
int totalclassifier;
int datasize=0;
int totalclassifier=0;
void *out;
GpuHidHaarClassifierCascade *gcascade;

@ -644,7 +644,7 @@ namespace cv
size_t kernelWorkGroupSize;
openCLSafeCall(clGetKernelWorkGroupInfo(kernel, clCxt->impl->devices[0],
CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0));
CV_DbgAssert( (localThreads[0] <= clCxt->impl->maxWorkItemSizes[0]) &&
CV_Assert( (localThreads[0] <= clCxt->impl->maxWorkItemSizes[0]) &&
(localThreads[1] <= clCxt->impl->maxWorkItemSizes[1]) &&
(localThreads[2] <= clCxt->impl->maxWorkItemSizes[2]) &&
((localThreads[0] * localThreads[1] * localThreads[2]) <= kernelWorkGroupSize) &&

@ -218,7 +218,7 @@ void interpolate::vectorWarp(const oclMat &src, const oclMat &u, const oclMat &v
normalizeKernel(buffer, src.rows, b_offset, d_offset);
}
void interpolate::blendFrames(const oclMat &frame0, const oclMat &frame1, const oclMat &buffer, float pos, oclMat &newFrame, cl_mem &tex_src0, cl_mem &tex_src1)
void interpolate::blendFrames(const oclMat &frame0, const oclMat &/*frame1*/, const oclMat &buffer, float pos, oclMat &newFrame, cl_mem &tex_src0, cl_mem &tex_src1)
{
int step = buffer.step / sizeof(float);

@ -150,7 +150,7 @@ namespace
// Implementation
//
DjSets DjSets::operator = (const DjSets &obj)
DjSets DjSets::operator = (const DjSets &/*obj*/)
{
//cout << "Invalid DjSets constructor\n";
CV_Error(-1, "Invalid DjSets constructor\n");

@ -47,7 +47,7 @@
#define __OPENCV_PRECOMP_H__
#if _MSC_VER >= 1200
#pragma warning( disable: 4244 4251 4710 4711 4514 4996 )
#pragma warning( disable: 4267 4324 4244 4251 4710 4711 4514 4996 )
#endif
#ifdef HAVE_CVCONFIG_H

@ -581,26 +581,26 @@ void pyrDown_cus(const oclMat &src, oclMat &dst)
//
//void callF(const oclMat& src, oclMat& dst, MultiplyScalar op, int mask)
//{
// Mat srcTemp;
// Mat dstTemp;
// src.download(srcTemp);
// dst.download(dstTemp);
// Mat srcTemp;
// Mat dstTemp;
// src.download(srcTemp);
// dst.download(dstTemp);
//
// int i;
// int j;
// int k;
// for(i = 0; i < srcTemp.rows; i++)
// {
// for(j = 0; j < srcTemp.cols; j++)
// {
// for(k = 0; k < srcTemp.channels(); k++)
// {
// ((float*)dstTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k] = (float)op(((float*)srcTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k]);
// }
// }
// }
// int i;
// int j;
// int k;
// for(i = 0; i < srcTemp.rows; i++)
// {
// for(j = 0; j < srcTemp.cols; j++)
// {
// for(k = 0; k < srcTemp.channels(); k++)
// {
// ((float*)dstTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k] = (float)op(((float*)srcTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k]);
// }
// }
// }
//
// dst = dstTemp;
// dst = dstTemp;
//}
//
//static inline bool isAligned(const unsigned char* ptr, size_t size)
@ -622,54 +622,54 @@ void pyrDown_cus(const oclMat &src, oclMat &dst)
// return;
// }
//
// Mat srcTemp;
// Mat dstTemp;
// src.download(srcTemp);
// dst.download(dstTemp);
// Mat srcTemp;
// Mat dstTemp;
// src.download(srcTemp);
// dst.download(dstTemp);
//
// int x_shifted;
// int x_shifted;
//
// int i;
// int j;
// for(i = 0; i < srcTemp.rows; i++)
// {
// const double* srcRow = (const double*)srcTemp.data + i * srcTemp.rows;
// int i;
// int j;
// for(i = 0; i < srcTemp.rows; i++)
// {
// const double* srcRow = (const double*)srcTemp.data + i * srcTemp.rows;
// double* dstRow = (double*)dstTemp.data + i * dstTemp.rows;;
//
// for(j = 0; j < srcTemp.cols; j++)
// {
// x_shifted = j * 4;
// for(j = 0; j < srcTemp.cols; j++)
// {
// x_shifted = j * 4;
//
// if(x_shifted + 4 - 1 < srcTemp.cols)
// {
// dstRow[x_shifted ] = op(srcRow[x_shifted ]);
// dstRow[x_shifted + 1] = op(srcRow[x_shifted + 1]);
// dstRow[x_shifted + 2] = op(srcRow[x_shifted + 2]);
// dstRow[x_shifted + 3] = op(srcRow[x_shifted + 3]);
// }
// else
// {
// for (int real_x = x_shifted; real_x < srcTemp.cols; ++real_x)
// {
// ((float*)dstTemp.data)[i * srcTemp.rows + real_x] = op(((float*)srcTemp.data)[i * srcTemp.rows + real_x]);
// }
// }
// }
// }
// if(x_shifted + 4 - 1 < srcTemp.cols)
// {
// dstRow[x_shifted ] = op(srcRow[x_shifted ]);
// dstRow[x_shifted + 1] = op(srcRow[x_shifted + 1]);
// dstRow[x_shifted + 2] = op(srcRow[x_shifted + 2]);
// dstRow[x_shifted + 3] = op(srcRow[x_shifted + 3]);
// }
// else
// {
// for (int real_x = x_shifted; real_x < srcTemp.cols; ++real_x)
// {
// ((float*)dstTemp.data)[i * srcTemp.rows + real_x] = op(((float*)srcTemp.data)[i * srcTemp.rows + real_x]);
// }
// }
// }
// }
//}
//
//void multiply(const oclMat& src1, double val, oclMat& dst, double scale = 1.0f);
//void multiply(const oclMat& src1, double val, oclMat& dst, double scale)
//{
// MultiplyScalar op(val, scale);
// //if(src1.channels() == 1 && dst.channels() == 1)
// //{
// // callT(src1, dst, op, 0);
// //}
// //else
// //{
// callF(src1, dst, op, 0);
// //}
// //if(src1.channels() == 1 && dst.channels() == 1)
// //{
// // callT(src1, dst, op, 0);
// //}
// //else
// //{
// callF(src1, dst, op, 0);
// //}
//}
cl_mem bindTexture(const oclMat &mat, int depth, int channels)
@ -792,6 +792,12 @@ void lkSparse_run(oclMat &I, oclMat &J,
void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat *err)
{
if (prevImg.clCxt->impl->devName.find("Intel(R) HD Graphics") != string::npos)
{
cout << " Intel HD GPU device unsupported " << endl;
return;
}
if (prevPts.empty())
{
nextPts.release();

@ -197,11 +197,11 @@ public:
throw std::exception();
//!FIXME
// temp fix for missing min overload
oclMat temp(mask.size(), mask.type());
temp.setTo(Scalar::all(1.0));
//cv::ocl::min(mask, temp, surf_.mask1); ///////// disable this
integral(surf_.mask1, surf_.maskSum);
bindImgTex(surf_.maskSum, maskSumTex);
//oclMat temp(mask.size(), mask.type());
//temp.setTo(Scalar::all(1.0));
////cv::ocl::min(mask, temp, surf_.mask1); ///////// disable this
//integral(surf_.mask1, surf_.maskSum);
//bindImgTex(surf_.maskSum, maskSumTex);
}
}
@ -415,6 +415,11 @@ void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, oclMat
{
if (!img.empty())
{
if (img.clCxt->impl->devName.find("Intel(R) HD Graphics") != string::npos)
{
cout << " Intel HD GPU device unsupported " << endl;
return;
}
SURF_OCL_Invoker surf(*this, img, mask);
surf.detectKeypoints(keypoints);
@ -426,6 +431,11 @@ void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, oclMat
{
if (!img.empty())
{
if (img.clCxt->impl->devName.find("Intel(R) HD Graphics") != string::npos)
{
cout << " Intel HD GPU device unsupported " << endl;
return;
}
SURF_OCL_Invoker surf(*this, img, mask);
if (!useProvidedKeypoints)

@ -55,13 +55,13 @@
#include "cvconfig.h"
#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/calib3d/calib3d.hpp"
//#include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/video/video.hpp"
#include "opencv2/ts/ts.hpp"
#include "opencv2/ts/ts_perf.hpp"
#include "opencv2/ocl/ocl.hpp"
#include "opencv2/nonfree/nonfree.hpp"
//#include "opencv2/nonfree/nonfree.hpp"
#include "utility.hpp"
#include "interpolation.hpp"

@ -199,8 +199,8 @@ TEST_P(HOG, Detect)
int threshold = 10;
int val = 32;
d_comp[0] = d_found.size();
comp[0] = found.size();
d_comp[0] = (int)d_found.size();
comp[0] = (int)found.size();
if (winSize == cv::Size(48, 96))
{
for(int i = 0; i < (int)d_found.size(); i++)

@ -213,8 +213,8 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
dptr[3] = (uchar)c3;
COOR coor;
coor.x = x0;
coor.y = y0;
coor.x = (short)x0;
coor.y = (short)y0;
return coor;
}

Loading…
Cancel
Save