added cv::dft T-API test

pull/2018/head
Ilya Lavrenov 11 years ago
parent bd91b39593
commit c8cbfe5382
  1. 1
      modules/core/include/opencv2/core/ocl.hpp
  2. 4
      modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_clamdfft.hpp
  3. 255
      modules/core/src/dxt.cpp
  4. 78
      modules/core/src/ocl.cpp
  5. 12
      modules/core/src/opencl/runtime/autogenerated/opencl_clamdfft_impl.hpp
  6. 6
      modules/core/src/opencl/runtime/generator/filter/opencl_clamdfft_functions.list
  7. 113
      modules/core/test/ocl/test_dft.cpp
  8. 64
      modules/ocl/src/fft.cpp
  9. 1
      modules/ocl/src/precomp.hpp
  10. 14
      modules/ocl/test/test_fft.cpp

@ -49,6 +49,7 @@ namespace cv { namespace ocl {
CV_EXPORTS bool haveOpenCL();
CV_EXPORTS bool useOpenCL();
CV_EXPORTS bool haveAmdBlas();
CV_EXPORTS bool haveAmdFft();
CV_EXPORTS void setUseOpenCL(bool flag);
CV_EXPORTS void finish2();

@ -95,7 +95,7 @@
#undef clAmdFftSetPlanOutStride
#define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_pfn
#undef clAmdFftSetPlanPrecision
//#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_pfn
#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_pfn
#undef clAmdFftSetPlanScale
#define clAmdFftSetPlanScale clAmdFftSetPlanScale_pfn
#undef clAmdFftSetPlanTransposeResult
@ -134,7 +134,7 @@ extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDistance)(clAmdFftPlanH
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanInStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanLength)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, const size_t* clLengths);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanOutStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle plHandle, clAmdFftPrecision precision);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle plHandle, clAmdFftPrecision precision);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanScale)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float scale);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanTransposeResult)(clAmdFftPlanHandle plHandle, clAmdFftResultTransposed transposed);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetResultLocation)(clAmdFftPlanHandle plHandle, clAmdFftResultLocation placeness);

@ -40,6 +40,8 @@
//M*/
#include "precomp.hpp"
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
#include "opencv2/core/opencl/runtime/opencl_core.hpp"
namespace cv
{
@ -1473,8 +1475,261 @@ typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, in
typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*);
#endif
#ifdef HAVE_CLAMDFFT
namespace cv {
#define CLAMDDFT_Assert(func) \
{ \
clAmdFftStatus s = (func); \
CV_Assert(s == CLFFT_SUCCESS); \
}
enum FftType
{
R2R = 0, // real to real
C2R = 1, // opencl HERMITIAN_INTERLEAVED to real
R2C = 2, // real to opencl HERMITIAN_INTERLEAVED
C2C = 3 // complex to complex
};
class PlanCache
{
struct FftPlan
{
FftPlan(const Size & _dft_size, int _src_step, int _dst_step, bool _doubleFP, bool _inplace, int _flags, FftType _fftType) :
dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step),
doubleFP(_doubleFP), inplace(_inplace), flags(_flags), fftType(_fftType), plHandle(0)
{
bool dft_inverse = (flags & DFT_INVERSE) != 0;
bool dft_scale = (flags & DFT_SCALE) != 0;
bool dft_rows = (flags & DFT_ROWS) != 0;
clAmdFftLayout inLayout = CLFFT_REAL, outLayout = CLFFT_REAL;
clAmdFftDim dim = dft_size.height == 1 || dft_rows ? CLFFT_1D : CLFFT_2D;
size_t batchSize = dft_rows ? dft_size.height : 1;
size_t clLengthsIn[3] = { dft_size.width, dft_rows ? 1 : dft_size.height, 1 };
size_t clStridesIn[3] = { 1, 1, 1 };
size_t clStridesOut[3] = { 1, 1, 1 };
int elemSize = doubleFP ? sizeof(double) : sizeof(float);
switch (fftType)
{
case C2C:
inLayout = CLFFT_COMPLEX_INTERLEAVED;
outLayout = CLFFT_COMPLEX_INTERLEAVED;
clStridesIn[1] = src_step / (elemSize << 1);
clStridesOut[1] = dst_step / (elemSize << 1);
break;
case R2C:
inLayout = CLFFT_REAL;
outLayout = CLFFT_HERMITIAN_INTERLEAVED;
clStridesIn[1] = src_step / elemSize;
clStridesOut[1] = dst_step / (elemSize << 1);
break;
case C2R:
inLayout = CLFFT_HERMITIAN_INTERLEAVED;
outLayout = CLFFT_REAL;
clStridesIn[1] = src_step / (elemSize << 1);
clStridesOut[1] = dst_step / elemSize;
break;
case R2R:
default:
CV_Error(Error::StsNotImplemented, "AMD Fft does not support this type");
break;
}
clStridesIn[2] = dft_rows ? clStridesIn[1] : dft_size.width * clStridesIn[1];
clStridesOut[2] = dft_rows ? clStridesOut[1] : dft_size.width * clStridesOut[1];
// TODO remove all plans if context changed
CLAMDDFT_Assert(clAmdFftCreateDefaultPlan(&plHandle, (cl_context)ocl::Context2::getDefault().ptr(), dim, clLengthsIn))
// setting plan properties
CLAMDDFT_Assert(clAmdFftSetPlanPrecision(plHandle, doubleFP ? CLFFT_DOUBLE : CLFFT_SINGLE));
CLAMDDFT_Assert(clAmdFftSetResultLocation(plHandle, inplace ? CLFFT_INPLACE : CLFFT_OUTOFPLACE))
CLAMDDFT_Assert(clAmdFftSetLayout(plHandle, inLayout, outLayout))
CLAMDDFT_Assert(clAmdFftSetPlanBatchSize(plHandle, batchSize))
CLAMDDFT_Assert(clAmdFftSetPlanInStride(plHandle, dim, clStridesIn))
CLAMDDFT_Assert(clAmdFftSetPlanOutStride(plHandle, dim, clStridesOut))
CLAMDDFT_Assert(clAmdFftSetPlanDistance(plHandle, clStridesIn[dim], clStridesOut[dim]))
float scale = dft_scale ? 1.0f / (dft_rows ? dft_size.width : dft_size.area()) : 1.0f;
CLAMDDFT_Assert(clAmdFftSetPlanScale(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale))
// ready to bake
cl_command_queue commandQueue = (cl_command_queue)ocl::Queue::getDefault().ptr();
CLAMDDFT_Assert(clAmdFftBakePlan(plHandle, 1, &commandQueue, NULL, NULL))
}
~FftPlan()
{
// clAmdFftDestroyPlan(&plHandle);
}
friend class PlanCache;
private:
Size dft_size;
int src_step, dst_step;
bool doubleFP;
bool inplace;
int flags;
FftType fftType;
cl_context context;
clAmdFftPlanHandle plHandle;
};
public:
static PlanCache & getInstance()
{
static PlanCache planCache;
return planCache;
}
clAmdFftPlanHandle getPlanHandle(const Size & dft_size, int src_step, int dst_step, bool doubleFP,
bool inplace, int flags, FftType fftType)
{
cl_context currentContext = (cl_context)ocl::Context2::getDefault().ptr();
for (size_t i = 0, size = planStorage.size(); i < size; i ++)
{
const FftPlan * const plan = planStorage[i];
if (plan->dft_size == dft_size &&
plan->flags == flags &&
plan->src_step == src_step &&
plan->dst_step == dst_step &&
plan->doubleFP == doubleFP &&
plan->fftType == fftType &&
plan->inplace == inplace)
{
if (plan->context != currentContext)
{
planStorage.erase(planStorage.begin() + i);
break;
}
return plan->plHandle;
}
}
// no baked plan is found, so let's create a new one
FftPlan * newPlan = new FftPlan(dft_size, src_step, dst_step, doubleFP, inplace, flags, fftType);
planStorage.push_back(newPlan);
return newPlan->plHandle;
}
~PlanCache()
{
for (std::vector<FftPlan *>::iterator i = planStorage.begin(), end = planStorage.end(); i != end; ++i)
delete (*i);
planStorage.clear();
}
protected:
PlanCache() :
planStorage()
{
}
std::vector<FftPlan *> planStorage;
};
extern "C" {
static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
{
UMatData * u = (UMatData *)p;
if( u && CV_XADD(&u->urefcount, -1) == 1 )
u->currAllocator->deallocate(u);
u = 0;
clReleaseEvent(e), e = 0;
}
}
static bool ocl_dft(InputArray _src, OutputArray _dst, int flags)
{
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
Size ssize = _src.size();
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if ( (!doubleSupport && depth == CV_64F) ||
!(type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2) ||
_src.offset() != 0)
return false;
// if is not a multiplication of prime numbers { 2, 3, 5 }
if (ssize.area() != getOptimalDFTSize(ssize.area()))
return false;
int dst_complex_input = cn == 2 ? 1 : 0;
bool dft_inverse = (flags & DFT_INVERSE) != 0 ? 1 : 0;
int dft_complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0;
bool dft_real_output = (flags & DFT_REAL_OUTPUT) != 0;
CV_Assert(dft_complex_output + dft_real_output < 2);
FftType fftType = (FftType)(dst_complex_input << 0 | dft_complex_output << 1);
switch (fftType)
{
case C2C:
_dst.create(ssize.height, ssize.width, CV_MAKE_TYPE(depth, 2));
break;
case R2C: // TODO implement it if possible
case C2R: // TODO implement it if possible
case R2R: // AMD Fft does not support this type
default:
return false;
}
UMat src = _src.getUMat(), dst = _dst.getUMat();
bool inplace = src.u == dst.u;
clAmdFftPlanHandle plHandle = PlanCache::getInstance().
getPlanHandle(ssize, (int)src.step, (int)dst.step,
depth == CV_64F, inplace, flags, fftType);
// get the bufferSize
size_t bufferSize = 0;
CLAMDDFT_Assert(clAmdFftGetTmpBufSize(plHandle, &bufferSize))
UMat tmpBuffer(1, (int)bufferSize, CV_8UC1);
cl_mem srcarg = (cl_mem)src.handle(ACCESS_READ);
cl_mem dstarg = (cl_mem)dst.handle(ACCESS_RW);
cl_command_queue commandQueue = (cl_command_queue)ocl::Queue::getDefault().ptr();
cl_event e = 0;
CLAMDDFT_Assert(clAmdFftEnqueueTransform(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
1, &commandQueue, 0, NULL, &e,
&srcarg, &dstarg, (cl_mem)tmpBuffer.handle(ACCESS_RW)))
tmpBuffer.addref();
clSetEventCallback(e, CL_COMPLETE, oclCleanupCallback, tmpBuffer.u);
return true;
}
#undef DFT_ASSERT
}
#endif // HAVE_CLAMDFFT
void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
{
#ifdef HAVE_CLAMDFFT
if (ocl::useOpenCL() && ocl::haveAmdFft() && _dst.isUMat() && _src0.dims() <= 2
&& nonzero_rows == 0 && ocl_dft(_src0, _dst, flags))
return;
#endif
static DFTFunc dft_tbl[6] =
{
(DFTFunc)DFT_32f,

@ -43,6 +43,7 @@
#include <map>
#include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
#ifdef HAVE_OPENCL
#include "opencv2/core/opencl/runtime/opencl_core.hpp"
@ -1423,6 +1424,83 @@ bool haveAmdBlas()
#endif
#ifdef HAVE_CLAMDFFT
class AmdFftHelper
{
public:
static AmdFftHelper & getInstance()
{
static AmdFftHelper amdFft;
return amdFft;
}
bool isAvailable() const
{
return g_isAmdFftAvailable;
}
~AmdFftHelper()
{
try
{
// clAmdFftTeardown();
}
catch (...) { }
}
protected:
AmdFftHelper()
{
if (!g_isAmdFftInitialized)
{
AutoLock lock(m);
if (!g_isAmdFftInitialized && haveOpenCL())
{
try
{
CV_Assert(clAmdFftInitSetupData(&setupData) == CLFFT_SUCCESS);
g_isAmdFftAvailable = true;
}
catch (const Exception &)
{
g_isAmdFftAvailable = false;
}
}
else
g_isAmdFftAvailable = false;
g_isAmdFftInitialized = true;
}
}
private:
static clAmdFftSetupData setupData;
static Mutex m;
static bool g_isAmdFftInitialized;
static bool g_isAmdFftAvailable;
};
clAmdFftSetupData AmdFftHelper::setupData;
bool AmdFftHelper::g_isAmdFftAvailable = false;
bool AmdFftHelper::g_isAmdFftInitialized = false;
Mutex AmdFftHelper::m;
bool haveAmdFft()
{
return AmdFftHelper::getInstance().isAvailable();
}
#else
bool haveAmdFft()
{
return false;
}
#endif
void finish2()
{
Queue::getDefault().finish();

@ -33,7 +33,7 @@ enum OPENCLAMDFFT_FN_ID {
OPENCLAMDFFT_FN_clAmdFftSetPlanInStride = 23,
// OPENCLAMDFFT_FN_clAmdFftSetPlanLength = 24,
OPENCLAMDFFT_FN_clAmdFftSetPlanOutStride = 25,
// OPENCLAMDFFT_FN_clAmdFftSetPlanPrecision = 26,
OPENCLAMDFFT_FN_clAmdFftSetPlanPrecision = 26,
OPENCLAMDFFT_FN_clAmdFftSetPlanScale = 27,
// OPENCLAMDFFT_FN_clAmdFftSetPlanTransposeResult = 28,
OPENCLAMDFFT_FN_clAmdFftSetResultLocation = 29,
@ -334,9 +334,9 @@ clAmdFftStatus (*clAmdFftSetPlanOutStride)(clAmdFftPlanHandle, const clAmdFftDim
openclamdfft_fn3<OPENCLAMDFFT_FN_clAmdFftSetPlanOutStride, clAmdFftStatus, clAmdFftPlanHandle, const clAmdFftDim, size_t*>::switch_fn;
static const struct DynamicFnEntry clAmdFftSetPlanOutStride_definition = { "clAmdFftSetPlanOutStride", (void**)&clAmdFftSetPlanOutStride};
//clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle, clAmdFftPrecision) =
// openclamdfft_fn2<OPENCLAMDFFT_FN_clAmdFftSetPlanPrecision, clAmdFftStatus, clAmdFftPlanHandle, clAmdFftPrecision>::switch_fn;
//static const struct DynamicFnEntry clAmdFftSetPlanPrecision_definition = { "clAmdFftSetPlanPrecision", (void**)&clAmdFftSetPlanPrecision};
clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle, clAmdFftPrecision) =
openclamdfft_fn2<OPENCLAMDFFT_FN_clAmdFftSetPlanPrecision, clAmdFftStatus, clAmdFftPlanHandle, clAmdFftPrecision>::switch_fn;
static const struct DynamicFnEntry clAmdFftSetPlanPrecision_definition = { "clAmdFftSetPlanPrecision", (void**)&clAmdFftSetPlanPrecision};
clAmdFftStatus (*clAmdFftSetPlanScale)(clAmdFftPlanHandle, clAmdFftDirection, cl_float) =
openclamdfft_fn3<OPENCLAMDFFT_FN_clAmdFftSetPlanScale, clAmdFftStatus, clAmdFftPlanHandle, clAmdFftDirection, cl_float>::switch_fn;
@ -387,7 +387,7 @@ static const struct DynamicFnEntry* openclamdfft_fn[] = {
&clAmdFftSetPlanInStride_definition,
NULL/*&clAmdFftSetPlanLength_definition*/,
&clAmdFftSetPlanOutStride_definition,
NULL/*&clAmdFftSetPlanPrecision_definition*/,
&clAmdFftSetPlanPrecision_definition,
&clAmdFftSetPlanScale_definition,
NULL/*&clAmdFftSetPlanTransposeResult_definition*/,
&clAmdFftSetResultLocation_definition,
@ -396,4 +396,4 @@ static const struct DynamicFnEntry* openclamdfft_fn[] = {
ADDITIONAL_FN_DEFINITIONS // macro for custom functions
};
// number of enabled functions: 14
// number of enabled functions: 15

@ -5,7 +5,7 @@ clAmdFftDestroyPlan
clAmdFftEnqueueTransform
//clAmdFftGetLayout
//clAmdFftGetPlanBatchSize
//clAmdFftGetPlanContext
clAmdFftGetPlanContext
//clAmdFftGetPlanDim
//clAmdFftGetPlanDistance
//clAmdFftGetPlanInStride
@ -22,9 +22,9 @@ clAmdFftSetPlanBatchSize
//clAmdFftSetPlanDim
clAmdFftSetPlanDistance
clAmdFftSetPlanInStride
//clAmdFftSetPlanLength
clAmdFftSetPlanLength
clAmdFftSetPlanOutStride
//clAmdFftSetPlanPrecision
clAmdFftSetPlanPrecision
clAmdFftSetPlanScale
//clAmdFftSetPlanTransposeResult
clAmdFftSetResultLocation

@ -0,0 +1,113 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
////////////////////////////////////////////////////////////////////////////
// Dft
PARAM_TEST_CASE(Dft, cv::Size, MatDepth, bool, bool, bool, bool)
{
cv::Size dft_size;
int dft_flags, depth;
bool inplace;
TEST_DECLARE_INPUT_PARAMETER(src)
TEST_DECLARE_OUTPUT_PARAMETER(dst)
virtual void SetUp()
{
dft_size = GET_PARAM(0);
depth = GET_PARAM(1);
inplace = GET_PARAM(2);
dft_flags = 0;
if (GET_PARAM(3))
dft_flags |= cv::DFT_ROWS;
if (GET_PARAM(4))
dft_flags |= cv::DFT_SCALE;
if (GET_PARAM(5))
dft_flags |= cv::DFT_INVERSE;
}
void generateTestData(int cn = 2)
{
src = randomMat(dft_size, CV_MAKE_TYPE(depth, cn), 0.0, 100.0);
usrc = src.getUMat(ACCESS_READ);
if (inplace)
dst = src, udst = usrc;
}
};
OCL_TEST_P(Dft, C2C)
{
generateTestData();
OCL_OFF(cv::dft(src, dst, dft_flags | cv::DFT_COMPLEX_OUTPUT));
OCL_ON(cv::dft(usrc, udst, dft_flags | cv::DFT_COMPLEX_OUTPUT));
double eps = src.size().area() * 1e-4;
EXPECT_MAT_NEAR(dst, udst, eps);
}
OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20),
cv::Size(512, 1), cv::Size(1024, 768)),
Values(CV_32F, CV_64F),
Bool(), // inplace
Bool(), // DFT_ROWS
Bool(), // DFT_SCALE
Bool()) // DFT_INVERSE
);
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL

@ -48,43 +48,53 @@ using namespace cv;
using namespace cv::ocl;
#if !defined HAVE_CLAMDFFT
void cv::ocl::dft(const oclMat&, oclMat&, Size, int)
{
CV_Error(Error::OpenCLNoAMDBlasFft, "OpenCL DFT is not implemented");
}
namespace cv { namespace ocl {
void fft_teardown();
}}
void cv::ocl::fft_teardown(){}
} }
void cv::ocl::fft_teardown() { }
#else
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
namespace cv
{
namespace ocl
{
void fft_setup();
void fft_teardown();
enum FftType
{
C2R = 1, // complex to complex
R2C = 2, // real to opencl HERMITIAN_INTERLEAVED
C2C = 3 // opencl HERMITIAN_INTERLEAVED to real
};
struct FftPlan
{
protected:
clAmdFftPlanHandle plHandle;
FftPlan& operator=(const FftPlan&);
public:
FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
FftPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type);
~FftPlan();
inline clAmdFftPlanHandle getPlanHandle() { return plHandle; }
const Size dft_size;
const int src_step, dst_step;
const int depth;
const int flags;
const FftType type;
};
class PlanCache
{
protected:
@ -105,10 +115,11 @@ namespace cv
planCache = new PlanCache();
return planCache;
}
// return a baked plan->
// if there is one matched plan, return it
// if not, bake a new one, put it into the planStore and return it.
static FftPlan* getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
static FftPlan* getPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type);
// remove a single plan from the store
// return true if the plan is successfully removed
@ -117,6 +128,7 @@ namespace cv
};
}
}
PlanCache* PlanCache::planCache = NULL;
void cv::ocl::fft_setup()
@ -128,9 +140,11 @@ void cv::ocl::fft_setup()
}
if (pCache.setupData == NULL)
pCache.setupData = new clAmdFftSetupData;
openCLSafeCall(clAmdFftInitSetupData( pCache.setupData ));
pCache.started = true;
}
void cv::ocl::fft_teardown()
{
PlanCache& pCache = *PlanCache::getPlanCache();
@ -154,8 +168,8 @@ void cv::ocl::fft_teardown()
}
// bake a new plan
cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
: plHandle(0), dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type)
cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type)
: plHandle(0), dft_size(_dft_size), src_step(_src_step), depth(_depth), dst_step(_dst_step), flags(_flags), type(_type)
{
fft_setup();
@ -184,20 +198,20 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
case C2C:
inLayout = CLFFT_COMPLEX_INTERLEAVED;
outLayout = CLFFT_COMPLEX_INTERLEAVED;
clStridesIn[1] = src_step / (2*sizeof(float));
clStridesOut[1] = clStridesIn[1];
clStridesIn[1] = src_step / (2*CV_ELEM_SIZE(_depth));
clStridesOut[1] = dst_step / (2*CV_ELEM_SIZE(_depth));
break;
case R2C:
inLayout = CLFFT_REAL;
outLayout = CLFFT_HERMITIAN_INTERLEAVED;
clStridesIn[1] = src_step / sizeof(float);
clStridesOut[1] = dst_step / (2*sizeof(float));
clStridesIn[1] = src_step / CV_ELEM_SIZE(_depth);
clStridesOut[1] = dst_step / (2*CV_ELEM_SIZE(_depth));
break;
case C2R:
inLayout = CLFFT_HERMITIAN_INTERLEAVED;
outLayout = CLFFT_REAL;
clStridesIn[1] = src_step / (2*sizeof(float));
clStridesOut[1] = dst_step / sizeof(float);
clStridesIn[1] = src_step / (2*CV_ELEM_SIZE(_depth));
clStridesOut[1] = dst_step / CV_ELEM_SIZE(_depth);
break;
default:
//std::runtime_error("does not support this convertion!");
@ -211,6 +225,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, *(cl_context*)getClContextPtr(), dim, clLengthsIn ) );
openCLSafeCall( clAmdFftSetPlanPrecision( plHandle, depth == CV_64F ? CLFFT_DOUBLE : CLFFT_SINGLE ) );
openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) );
openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) );
openCLSafeCall( clAmdFftSetPlanBatchSize( plHandle, batchSize ) );
@ -225,6 +240,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
//ready to bake
openCLSafeCall( clAmdFftBakePlan( plHandle, 1, (cl_command_queue*)getClCommandQueuePtr(), NULL, NULL ) );
}
cv::ocl::FftPlan::~FftPlan()
{
openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) );
@ -242,7 +258,7 @@ cv::ocl::PlanCache::~PlanCache()
fft_teardown();
}
FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type)
{
PlanCache& pCache = *PlanCache::getPlanCache();
std::vector<FftPlan *>& pStore = pCache.planStore;
@ -256,6 +272,7 @@ FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_ste
plan->flags == _flags &&
plan->src_step == _src_step &&
plan->dst_step == _dst_step &&
plan->depth == _depth &&
plan->type == _type
)
{
@ -263,7 +280,7 @@ FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_ste
}
}
// no baked plan is found
FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _flags, _type);
FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _depth, _flags, _type);
pStore.push_back(newPlan);
return newPlan;
}
@ -286,6 +303,8 @@ bool cv::ocl::PlanCache::removePlan(clAmdFftPlanHandle plHandle)
void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
{
CV_Assert(cv::ocl::haveAmdFft());
if(dft_size == Size(0, 0))
{
dft_size = src.size();
@ -296,9 +315,6 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
// the two flags are not compatible
CV_Assert( !((flags & DFT_SCALE) && (flags & DFT_ROWS)) );
// similar assertions with cuda module
CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);
//bool is_1d_input = (src.rows == 1);
//int is_row_dft = flags & DFT_ROWS;
//int is_scaled_dft = flags & DFT_SCALE;
@ -306,6 +322,7 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
bool is_complex_input = src.channels() == 2;
bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
int depth = src.depth();
// We don't support real-to-real transform
CV_Assert(is_complex_input || is_complex_output);
@ -314,14 +331,17 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
switch(type)
{
case C2C:
dst.create(src.rows, src.cols, CV_32FC2);
dst.create(src.rows, src.cols, CV_MAKE_TYPE(depth, 2));
printf("C2C\n");
break;
case R2C:
dst.create(src.rows, src.cols / 2 + 1, CV_32FC2);
dst.create(src.rows, src.cols / 2 + 1, CV_MAKE_TYPE(depth, 2));
printf("R2C\n");
break;
case C2R:
CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows);
dst.create(src.rows, dft_size.width, CV_32FC1);
dst.create(src.rows, dft_size.width, CV_MAKE_TYPE(depth, 1));
printf("C2R\n");
break;
default:
//std::runtime_error("does not support this convertion!");
@ -329,7 +349,7 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
throw std::exception();
break;
}
clAmdFftPlanHandle plHandle = PlanCache::getPlan(dft_size, src.step, dst.step, flags, type)->getPlanHandle();
clAmdFftPlanHandle plHandle = PlanCache::getPlan(dft_size, src.step, dst.step, depth, flags, type)->getPlanHandle();
//get the buffersize
size_t buffersize = 0;
@ -356,7 +376,7 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
{
openCLFree(clMedBuffer);
}
//fft_teardown();
fft_teardown();
}
#endif

@ -75,6 +75,7 @@
#include "opencv2/core/utility.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/ocl.hpp"
#define __ATI__

@ -50,32 +50,36 @@ using namespace std;
////////////////////////////////////////////////////////////////////////////
// Dft
PARAM_TEST_CASE(Dft, cv::Size, int)
PARAM_TEST_CASE(Dft, cv::Size, int, bool)
{
cv::Size dft_size;
int dft_flags;
bool doubleFP;
virtual void SetUp()
{
dft_size = GET_PARAM(0);
dft_flags = GET_PARAM(1);
doubleFP = GET_PARAM(2);
}
};
OCL_TEST_P(Dft, C2C)
{
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 100.0);
cv::Mat a = randomMat(dft_size, doubleFP ? CV_64FC2 : CV_32FC2, 0.0, 100.0);
cv::Mat b_gold;
cv::ocl::oclMat d_b;
cv::dft(a, b_gold, dft_flags);
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), dft_flags);
EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4);
}
OCL_TEST_P(Dft, R2C)
{
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 100.0);
cv::Mat a = randomMat(dft_size, doubleFP ? CV_64FC1 : CV_32FC1, 0.0, 100.0);
cv::Mat b_gold, b_gold_roi;
cv::ocl::oclMat d_b, d_c;
@ -92,7 +96,7 @@ OCL_TEST_P(Dft, R2C)
OCL_TEST_P(Dft, R2CthenC2R)
{
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
cv::Mat a = randomMat(dft_size, doubleFP ? CV_64FC1 : CV_32FC1, 0.0, 10.0);
cv::ocl::oclMat d_b, d_c;
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), 0);
@ -102,7 +106,7 @@ OCL_TEST_P(Dft, R2CthenC2R)
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Dft, testing::Combine(
testing::Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), cv::Size(512, 1), cv::Size(1024, 768)),
testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE) ));
testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE), testing::Bool()));
////////////////////////////////////////////////////////////////////////////
// MulSpectrums

Loading…
Cancel
Save