|
|
|
@ -1781,251 +1781,11 @@ static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag) |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#ifdef HAVE_CLAMDFFT |
|
|
|
|
|
|
|
|
|
namespace cv { |
|
|
|
|
|
|
|
|
|
// Evaluates a clAmdFft call exactly once and raises a CV_Assert failure unless
// it returned CLFFT_SUCCESS. The expansion is a bare brace block, so call sites
// compile both with and without a trailing semicolon.
#define CLAMDDFT_Assert(func) \
    { \
        clAmdFftStatus s = (func); \
        CV_Assert(s == CLFFT_SUCCESS); \
    }
|
|
|
|
|
|
|
|
|
class PlanCache |
|
|
|
|
{ |
|
|
|
|
struct FftPlan |
|
|
|
|
{ |
|
|
|
|
FftPlan(const Size & _dft_size, int _src_step, int _dst_step, bool _doubleFP, bool _inplace, int _flags, FftType _fftType) : |
|
|
|
|
dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), |
|
|
|
|
doubleFP(_doubleFP), inplace(_inplace), flags(_flags), fftType(_fftType), |
|
|
|
|
context((cl_context)ocl::Context::getDefault().ptr()), plHandle(0) |
|
|
|
|
{ |
|
|
|
|
bool dft_inverse = (flags & DFT_INVERSE) != 0; |
|
|
|
|
bool dft_scale = (flags & DFT_SCALE) != 0; |
|
|
|
|
bool dft_rows = (flags & DFT_ROWS) != 0; |
|
|
|
|
|
|
|
|
|
clAmdFftLayout inLayout = CLFFT_REAL, outLayout = CLFFT_REAL; |
|
|
|
|
clAmdFftDim dim = dft_size.height == 1 || dft_rows ? CLFFT_1D : CLFFT_2D; |
|
|
|
|
|
|
|
|
|
size_t batchSize = dft_rows ? dft_size.height : 1; |
|
|
|
|
size_t clLengthsIn[3] = { dft_size.width, dft_rows ? 1 : dft_size.height, 1 }; |
|
|
|
|
size_t clStridesIn[3] = { 1, 1, 1 }; |
|
|
|
|
size_t clStridesOut[3] = { 1, 1, 1 }; |
|
|
|
|
int elemSize = doubleFP ? sizeof(double) : sizeof(float); |
|
|
|
|
|
|
|
|
|
switch (fftType) |
|
|
|
|
{ |
|
|
|
|
case C2C: |
|
|
|
|
inLayout = CLFFT_COMPLEX_INTERLEAVED; |
|
|
|
|
outLayout = CLFFT_COMPLEX_INTERLEAVED; |
|
|
|
|
clStridesIn[1] = src_step / (elemSize << 1); |
|
|
|
|
clStridesOut[1] = dst_step / (elemSize << 1); |
|
|
|
|
break; |
|
|
|
|
case R2C: |
|
|
|
|
inLayout = CLFFT_REAL; |
|
|
|
|
outLayout = CLFFT_HERMITIAN_INTERLEAVED; |
|
|
|
|
clStridesIn[1] = src_step / elemSize; |
|
|
|
|
clStridesOut[1] = dst_step / (elemSize << 1); |
|
|
|
|
break; |
|
|
|
|
case C2R: |
|
|
|
|
inLayout = CLFFT_HERMITIAN_INTERLEAVED; |
|
|
|
|
outLayout = CLFFT_REAL; |
|
|
|
|
clStridesIn[1] = src_step / (elemSize << 1); |
|
|
|
|
clStridesOut[1] = dst_step / elemSize; |
|
|
|
|
break; |
|
|
|
|
case R2R: |
|
|
|
|
default: |
|
|
|
|
CV_Error(Error::StsNotImplemented, "AMD Fft does not support this type"); |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
clStridesIn[2] = dft_rows ? clStridesIn[1] : dft_size.width * clStridesIn[1]; |
|
|
|
|
clStridesOut[2] = dft_rows ? clStridesOut[1] : dft_size.width * clStridesOut[1]; |
|
|
|
|
|
|
|
|
|
CLAMDDFT_Assert(clAmdFftCreateDefaultPlan(&plHandle, (cl_context)ocl::Context::getDefault().ptr(), dim, clLengthsIn)) |
|
|
|
|
|
|
|
|
|
// setting plan properties
|
|
|
|
|
CLAMDDFT_Assert(clAmdFftSetPlanPrecision(plHandle, doubleFP ? CLFFT_DOUBLE : CLFFT_SINGLE)); |
|
|
|
|
CLAMDDFT_Assert(clAmdFftSetResultLocation(plHandle, inplace ? CLFFT_INPLACE : CLFFT_OUTOFPLACE)) |
|
|
|
|
CLAMDDFT_Assert(clAmdFftSetLayout(plHandle, inLayout, outLayout)) |
|
|
|
|
CLAMDDFT_Assert(clAmdFftSetPlanBatchSize(plHandle, batchSize)) |
|
|
|
|
CLAMDDFT_Assert(clAmdFftSetPlanInStride(plHandle, dim, clStridesIn)) |
|
|
|
|
CLAMDDFT_Assert(clAmdFftSetPlanOutStride(plHandle, dim, clStridesOut)) |
|
|
|
|
CLAMDDFT_Assert(clAmdFftSetPlanDistance(plHandle, clStridesIn[dim], clStridesOut[dim])) |
|
|
|
|
|
|
|
|
|
float scale = dft_scale ? 1.0f / (dft_rows ? dft_size.width : dft_size.area()) : 1.0f; |
|
|
|
|
CLAMDDFT_Assert(clAmdFftSetPlanScale(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale)) |
|
|
|
|
|
|
|
|
|
// ready to bake
|
|
|
|
|
cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr(); |
|
|
|
|
CLAMDDFT_Assert(clAmdFftBakePlan(plHandle, 1, &queue, NULL, NULL)) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
~FftPlan() |
|
|
|
|
{ |
|
|
|
|
// clAmdFftDestroyPlan(&plHandle);
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
friend class PlanCache; |
|
|
|
|
|
|
|
|
|
private: |
|
|
|
|
Size dft_size; |
|
|
|
|
int src_step, dst_step; |
|
|
|
|
bool doubleFP; |
|
|
|
|
bool inplace; |
|
|
|
|
int flags; |
|
|
|
|
FftType fftType; |
|
|
|
|
|
|
|
|
|
cl_context context; |
|
|
|
|
clAmdFftPlanHandle plHandle; |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
public: |
|
|
|
|
static PlanCache & getInstance() |
|
|
|
|
{ |
|
|
|
|
static PlanCache planCache; |
|
|
|
|
return planCache; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
clAmdFftPlanHandle getPlanHandle(const Size & dft_size, int src_step, int dst_step, bool doubleFP, |
|
|
|
|
bool inplace, int flags, FftType fftType) |
|
|
|
|
{ |
|
|
|
|
cl_context currentContext = (cl_context)ocl::Context::getDefault().ptr(); |
|
|
|
|
|
|
|
|
|
for (size_t i = 0, size = planStorage.size(); i < size; ++i) |
|
|
|
|
{ |
|
|
|
|
const FftPlan * const plan = planStorage[i]; |
|
|
|
|
|
|
|
|
|
if (plan->dft_size == dft_size && |
|
|
|
|
plan->flags == flags && |
|
|
|
|
plan->src_step == src_step && |
|
|
|
|
plan->dst_step == dst_step && |
|
|
|
|
plan->doubleFP == doubleFP && |
|
|
|
|
plan->fftType == fftType && |
|
|
|
|
plan->inplace == inplace) |
|
|
|
|
{ |
|
|
|
|
if (plan->context != currentContext) |
|
|
|
|
{ |
|
|
|
|
planStorage.erase(planStorage.begin() + i); |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return plan->plHandle; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// no baked plan is found, so let's create a new one
|
|
|
|
|
FftPlan * newPlan = new FftPlan(dft_size, src_step, dst_step, doubleFP, inplace, flags, fftType); |
|
|
|
|
planStorage.push_back(newPlan); |
|
|
|
|
|
|
|
|
|
return newPlan->plHandle; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
~PlanCache() |
|
|
|
|
{ |
|
|
|
|
for (std::vector<FftPlan *>::iterator i = planStorage.begin(), end = planStorage.end(); i != end; ++i) |
|
|
|
|
delete (*i); |
|
|
|
|
planStorage.clear(); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
protected: |
|
|
|
|
PlanCache() : |
|
|
|
|
planStorage() |
|
|
|
|
{ |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
std::vector<FftPlan *> planStorage; |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
extern "C" { |
|
|
|
|
|
|
|
|
|
static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p) |
|
|
|
|
{ |
|
|
|
|
UMatData * u = (UMatData *)p; |
|
|
|
|
|
|
|
|
|
if( u && CV_XADD(&u->urefcount, -1) == 1 ) |
|
|
|
|
u->currAllocator->deallocate(u); |
|
|
|
|
u = 0; |
|
|
|
|
|
|
|
|
|
clReleaseEvent(e), e = 0; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Runs a complex-to-complex DFT of _src into _dst through clAmdFft.
//
// Returns false without enqueueing anything when the request is not handled
// here: unsupported element type, 64F data on a device without double support,
// a source or destination that does not start at the beginning of its buffer,
// a total size that getOptimalDFTSize() does not map to itself, any transform
// type other than C2C, or a temporary buffer too large for a UMat.
// Returns true once the transform has been enqueued on the default queue.
// Failing clAmdFft calls trip CLAMDDFT_Assert.
static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    Size ssize = _src.size();

    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    if ( (!doubleSupport && depth == CV_64F) ||
         !(type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2) ||
         _src.offset() != 0)
        return false;

    // only sizes that are a product of the primes { 2, 3, 5 } are handled
    if (ssize.area() != getOptimalDFTSize(ssize.area()))
        return false;

    int dst_complex_input = cn == 2 ? 1 : 0;
    bool dft_inverse = (flags & DFT_INVERSE) != 0;
    int dft_complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0;
    bool dft_real_output = (flags & DFT_REAL_OUTPUT) != 0;

    CV_Assert(dft_complex_output + dft_real_output < 2);
    // bit 0: complex input, bit 1: complex output
    FftType fftType = (FftType)(dst_complex_input << 0 | dft_complex_output << 1);

    switch (fftType)
    {
    case C2C:
        _dst.create(ssize.height, ssize.width, CV_MAKE_TYPE(depth, 2));
        break;
    case R2C: // TODO implement it if possible
    case C2R: // TODO implement it if possible
    case R2R: // AMD Fft does not support this type
    default:
        return false;
    }

    UMat src = _src.getUMat(), dst = _dst.getUMat();

    // The raw cl_mem handles are handed to clAmdFft without any offset, so a
    // destination that starts inside its buffer is rejected just like such a
    // source is rejected above.
    if (dst.offset != 0)
        return false;

    bool inplace = src.u == dst.u;

    clAmdFftPlanHandle plHandle = PlanCache::getInstance().
            getPlanHandle(ssize, (int)src.step, (int)dst.step,
                          depth == CV_64F, inplace, flags, fftType);

    // get the bufferSize
    size_t bufferSize = 0;
    CLAMDDFT_Assert(clAmdFftGetTmpBufSize(plHandle, &bufferSize))

    // the temporary buffer is a 1 x bufferSize byte UMat, whose width is an int
    if (bufferSize > (size_t)INT_MAX)
        return false;
    UMat tmpBuffer(1, (int)bufferSize, CV_8UC1);

    cl_mem srcarg = (cl_mem)src.handle(ACCESS_READ);
    cl_mem dstarg = (cl_mem)dst.handle(ACCESS_RW);

    cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
    cl_event e = 0;

    CLAMDDFT_Assert(clAmdFftEnqueueTransform(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
                                       1, &queue, 0, NULL, &e,
                                       &srcarg, &dstarg, (cl_mem)tmpBuffer.handle(ACCESS_RW)))

    // Keep the temporary buffer alive until the transform completes: the extra
    // reference taken here is given up by oclCleanupCallback, which also
    // releases the event.
    tmpBuffer.addref();
    if (clSetEventCallback(e, CL_COMPLETE, oclCleanupCallback, tmpBuffer.u) != CL_SUCCESS)
    {
        // The callback could not be registered, so nobody would ever drop the
        // extra reference or release the event. Wait for the transform and do
        // the cleanup right here instead.
        clWaitForEvents(1, &e);
        oclCleanupCallback(e, CL_COMPLETE, tmpBuffer.u);
    }

    return true;
}
|
|
|
|
|
|
|
|
|
// Undefine the helper that this section actually defines (the previous
// "#undef DFT_ASSERT" named a macro that does not exist here, so
// CLAMDDFT_Assert stayed defined for the rest of the file).
#undef CLAMDDFT_Assert
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#endif // HAVE_CLAMDFFT
|
|
|
|
|
#ifdef HAVE_OPENCL |
|
|
|
|
|
|
|
|
|
namespace cv |
|
|
|
|
{ |
|
|
|
|
|
|
|
|
|
#ifdef HAVE_OPENCL |
|
|
|
|
|
|
|
|
|
enum FftType |
|
|
|
|
{ |
|
|
|
|
R2R = 0, |
|
|
|
@ -2038,7 +1798,7 @@ static void ocl_getRadixes(int cols, std::vector<int>& radixes, std::vector<int> |
|
|
|
|
{ |
|
|
|
|
int factors[34]; |
|
|
|
|
int nf = DFTFactorize(cols, factors); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int n = 1; |
|
|
|
|
int factor_index = 0; |
|
|
|
|
min_radix = INT_MAX; |
|
|
|
@ -2118,7 +1878,7 @@ struct OCL_FftPlan |
|
|
|
|
ocl_getRadixes(dft_size, radixes, blocks, min_radix); |
|
|
|
|
thread_count = dft_size / min_radix; |
|
|
|
|
|
|
|
|
|
if (thread_count > ocl::Device::getDefault().maxWorkGroupSize()) |
|
|
|
|
if (thread_count > (int) ocl::Device::getDefault().maxWorkGroupSize()) |
|
|
|
|
{ |
|
|
|
|
status = false; |
|
|
|
|
return; |
|
|
|
@ -2141,13 +1901,13 @@ struct OCL_FftPlan |
|
|
|
|
Mat tw(1, twiddle_size, CV_32FC2); |
|
|
|
|
float* ptr = tw.ptr<float>(); |
|
|
|
|
int ptr_index = 0; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
n = 1; |
|
|
|
|
for (size_t i=0; i<radixes.size(); i++) |
|
|
|
|
{ |
|
|
|
|
int radix = radixes[i]; |
|
|
|
|
n *= radix; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (int j=1; j<radix; j++) |
|
|
|
|
{ |
|
|
|
|
double theta = -CV_TWO_PI*j/n; |
|
|
|
@ -2157,7 +1917,7 @@ struct OCL_FftPlan |
|
|
|
|
ptr[ptr_index++] = (float) cos(k*theta); |
|
|
|
|
ptr[ptr_index++] = (float) sin(k*theta); |
|
|
|
|
} |
|
|
|
|
}
|
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
twiddles = tw.getUMat(ACCESS_READ); |
|
|
|
|
|
|
|
|
@ -2165,7 +1925,7 @@ struct OCL_FftPlan |
|
|
|
|
dft_size, dft_size/thread_count, radix_processing.c_str()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
bool enqueueTransform(InputArray _src, OutputArray _dst, int dft_size, int flags, int fftType, bool rows = true) const |
|
|
|
|
bool enqueueTransform(InputArray _src, OutputArray _dst, int num_dfts, int flags, int fftType, bool rows = true) const |
|
|
|
|
{ |
|
|
|
|
if (!status) |
|
|
|
|
return false; |
|
|
|
@ -2177,7 +1937,7 @@ struct OCL_FftPlan |
|
|
|
|
size_t localsize[2]; |
|
|
|
|
String kernel_name; |
|
|
|
|
|
|
|
|
|
bool is1d = (flags & DFT_ROWS) != 0 || dft_size == 1; |
|
|
|
|
bool is1d = (flags & DFT_ROWS) != 0 || num_dfts == 1; |
|
|
|
|
bool inv = (flags & DFT_INVERSE) != 0; |
|
|
|
|
String options = buildOptions; |
|
|
|
|
|
|
|
|
@ -2191,7 +1951,7 @@ struct OCL_FftPlan |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
globalsize[0] = dft_size; globalsize[1] = thread_count; |
|
|
|
|
globalsize[0] = num_dfts; globalsize[1] = thread_count; |
|
|
|
|
localsize[0] = 1; localsize[1] = thread_count; |
|
|
|
|
kernel_name = !inv ? "fft_multi_radix_cols" : "ifft_multi_radix_cols"; |
|
|
|
|
if (flags & DFT_SCALE) |
|
|
|
@ -2201,7 +1961,7 @@ struct OCL_FftPlan |
|
|
|
|
options += src.channels() == 1 ? " -D REAL_INPUT" : " -D COMPLEX_INPUT"; |
|
|
|
|
options += dst.channels() == 1 ? " -D REAL_OUTPUT" : " -D COMPLEX_OUTPUT"; |
|
|
|
|
options += is1d ? " -D IS_1D" : ""; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (!inv) |
|
|
|
|
{ |
|
|
|
|
if ((is1d && src.channels() == 1) || (rows && (fftType == R2R))) |
|
|
|
@ -2219,7 +1979,7 @@ struct OCL_FftPlan |
|
|
|
|
if (k.empty()) |
|
|
|
|
return false; |
|
|
|
|
|
|
|
|
|
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(twiddles), thread_count, dft_size); |
|
|
|
|
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(twiddles), thread_count, num_dfts); |
|
|
|
|
return k.run(2, globalsize, localsize, false); |
|
|
|
|
} |
|
|
|
|
}; |
|
|
|
@ -2232,7 +1992,7 @@ public: |
|
|
|
|
static OCL_FftPlanCache planCache; |
|
|
|
|
return planCache; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
OCL_FftPlan* getFftPlan(int dft_size) |
|
|
|
|
{ |
|
|
|
|
for (size_t i = 0, size = planStorage.size(); i < size; ++i) |
|
|
|
@ -2280,11 +2040,9 @@ static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int nonzero_cols |
|
|
|
|
|
|
|
|
|
static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows) |
|
|
|
|
{ |
|
|
|
|
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); |
|
|
|
|
int type = _src.type(), cn = CV_MAT_CN(type); |
|
|
|
|
Size ssize = _src.size(); |
|
|
|
|
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; |
|
|
|
|
if ( (!doubleSupport && depth == CV_64F) || |
|
|
|
|
!(type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2)) |
|
|
|
|
if ( !(type == CV_32FC1 || type == CV_32FC2) ) |
|
|
|
|
return false; |
|
|
|
|
|
|
|
|
|
// if is not a multiplication of prime numbers { 2, 3, 5 }
|
|
|
|
@ -2325,7 +2083,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro |
|
|
|
|
if (fftType == C2C || fftType == R2C) |
|
|
|
|
{ |
|
|
|
|
// complex output
|
|
|
|
|
_dst.create(src.size(), CV_32FC2);
|
|
|
|
|
_dst.create(src.size(), CV_32FC2); |
|
|
|
|
output = _dst.getUMat(); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
@ -2381,7 +2139,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro |
|
|
|
|
int nonzero_cols = src.cols/2 + 1; |
|
|
|
|
if (!ocl_dft_C2C_cols(src, output, nonzero_cols, flags, fftType)) |
|
|
|
|
return false; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (!ocl_dft_C2C_rows(output, _dst, nonzero_rows, flags, fftType)) |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
@ -2390,11 +2148,248 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro |
|
|
|
|
return true; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
} // namespace cv;
|
|
|
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
} // namespace cv;
|
|
|
|
|
#ifdef HAVE_CLAMDFFT |
|
|
|
|
|
|
|
|
|
namespace cv { |
|
|
|
|
|
|
|
|
|
// Evaluates a clAmdFft call exactly once and raises a CV_Assert failure unless
// it returned CLFFT_SUCCESS. The expansion is a bare brace block, so call sites
// compile both with and without a trailing semicolon.
#define CLAMDDFT_Assert(func) \
    { \
        clAmdFftStatus s = (func); \
        CV_Assert(s == CLFFT_SUCCESS); \
    }
|
|
|
|
|
|
|
|
|
class PlanCache |
|
|
|
|
{ |
|
|
|
|
struct FftPlan |
|
|
|
|
{ |
|
|
|
|
FftPlan(const Size & _dft_size, int _src_step, int _dst_step, bool _doubleFP, bool _inplace, int _flags, FftType _fftType) : |
|
|
|
|
dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), |
|
|
|
|
doubleFP(_doubleFP), inplace(_inplace), flags(_flags), fftType(_fftType), |
|
|
|
|
context((cl_context)ocl::Context::getDefault().ptr()), plHandle(0) |
|
|
|
|
{ |
|
|
|
|
bool dft_inverse = (flags & DFT_INVERSE) != 0; |
|
|
|
|
bool dft_scale = (flags & DFT_SCALE) != 0; |
|
|
|
|
bool dft_rows = (flags & DFT_ROWS) != 0; |
|
|
|
|
|
|
|
|
|
clAmdFftLayout inLayout = CLFFT_REAL, outLayout = CLFFT_REAL; |
|
|
|
|
clAmdFftDim dim = dft_size.height == 1 || dft_rows ? CLFFT_1D : CLFFT_2D; |
|
|
|
|
|
|
|
|
|
size_t batchSize = dft_rows ? dft_size.height : 1; |
|
|
|
|
size_t clLengthsIn[3] = { dft_size.width, dft_rows ? 1 : dft_size.height, 1 }; |
|
|
|
|
size_t clStridesIn[3] = { 1, 1, 1 }; |
|
|
|
|
size_t clStridesOut[3] = { 1, 1, 1 }; |
|
|
|
|
int elemSize = doubleFP ? sizeof(double) : sizeof(float); |
|
|
|
|
|
|
|
|
|
switch (fftType) |
|
|
|
|
{ |
|
|
|
|
case C2C: |
|
|
|
|
inLayout = CLFFT_COMPLEX_INTERLEAVED; |
|
|
|
|
outLayout = CLFFT_COMPLEX_INTERLEAVED; |
|
|
|
|
clStridesIn[1] = src_step / (elemSize << 1); |
|
|
|
|
clStridesOut[1] = dst_step / (elemSize << 1); |
|
|
|
|
break; |
|
|
|
|
case R2C: |
|
|
|
|
inLayout = CLFFT_REAL; |
|
|
|
|
outLayout = CLFFT_HERMITIAN_INTERLEAVED; |
|
|
|
|
clStridesIn[1] = src_step / elemSize; |
|
|
|
|
clStridesOut[1] = dst_step / (elemSize << 1); |
|
|
|
|
break; |
|
|
|
|
case C2R: |
|
|
|
|
inLayout = CLFFT_HERMITIAN_INTERLEAVED; |
|
|
|
|
outLayout = CLFFT_REAL; |
|
|
|
|
clStridesIn[1] = src_step / (elemSize << 1); |
|
|
|
|
clStridesOut[1] = dst_step / elemSize; |
|
|
|
|
break; |
|
|
|
|
case R2R: |
|
|
|
|
default: |
|
|
|
|
CV_Error(Error::StsNotImplemented, "AMD Fft does not support this type"); |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
clStridesIn[2] = dft_rows ? clStridesIn[1] : dft_size.width * clStridesIn[1]; |
|
|
|
|
clStridesOut[2] = dft_rows ? clStridesOut[1] : dft_size.width * clStridesOut[1]; |
|
|
|
|
|
|
|
|
|
CLAMDDFT_Assert(clAmdFftCreateDefaultPlan(&plHandle, (cl_context)ocl::Context::getDefault().ptr(), dim, clLengthsIn)) |
|
|
|
|
|
|
|
|
|
// setting plan properties
|
|
|
|
|
CLAMDDFT_Assert(clAmdFftSetPlanPrecision(plHandle, doubleFP ? CLFFT_DOUBLE : CLFFT_SINGLE)); |
|
|
|
|
CLAMDDFT_Assert(clAmdFftSetResultLocation(plHandle, inplace ? CLFFT_INPLACE : CLFFT_OUTOFPLACE)) |
|
|
|
|
CLAMDDFT_Assert(clAmdFftSetLayout(plHandle, inLayout, outLayout)) |
|
|
|
|
CLAMDDFT_Assert(clAmdFftSetPlanBatchSize(plHandle, batchSize)) |
|
|
|
|
CLAMDDFT_Assert(clAmdFftSetPlanInStride(plHandle, dim, clStridesIn)) |
|
|
|
|
CLAMDDFT_Assert(clAmdFftSetPlanOutStride(plHandle, dim, clStridesOut)) |
|
|
|
|
CLAMDDFT_Assert(clAmdFftSetPlanDistance(plHandle, clStridesIn[dim], clStridesOut[dim])) |
|
|
|
|
|
|
|
|
|
float scale = dft_scale ? 1.0f / (dft_rows ? dft_size.width : dft_size.area()) : 1.0f; |
|
|
|
|
CLAMDDFT_Assert(clAmdFftSetPlanScale(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale)) |
|
|
|
|
|
|
|
|
|
// ready to bake
|
|
|
|
|
cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr(); |
|
|
|
|
CLAMDDFT_Assert(clAmdFftBakePlan(plHandle, 1, &queue, NULL, NULL)) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
~FftPlan() |
|
|
|
|
{ |
|
|
|
|
// clAmdFftDestroyPlan(&plHandle);
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
friend class PlanCache; |
|
|
|
|
|
|
|
|
|
private: |
|
|
|
|
Size dft_size; |
|
|
|
|
int src_step, dst_step; |
|
|
|
|
bool doubleFP; |
|
|
|
|
bool inplace; |
|
|
|
|
int flags; |
|
|
|
|
FftType fftType; |
|
|
|
|
|
|
|
|
|
cl_context context; |
|
|
|
|
clAmdFftPlanHandle plHandle; |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
public: |
|
|
|
|
static PlanCache & getInstance() |
|
|
|
|
{ |
|
|
|
|
static PlanCache planCache; |
|
|
|
|
return planCache; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
clAmdFftPlanHandle getPlanHandle(const Size & dft_size, int src_step, int dst_step, bool doubleFP, |
|
|
|
|
bool inplace, int flags, FftType fftType) |
|
|
|
|
{ |
|
|
|
|
cl_context currentContext = (cl_context)ocl::Context::getDefault().ptr(); |
|
|
|
|
|
|
|
|
|
for (size_t i = 0, size = planStorage.size(); i < size; ++i) |
|
|
|
|
{ |
|
|
|
|
const FftPlan * const plan = planStorage[i]; |
|
|
|
|
|
|
|
|
|
if (plan->dft_size == dft_size && |
|
|
|
|
plan->flags == flags && |
|
|
|
|
plan->src_step == src_step && |
|
|
|
|
plan->dst_step == dst_step && |
|
|
|
|
plan->doubleFP == doubleFP && |
|
|
|
|
plan->fftType == fftType && |
|
|
|
|
plan->inplace == inplace) |
|
|
|
|
{ |
|
|
|
|
if (plan->context != currentContext) |
|
|
|
|
{ |
|
|
|
|
planStorage.erase(planStorage.begin() + i); |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return plan->plHandle; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// no baked plan is found, so let's create a new one
|
|
|
|
|
FftPlan * newPlan = new FftPlan(dft_size, src_step, dst_step, doubleFP, inplace, flags, fftType); |
|
|
|
|
planStorage.push_back(newPlan); |
|
|
|
|
|
|
|
|
|
return newPlan->plHandle; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
~PlanCache() |
|
|
|
|
{ |
|
|
|
|
for (std::vector<FftPlan *>::iterator i = planStorage.begin(), end = planStorage.end(); i != end; ++i) |
|
|
|
|
delete (*i); |
|
|
|
|
planStorage.clear(); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
protected: |
|
|
|
|
PlanCache() : |
|
|
|
|
planStorage() |
|
|
|
|
{ |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
std::vector<FftPlan *> planStorage; |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
extern "C" { |
|
|
|
|
|
|
|
|
|
static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p) |
|
|
|
|
{ |
|
|
|
|
UMatData * u = (UMatData *)p; |
|
|
|
|
|
|
|
|
|
if( u && CV_XADD(&u->urefcount, -1) == 1 ) |
|
|
|
|
u->currAllocator->deallocate(u); |
|
|
|
|
u = 0; |
|
|
|
|
|
|
|
|
|
clReleaseEvent(e), e = 0; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Runs a complex-to-complex DFT of _src into _dst through clAmdFft.
//
// Returns false without enqueueing anything when the request is not handled
// here: unsupported element type, 64F data on a device without double support,
// a source or destination that does not start at the beginning of its buffer,
// a total size that getOptimalDFTSize() does not map to itself, any transform
// type other than C2C, or a temporary buffer too large for a UMat.
// Returns true once the transform has been enqueued on the default queue.
// Failing clAmdFft calls trip CLAMDDFT_Assert.
static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    Size ssize = _src.size();

    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    if ( (!doubleSupport && depth == CV_64F) ||
         !(type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2) ||
         _src.offset() != 0)
        return false;

    // only sizes that are a product of the primes { 2, 3, 5 } are handled
    if (ssize.area() != getOptimalDFTSize(ssize.area()))
        return false;

    int dst_complex_input = cn == 2 ? 1 : 0;
    bool dft_inverse = (flags & DFT_INVERSE) != 0;
    int dft_complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0;
    bool dft_real_output = (flags & DFT_REAL_OUTPUT) != 0;

    CV_Assert(dft_complex_output + dft_real_output < 2);
    // bit 0: complex input, bit 1: complex output
    FftType fftType = (FftType)(dst_complex_input << 0 | dft_complex_output << 1);

    switch (fftType)
    {
    case C2C:
        _dst.create(ssize.height, ssize.width, CV_MAKE_TYPE(depth, 2));
        break;
    case R2C: // TODO implement it if possible
    case C2R: // TODO implement it if possible
    case R2R: // AMD Fft does not support this type
    default:
        return false;
    }

    UMat src = _src.getUMat(), dst = _dst.getUMat();

    // The raw cl_mem handles are handed to clAmdFft without any offset, so a
    // destination that starts inside its buffer is rejected just like such a
    // source is rejected above.
    if (dst.offset != 0)
        return false;

    bool inplace = src.u == dst.u;

    clAmdFftPlanHandle plHandle = PlanCache::getInstance().
            getPlanHandle(ssize, (int)src.step, (int)dst.step,
                          depth == CV_64F, inplace, flags, fftType);

    // get the bufferSize
    size_t bufferSize = 0;
    CLAMDDFT_Assert(clAmdFftGetTmpBufSize(plHandle, &bufferSize))

    // the temporary buffer is a 1 x bufferSize byte UMat, whose width is an int
    if (bufferSize > (size_t)INT_MAX)
        return false;
    UMat tmpBuffer(1, (int)bufferSize, CV_8UC1);

    cl_mem srcarg = (cl_mem)src.handle(ACCESS_READ);
    cl_mem dstarg = (cl_mem)dst.handle(ACCESS_RW);

    cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
    cl_event e = 0;

    CLAMDDFT_Assert(clAmdFftEnqueueTransform(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
                                       1, &queue, 0, NULL, &e,
                                       &srcarg, &dstarg, (cl_mem)tmpBuffer.handle(ACCESS_RW)))

    // Keep the temporary buffer alive until the transform completes: the extra
    // reference taken here is given up by oclCleanupCallback, which also
    // releases the event.
    tmpBuffer.addref();
    if (clSetEventCallback(e, CL_COMPLETE, oclCleanupCallback, tmpBuffer.u) != CL_SUCCESS)
    {
        // The callback could not be registered, so nobody would ever drop the
        // extra reference or release the event. Wait for the transform and do
        // the cleanup right here instead.
        clWaitForEvents(1, &e);
        oclCleanupCallback(e, CL_COMPLETE, tmpBuffer.u);
    }

    return true;
}
|
|
|
|
|
|
|
|
|
// Undefine the helper that this section actually defines (the previous
// "#undef DFT_ASSERT" named a macro that does not exist here, so
// CLAMDDFT_Assert stayed defined for the rest of the file).
#undef CLAMDDFT_Assert
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#endif // HAVE_CLAMDFFT
|
|
|
|
|
|
|
|
|
|
void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) |
|
|
|
|
{ |
|
|
|
|