|
|
|
@ -536,9 +536,9 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, |
|
|
|
|
const int x = get_global_id(0); |
|
|
|
|
const int y = get_group_id(1); |
|
|
|
|
const int block_size = LOCAL_SIZE/kercn; |
|
|
|
|
__local CT smem[LOCAL_SIZE]; // used in (y < nz) code branch only, but should be declared in the outermost scope of a kernel function |
|
|
|
|
if (y < nz) |
|
|
|
|
{ |
|
|
|
|
__local CT smem[LOCAL_SIZE]; |
|
|
|
|
__global const CT* twiddles = (__global const CT*)(twiddles_ptr + twiddles_offset); |
|
|
|
|
const int ind = x; |
|
|
|
|
#ifdef IS_1D |
|
|
|
@ -615,9 +615,9 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step, |
|
|
|
|
const int x = get_group_id(0); |
|
|
|
|
const int y = get_global_id(1); |
|
|
|
|
|
|
|
|
|
__local CT smem[LOCAL_SIZE]; // used in (x < nz) code branch only, but should be declared in the outermost scope of a kernel function |
|
|
|
|
if (x < nz) |
|
|
|
|
{ |
|
|
|
|
__local CT smem[LOCAL_SIZE]; |
|
|
|
|
__global const uchar* src = src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(CT)), src_offset)); |
|
|
|
|
__global const CT* twiddles = (__global const CT*)(twiddles_ptr + twiddles_offset); |
|
|
|
|
const int ind = y; |
|
|
|
@ -682,9 +682,9 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step, |
|
|
|
|
const FT scale = (FT) 1/(dst_cols*dst_rows); |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
__local CT smem[LOCAL_SIZE]; // used in (y < nz) code branch only, but should be declared in the outermost scope of a kernel function |
|
|
|
|
if (y < nz) |
|
|
|
|
{ |
|
|
|
|
__local CT smem[LOCAL_SIZE]; |
|
|
|
|
__global const CT* twiddles = (__global const CT*)(twiddles_ptr + twiddles_offset); |
|
|
|
|
const int ind = x; |
|
|
|
|
|
|
|
|
@ -782,10 +782,10 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step, |
|
|
|
|
const int x = get_group_id(0); |
|
|
|
|
const int y = get_global_id(1); |
|
|
|
|
|
|
|
|
|
#ifdef COMPLEX_INPUT |
|
|
|
|
__local CT smem[LOCAL_SIZE]; // used in (x < nz) code branch only, but should be declared in the outermost scope of a kernel function |
|
|
|
|
if (x < nz) |
|
|
|
|
{ |
|
|
|
|
__local CT smem[LOCAL_SIZE]; |
|
|
|
|
#ifdef COMPLEX_INPUT |
|
|
|
|
__global const uchar* src = src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(CT)), src_offset)); |
|
|
|
|
__global uchar* dst = dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(CT)), dst_offset)); |
|
|
|
|
__global const CT* twiddles = (__global const CT*)(twiddles_ptr + twiddles_offset); |
|
|
|
@ -812,15 +812,11 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step, |
|
|
|
|
res[0].x = smem[y + i*block_size].x; |
|
|
|
|
res[0].y = -smem[y + i*block_size].y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
#else |
|
|
|
|
if (x < nz) |
|
|
|
|
{ |
|
|
|
|
__global const CT* twiddles = (__global const CT*)(twiddles_ptr + twiddles_offset); |
|
|
|
|
const int ind = y; |
|
|
|
|
const int block_size = LOCAL_SIZE/kercn; |
|
|
|
|
|
|
|
|
|
__local CT smem[LOCAL_SIZE]; |
|
|
|
|
#ifdef EVEN |
|
|
|
|
if (x!=0 && (x!=(nz-1))) |
|
|
|
|
#else |
|
|
|
@ -877,6 +873,6 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step, |
|
|
|
|
res[0].x = smem[y + i*block_size].x; |
|
|
|
|
res[0].y = -smem[y + i*block_size].y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|