|
|
|
@ -52,7 +52,7 @@ |
|
|
|
|
#endif |
|
|
|
|
#ifdef CPU |
|
|
|
|
|
|
|
|
|
static void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid) |
|
|
|
|
inline void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid) |
|
|
|
|
{ |
|
|
|
|
smem1[tid] = val1; |
|
|
|
|
smem2[tid] = val2; |
|
|
|
@ -71,7 +71,7 @@ static void reduce3(float val1, float val2, float val3, __local float* smem1, |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void reduce2(float val1, float val2, volatile __local float* smem1, volatile __local float* smem2, int tid) |
|
|
|
|
inline void reduce2(float val1, float val2, volatile __local float* smem1, volatile __local float* smem2, int tid) |
|
|
|
|
{ |
|
|
|
|
smem1[tid] = val1; |
|
|
|
|
smem2[tid] = val2; |
|
|
|
@ -88,7 +88,7 @@ static void reduce2(float val1, float val2, volatile __local float* smem1, volat |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void reduce1(float val1, volatile __local float* smem1, int tid) |
|
|
|
|
inline void reduce1(float val1, volatile __local float* smem1, int tid) |
|
|
|
|
{ |
|
|
|
|
smem1[tid] = val1; |
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
@ -103,7 +103,7 @@ static void reduce1(float val1, volatile __local float* smem1, int tid) |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
#else |
|
|
|
|
static void reduce3(float val1, float val2, float val3, |
|
|
|
|
inline void reduce3(float val1, float val2, float val3, |
|
|
|
|
__local volatile float* smem1, __local volatile float* smem2, __local volatile float* smem3, int tid) |
|
|
|
|
{ |
|
|
|
|
smem1[tid] = val1; |
|
|
|
@ -150,7 +150,7 @@ static void reduce3(float val1, float val2, float val3, |
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid) |
|
|
|
|
inline void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid) |
|
|
|
|
{ |
|
|
|
|
smem1[tid] = val1; |
|
|
|
|
smem2[tid] = val2; |
|
|
|
@ -189,7 +189,7 @@ static void reduce2(float val1, float val2, __local volatile float* smem1, __loc |
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void reduce1(float val1, __local volatile float* smem1, int tid) |
|
|
|
|
inline void reduce1(float val1, __local volatile float* smem1, int tid) |
|
|
|
|
{ |
|
|
|
|
smem1[tid] = val1; |
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
@ -225,7 +225,7 @@ static void reduce1(float val1, __local volatile float* smem1, int tid) |
|
|
|
|
// Image read mode |
|
|
|
|
// Program-scope sampler passed to read_imagef (e.g. in GetError4).
// The flags select unnormalized coordinates, clamp-to-edge addressing
// and linear filtering.
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR; |
|
|
|
|
|
|
|
|
|
static void SetPatch(image2d_t I, float x, float y, |
|
|
|
|
inline void SetPatch(image2d_t I, float x, float y, |
|
|
|
|
float* Pch, float* Dx, float* Dy, |
|
|
|
|
float* A11, float* A12, float* A22) |
|
|
|
|
{ |
|
|
|
@ -262,7 +262,7 @@ inline void GetError(image2d_t J, const float x, const float y, const float* Pch |
|
|
|
|
*errval += fabs(diff); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void SetPatch4(image2d_t I, const float x, const float y, |
|
|
|
|
inline void SetPatch4(image2d_t I, const float x, const float y, |
|
|
|
|
float4* Pch, float4* Dx, float4* Dy, |
|
|
|
|
float* A11, float* A12, float* A22) |
|
|
|
|
{ |
|
|
|
@ -285,7 +285,7 @@ static void SetPatch4(image2d_t I, const float x, const float y, |
|
|
|
|
*A22 += sqIdx.x + sqIdx.y + sqIdx.z; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void GetPatch4(image2d_t J, const float x, const float y, |
|
|
|
|
inline void GetPatch4(image2d_t J, const float x, const float y, |
|
|
|
|
const float4* Pch, const float4* Dx, const float4* Dy, |
|
|
|
|
float* b1, float* b2) |
|
|
|
|
{ |
|
|
|
@ -297,7 +297,7 @@ static void GetPatch4(image2d_t J, const float x, const float y, |
|
|
|
|
*b2 += xdiff.x + xdiff.y + xdiff.z; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void GetError4(image2d_t J, const float x, const float y, const float4* Pch, float* errval) |
|
|
|
|
inline void GetError4(image2d_t J, const float x, const float y, const float4* Pch, float* errval) |
|
|
|
|
{ |
|
|
|
|
float4 diff = read_imagef(J, sampler, (float2)(x,y))-*Pch; |
|
|
|
|
*errval += fabs(diff.x) + fabs(diff.y) + fabs(diff.z); |
|
|
|
|