|
|
|
@ -439,8 +439,6 @@ namespace integral_detail |
|
|
|
|
|
|
|
|
|
T sum = (tidx < cols) && (y < rows) ? *p : 0; |
|
|
|
|
|
|
|
|
|
y += blockDim.y; |
|
|
|
|
|
|
|
|
|
sums[threadIdx.x][threadIdx.y] = sum; |
|
|
|
|
__syncthreads(); |
|
|
|
|
|
|
|
|
@ -467,14 +465,17 @@ namespace integral_detail |
|
|
|
|
if (threadIdx.y > 0) |
|
|
|
|
sum += sums[threadIdx.x][threadIdx.y - 1]; |
|
|
|
|
|
|
|
|
|
if (tidx < cols) |
|
|
|
|
sum += stepSum; |
|
|
|
|
stepSum += sums[threadIdx.x][blockDim.y - 1]; |
|
|
|
|
|
|
|
|
|
__syncthreads(); |
|
|
|
|
|
|
|
|
|
if ((tidx < cols) && (y < rows)) |
|
|
|
|
{ |
|
|
|
|
sum += stepSum; |
|
|
|
|
stepSum += sums[threadIdx.x][blockDim.y - 1]; |
|
|
|
|
*p = sum; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__syncthreads(); |
|
|
|
|
y += blockDim.y; |
|
|
|
|
} |
|
|
|
|
#else |
|
|
|
|
__shared__ T smem[32][32]; |
|
|
|
|