|
|
|
@ -51,18 +51,18 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
{ |
|
|
|
|
const unsigned int lane = tid & 31; // index of thread in warp (0..31)
|
|
|
|
|
|
|
|
|
|
if (lane < 16) |
|
|
|
|
{
|
|
|
|
|
T partial = ptr[tid]; |
|
|
|
|
if (lane < 16) |
|
|
|
|
{ |
|
|
|
|
T partial = ptr[tid]; |
|
|
|
|
|
|
|
|
|
ptr[tid] = partial = partial + ptr[tid + 16]; |
|
|
|
|
ptr[tid] = partial = partial + ptr[tid + 8]; |
|
|
|
|
ptr[tid] = partial = partial + ptr[tid + 4]; |
|
|
|
|
ptr[tid] = partial = partial + ptr[tid + 2]; |
|
|
|
|
ptr[tid] = partial = partial + ptr[tid + 1];
|
|
|
|
|
} |
|
|
|
|
ptr[tid] = partial = partial + ptr[tid + 16]; |
|
|
|
|
ptr[tid] = partial = partial + ptr[tid + 8]; |
|
|
|
|
ptr[tid] = partial = partial + ptr[tid + 4]; |
|
|
|
|
ptr[tid] = partial = partial + ptr[tid + 2]; |
|
|
|
|
ptr[tid] = partial = partial + ptr[tid + 1]; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return ptr[tid - lane]; |
|
|
|
|
return ptr[tid - lane]; |
|
|
|
|
} |
|
|
|
|
}}} // namespace cv { namespace gpu { namespace device {
|
|
|
|
|
|
|
|
|
|