Merge branch 'work'

pull/2/head
Marina Kolpakova 13 years ago
parent 18918a5e44
commit 6620c5c92d
  1. 28
      modules/gpu/src/opencv2/gpu/device/warp_reduce.hpp

@ -44,25 +44,25 @@
#ifndef OPENCV_GPU_WARP_REDUCE_HPP__
#define OPENCV_GPU_WARP_REDUCE_HPP__
namespace cv { namespace gpu { namespace device
{
template <class T>
namespace cv { namespace gpu { namespace device
{
template <class T>
__device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
{
const unsigned int lane = tid & 31; // index of thread in warp (0..31)
if (lane < 16)
{
T partial = ptr[tid];
ptr[tid] = partial = partial + ptr[tid + 16];
ptr[tid] = partial = partial + ptr[tid + 8];
ptr[tid] = partial = partial + ptr[tid + 4];
ptr[tid] = partial = partial + ptr[tid + 2];
ptr[tid] = partial = partial + ptr[tid + 1];
}
if (lane < 16)
{
T partial = ptr[tid];
return ptr[tid - lane];
ptr[tid] = partial = partial + ptr[tid + 16];
ptr[tid] = partial = partial + ptr[tid + 8];
ptr[tid] = partial = partial + ptr[tid + 4];
ptr[tid] = partial = partial + ptr[tid + 2];
ptr[tid] = partial = partial + ptr[tid + 1];
}
return ptr[tid - lane];
}
}}} // namespace cv { namespace gpu { namespace device {

Loading…
Cancel
Save