From 781c04324eab9537dc3ddb0b01f75975990b8e14 Mon Sep 17 00:00:00 2001 From: "marina.kolpakova" Date: Wed, 14 Nov 2012 14:47:00 +0400 Subject: [PATCH] refactor: PrefixSum --- modules/gpu/src/cuda/isf-sc.cu | 60 ++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/modules/gpu/src/cuda/isf-sc.cu b/modules/gpu/src/cuda/isf-sc.cu index a4496bf678..27d60e6372 100644 --- a/modules/gpu/src/cuda/isf-sc.cu +++ b/modules/gpu/src/cuda/isf-sc.cu @@ -79,6 +79,39 @@ namespace icf { } } + template + struct PrefixSum + { + __device static void apply(float& impact) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 + #pragma unroll + // scan on shuffl functions + for (int i = 1; i < Policy::WARP; i *= 2) + { + const float n = __shfl_up(impact, i, Policy::WARP); + + if (threadIdx.x >= i) + impact += n; + } + #else + __shared__ volatile float ptr[Policy::STA_X * Policy::STA_Y]; + + const int idx = threadIdx.y * Policy::STA_X + threadIdx.x; + + ptr[idx] = impact; + + if ( threadIdx.x >= 1) ptr [idx ] = (ptr [idx - 1] + ptr [idx]); + if ( threadIdx.x >= 2) ptr [idx ] = (ptr [idx - 2] + ptr [idx]); + if ( threadIdx.x >= 4) ptr [idx ] = (ptr [idx - 4] + ptr [idx]); + if ( threadIdx.x >= 8) ptr [idx ] = (ptr [idx - 8] + ptr [idx]); + if ( threadIdx.x >= 16) ptr [idx ] = (ptr [idx - 16] + ptr [idx]); + + impact = ptr[idx]; + #endif + } + }; + texture thogluv; template @@ -201,32 +234,9 @@ __device void CascadeInvoker::detect(Detection* objects, const uint ndet const int lShift = (next - 1) * 2 + (int)(sum >= threshold); float impact = leaves[(st + threadIdx.x) * 4 + lShift]; -#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 -#pragma unroll - // scan on shuffl functions - for (int i = 1; i < Policy::WARP; i *= 2) - { - const float n = __shfl_up(impact, i, Policy::WARP); - - if (threadIdx.x >= i) - impact += n; - } -#else - __shared__ volatile float ptr[Policy::STA_X * Policy::STA_Y]; - - const int idx = threadIdx.y * Policy::STA_X + threadIdx.x; - - ptr[idx] = impact; - - if ( threadIdx.x >= 1) ptr [idx ] = (ptr [idx - 1] + ptr [idx]); - if ( threadIdx.x >= 2) ptr [idx ] = (ptr [idx - 2] + ptr [idx]); - if ( threadIdx.x >= 4) ptr [idx ] = (ptr [idx - 4] + ptr [idx]); - if ( threadIdx.x >= 8) ptr [idx ] = (ptr [idx - 8] + ptr [idx]); - if ( threadIdx.x >= 16) ptr [idx ] = (ptr [idx - 16] + ptr [idx]); - - impact = ptr[idx]; -#endif + PrefixSum::apply(impact); confidence += impact; + if(__any((confidence <= stages[(st + threadIdx.x)]))) st += 2048; }