From 1b9bccb856c7ff2f80c99b6b085db264cffcf79d Mon Sep 17 00:00:00 2001 From: "marina.kolpakova" Date: Mon, 8 Oct 2012 20:20:57 +0400 Subject: [PATCH] move Level into shared memory --- modules/gpu/src/cuda/isf-sc.cu | 24 +++++++----------------- modules/gpu/src/icf.hpp | 2 ++ 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/modules/gpu/src/cuda/isf-sc.cu b/modules/gpu/src/cuda/isf-sc.cu index 8df6907df2..f755f85499 100644 --- a/modules/gpu/src/cuda/isf-sc.cu +++ b/modules/gpu/src/cuda/isf-sc.cu @@ -94,11 +94,6 @@ namespace icf { float relScale = level.relScale; float farea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y); - dprintf("%d: feature %d box %d %d %d %d\n",threadIdx.x, (node.threshold >> 28), scaledRect.x, scaledRect.y, - scaledRect.z, scaledRect.w); - dprintf("%d: rescale: %f [%f %f] selected %f\n",threadIdx.x, level.relScale, level.scaling[0], level.scaling[1], - level.scaling[(node.threshold >> 28) > 6]); - // rescale scaledRect.x = __float2int_rn(relScale * scaledRect.x); scaledRect.y = __float2int_rn(relScale * scaledRect.y); @@ -110,14 +105,7 @@ namespace icf { const float expected_new_area = farea * relScale * relScale; float approx = __fdividef(sarea, expected_new_area); - dprintf("%d: new rect: %d box %d %d %d %d rel areas %f %f\n",threadIdx.x, (node.threshold >> 28), - scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w, farea * relScale * relScale, sarea); - - float rootThreshold = (node.threshold & 0x0FFFFFFFU) * approx; - rootThreshold *= level.scaling[(node.threshold >> 28) > 6]; - - dprintf("%d: approximation %f %d -> %f %f\n",threadIdx.x, approx, (node.threshold & 0x0FFFFFFFU), rootThreshold, - level.scaling[(node.threshold >> 28) > 6]); + float rootThreshold = (node.threshold & 0x0FFFFFFFU) * approx * level.scaling[(node.threshold >> 28) > 6]; return rootThreshold; } @@ -179,18 +167,20 @@ namespace icf { const int y = blockIdx.y * blockDim.y + threadIdx.y; const int x = blockIdx.x; - __shared__ volatile char roiCache[8]; + // load Lavel + __shared__ Level level; + // check POI + __shared__ volatile char roiCache[8]; if (!threadIdx.y && !threadIdx.x) - { ((float2*)roiCache)[threadIdx.x] = tex2D(troi, blockIdx.y, x); - } __syncthreads(); if (!roiCache[threadIdx.y]) return; - Level level = levels[downscales + blockIdx.z]; + if (!threadIdx.x) + level = levels[downscales + blockIdx.z]; if(x >= level.workRect.x || y >= level.workRect.y) return; diff --git a/modules/gpu/src/icf.hpp b/modules/gpu/src/icf.hpp index 35658892f6..a103341fb0 100644 --- a/modules/gpu/src/icf.hpp +++ b/modules/gpu/src/icf.hpp @@ -90,6 +90,8 @@ struct __align__(8) Level //is actually 24 bytes objSize.x = round(oct.size.x * relScale); objSize.y = round(oct.size.y * relScale); } + + __device Level(){} }; struct __align__(8) Node