From ee291a15da3614cf8e76a18378de0eb9c4e69a4b Mon Sep 17 00:00:00 2001 From: "marina.kolpakova" Date: Wed, 28 Nov 2012 00:16:23 +0400 Subject: [PATCH] add preprocessing only function --- modules/gpu/include/opencv2/gpu/gpu.hpp | 3 + modules/gpu/src/cuda/icf-sc.cu | 103 ++++++++++++++++++++++++ modules/gpu/src/softcascade.cpp | 33 ++++++++ 3 files changed, 139 insertions(+) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 2bbfe532f0..bdb9f8c6c2 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -1576,6 +1576,9 @@ public: // Param stream is stream is a high-level CUDA stream abstraction used for asynchronous execution virtual void detect(InputArray image, InputArray rois, OutputArray objects, Stream& stream = Stream::Null()) const; + // Preprocessing only + virtual void preprocess(InputArray image, OutputArray channels, Stream& stream = Stream::Null()) const; + // Convert ROI matrix into the suitable for detect method. // Param roi is an input matrix of the same size as the image. // There non zero value mean that detector should be executed in this point. 
diff --git a/modules/gpu/src/cuda/icf-sc.cu b/modules/gpu/src/cuda/icf-sc.cu
index 9f1661c63a..f59b08e593 100644
--- a/modules/gpu/src/cuda/icf-sc.cu
+++ b/modules/gpu/src/cuda/icf-sc.cu
@@ -41,6 +41,7 @@ //M*/ #include +#include #include #include
@@ -49,6 +50,141 @@ namespace cv { namespace gpu { namespace device { namespace icf {
+    // Converts one BGR pixel, with components already normalized to [0, 1], into 8-bit CIE Luv.
+    // L, u and v are linearly rescaled from the ranges noted in the body to [0, 255].
+    __device__ __forceinline__ void luv(const float& b, const float& g, const float& r, uchar& outL, uchar& outU, uchar& outV)
+    {
+        // rgb -> XYZ
+        const float x = 0.412453f * r + 0.357580f * g + 0.180423f * b;
+        const float y = 0.212671f * r + 0.715160f * g + 0.072169f * b;
+        const float z = 0.019334f * r + 0.119193f * g + 0.950227f * b;
+
+        // computed for D65
+        const float _ur = 0.19783303699678276f;
+        const float _vr = 0.46833047435252234f;
+
+        // the lower bound keeps the division defined when x, y and z are all zero
+        const float divisor = fmaxf((x + 15.f * y + 3.f * z), FLT_EPSILON);
+        const float _u = __fdividef(4.f * x, divisor);
+        const float _v = __fdividef(9.f * y, divisor);
+
+        const float L = fmaxf(0.f, ((116.f * cbrtf(y)) - 16.f));
+        const float U = 13.f * L * (_u - _ur);
+        const float V = 13.f * L * (_v - _vr);
+
+        // L in [0, 100], u in [-134, 220], v in [-140, 122]
+        outL = static_cast<uchar>( L * (255.f / 100.f));
+        outU = static_cast<uchar>((U + 134.f) * (255.f / (220.f + 134.f )));
+        outV = static_cast<uchar>((V + 140.f) * (255.f / (122.f + 140.f )));
+    }
+
+    // Kernel, one thread per pixel: reads an interleaved 3-byte-per-pixel image and stores the
+    // L, u and v results as three planes stacked vertically in luvg, `rows` rows per plane.
+    // Threads that fall outside the cols x rows image do nothing.
+    __global__ void bgr2Luv_d(const uchar* rgb, const int rgbPitch, uchar* luvg, const int luvgPitch,
+                              const int rows, const int cols)
+    {
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+
+        // the launch grid is rounded up, so the last blocks may overhang the image
+        if (x >= cols || y >= rows) return;
+
+        const uchar3 color = ((const uchar3*)(rgb + rgbPitch * y))[x];
+        uchar l, u, v;
+        luv(color.x / 255.f, color.y / 255.f, color.z / 255.f, l, u, v);
+
+        // plane height comes from the image instead of a fixed 480
+        luvg[luvgPitch * y + x] = l;
+        luvg[luvgPitch * (y + rows) + x] = u;
+        luvg[luvgPitch * (y + 2 * rows) + x] = v;
+    }
+
+    // Host launcher for bgr2Luv_d. luv must provide three planes of bgr.rows rows each.
+    // Blocks until the kernel has finished.
+    void bgr2Luv(const PtrStepSzb& bgr, PtrStepSzb luv)
+    {
+        const dim3 block(32, 8);
+        // round up so images whose size is not a multiple of the block are fully covered
+        const dim3 grid((bgr.cols + block.x - 1) / block.x, (bgr.rows + block.y - 1) / block.y);
+
+        bgr2Luv_d<<<grid, block>>>((const uchar*)bgr.ptr(0), bgr.step, (uchar*)luv.ptr(0), luv.step, bgr.rows, bgr.cols);
+        // a bad launch configuration is only reported through cudaGetLastError
+        cudaSafeCall( cudaGetLastError() );
+
+        cudaSafeCall(cudaDeviceSynchronize());
+    }
+
+    // Maps a gradient direction to one of 6 orientation bins of width pi / 6.
+    // The result is always in [0, 5]; directions that differ by pi fall into the same bin.
+    __device__ __forceinline__ int fast_angle_bin(const float& dx, const float& dy)
+    {
+        // single-precision constant: avoids double arithmetic and the non-standard M_PI macro
+        const float pi = 3.14159265358979323846f;
+        const float angle_quantum = pi / 6.f;
+        float angle = atan2f(dx, dy) + (angle_quantum / 2.f);
+
+        if (angle < 0) angle += pi;
+
+        const float angle_scaling = 1.f / angle_quantum;
+        return static_cast<int>(angle * angle_scaling) % 6;
+    }
+
+    // gray image sampled by magnitude_d; bound by magnitude() for the duration of the launch
+    texture<uchar, cudaTextureType2D, cudaReadModeElementType> tgray;
+
+    // Kernel, one thread per pixel of the image bound to tgray. The gradient is taken with
+    // central differences; its magnitude, scaled by 1 / sqrt(2), is written to plane 6 of mag
+    // and to the plane (0..5) chosen by fast_angle_bin. Planes are stacked vertically with
+    // `rows` rows each. Threads that fall outside the cols x rows image do nothing.
+    __global__ void magnitude_d(PtrStepSzb mag, const int rows, const int cols)
+    {
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (x >= cols || y >= rows) return;
+
+        const float dx_a = tex2D(tgray, x + 1, y),
+                    dx_b = tex2D(tgray, x - 1, y),
+                    dx   = dx_a - dx_b,
+
+                    dy_a = tex2D(tgray, x, y + 1),
+                    dy_b = tex2D(tgray, x, y - 1),
+                    dy   = dy_a - dy_b;
+
+        const float magnitude_scaling = 1.0f / sqrtf(2.f);
+
+        const float magnitude = sqrtf((dx * dx) + (dy * dy)) * magnitude_scaling;
+        const uchar magnitude_u8 = static_cast<uchar>(magnitude);
+
+        // plane height comes from the image instead of a fixed 480
+        mag(rows * 6 + y, x) = magnitude_u8;
+
+        const int angle_channel_index = fast_angle_bin(dy, dx);
+        mag(rows * angle_channel_index + y, x) = magnitude_u8;
+    }
+
+    // Host launcher for magnitude_d. mag must provide seven planes of gray.rows rows each.
+    // Blocks until the kernel has finished and releases the texture binding afterwards.
+    void magnitude(const PtrStepSzb& gray, PtrStepSzb mag)
+    {
+        const dim3 block(32, 8);
+        // round up so images whose size is not a multiple of the block are fully covered
+        const dim3 grid((gray.cols + block.x - 1) / block.x, (gray.rows + block.y - 1) / block.y);
+
+        cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar>();
+        cudaSafeCall( cudaBindTexture2D(0, tgray, gray.data, desc, gray.cols, gray.rows, gray.step) );
+
+        magnitude_d<<<grid, block>>>(mag, gray.rows, gray.cols);
+        // a bad launch configuration is only reported through cudaGetLastError
+        cudaSafeCall( cudaGetLastError() );
+
+        cudaSafeCall(cudaDeviceSynchronize());
+
+        // do not leave tgray pointing at the caller's buffer once the kernel is done
+        cudaSafeCall( cudaUnbindTexture(tgray) );
+    }
+
     // ToDo: use textures or uncached load instruction.
__global__ void magToHist(const uchar* __restrict__ mag, const float* __restrict__ angle, const int angPitch,
diff --git a/modules/gpu/src/softcascade.cpp b/modules/gpu/src/softcascade.cpp
index 431eeba0ef..1d32736183 100644
--- a/modules/gpu/src/softcascade.cpp
+++ b/modules/gpu/src/softcascade.cpp
@@ -88,6 +88,9 @@ namespace icf { void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections, PtrStepSzb suppressed, cudaStream_t stream);
+
+    void bgr2Luv(const PtrStepSzb& bgr, PtrStepSzb luv);
+    void magnitude(const PtrStepSzb& gray, PtrStepSzb mag);
 } namespace imgproc {
@@ -606,4 +609,42 @@ void cv::gpu::SCascade::read(const FileNode& fn)
     Algorithm::read(fn);
 }
 
+namespace {
+
+// Computes the integral image of the feature planes of a BGR frame.
+// The frame is smoothed with a 3x3 Gaussian; a stack of ten 8-bit planes (bgr.rows rows each)
+// is then filled by icf::magnitude (first seven planes, from the gray image) and by
+// icf::bgr2Luv (last three planes), shrunk by 0.25 along both axes and integrated.
+void bgr2Luv(const cv::gpu::GpuMat& input, cv::gpu::GpuMat& integral)
+{
+    cv::gpu::GpuMat bgr;
+    cv::gpu::GaussianBlur(input, bgr, cv::Size(3, 3), -1);
+
+    cv::gpu::GpuMat gray, luv, shrunk, buffer;
+    luv.create(bgr.rows * 10, bgr.cols, CV_8UC1);
+    luv.setTo(0);
+
+    cv::gpu::cvtColor(bgr, gray, CV_BGR2GRAY);
+    cv::gpu::device::icf::magnitude(gray, luv(cv::Rect(0, 0, bgr.cols, bgr.rows * 7)));
+
+    // view of the last three planes of the stack
+    cv::gpu::GpuMat luvPlanes(luv, cv::Rect(0, bgr.rows * 7, bgr.cols, bgr.rows * 3));
+    cv::gpu::device::icf::bgr2Luv(bgr, luvPlanes);
+
+    cv::gpu::resize(luv, shrunk, cv::Size(), 0.25f, 0.25f, CV_INTER_AREA);
+    cv::gpu::integralBuffered(shrunk, integral, buffer);
+}
+}
+
+// Preprocessing entry point.
+// NOTE(review): currently a stub - it asserts that `fields` is set and fetches the GpuMat
+// headers of both arguments, but computes nothing and ignores the stream.
+void cv::gpu::SCascade::preprocess(InputArray _bgr, OutputArray _channels, Stream& stream) const
+{
+    CV_Assert(fields);
+    (void)stream;
+    const GpuMat bgr = _bgr.getGpuMat(), channels = _channels.getGpuMat();
+}
+
+
 #endif
\ No newline at end of file