From 40c76b9de29fab13c24894d2096e265b88b7ff11 Mon Sep 17 00:00:00 2001 From: "marina.kolpakova" Date: Thu, 9 Aug 2012 18:48:25 +0400 Subject: [PATCH] 1.x related fixes --- modules/gpu/perf/perf_labeling.cpp | 4 +- modules/gpu/perf_cpu/perf_labeling.cpp | 157 +++++ modules/gpu/src/cuda/ccomponetns.cu | 9 +- modules/gpu/src/graphcuts.cpp | 570 +++++++++--------- .../gpu/src/opencv2/gpu/device/emulation.hpp | 261 ++++---- modules/gpu/test/test_labeling.cpp | 8 +- 6 files changed, 594 insertions(+), 415 deletions(-) create mode 100644 modules/gpu/perf_cpu/perf_labeling.cpp diff --git a/modules/gpu/perf/perf_labeling.cpp b/modules/gpu/perf/perf_labeling.cpp index 9958719fc6..5417133095 100644 --- a/modules/gpu/perf/perf_labeling.cpp +++ b/modules/gpu/perf/perf_labeling.cpp @@ -48,7 +48,9 @@ GPU_PERF_TEST(ConnectedComponents, cv::gpu::DeviceInfo, cv::Size) cv::gpu::DeviceInfo devInfo = GET_PARAM(0); cv::gpu::setDevice(devInfo.deviceID()); - cv::Mat image = readImage("gpu/labeling/label.png", cv::IMREAD_GRAYSCALE); + cv::Mat image = readImage("gpu/labeling/aloe-disp.png", cv::IMREAD_GRAYSCALE); + + cv::threshold(image, image, 150, 255, CV_THRESH_BINARY); cv::gpu::GpuMat mask; mask.create(image.rows, image.cols, CV_8UC1); diff --git a/modules/gpu/perf_cpu/perf_labeling.cpp b/modules/gpu/perf_cpu/perf_labeling.cpp new file mode 100644 index 0000000000..47d62839eb --- /dev/null +++ b/modules/gpu/perf_cpu/perf_labeling.cpp @@ -0,0 +1,157 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +//M*/ + +#include "perf_precomp.hpp" + +#ifdef HAVE_CUDA + +namespace { + + struct GreedyLabeling + { + struct dot + { + int x; + int y; + + static dot make(int i, int j) + { + dot d; d.x = i; d.y = j; + return d; + } + }; + + struct InInterval + { + InInterval(const int& _lo, const int& _hi) : lo(-_lo), hi(_hi) {}; + const int lo, hi; + + bool operator() (const unsigned char a, const unsigned char b) const + { + int d = a - b; + return lo <= d && d <= hi; + } + }; + + GreedyLabeling(cv::Mat img) + : image(img), _labels(image.size(), CV_32SC1, cv::Scalar::all(-1)) {stack = new dot[image.cols * image.rows];} + + ~GreedyLabeling(){delete[] stack;} + + void operator() (cv::Mat labels) const + { + InInterval inInt(0, 2); + int cc = -1; + + int* dist_labels = (int*)labels.data; + int pitch = labels.step1(); + + unsigned char* source = (unsigned char*)image.data; + int width = image.cols; + int height = image.rows; + + for (int j = 0; j < image.rows; ++j) + for (int i = 0; i < image.cols; ++i) + { + if (dist_labels[j * pitch + i] != -1) continue; + + dot* top = stack; + dot p = dot::make(i, j); + cc++; + + dist_labels[j * pitch + i] = cc; + + while (top >= stack) + { + int* dl = &dist_labels[p.y * pitch + p.x]; + unsigned char* sp = &source[p.y * image.step1() + p.x]; + + dl[0] = cc; + + //right + if( p.x < (width - 1) && dl[ +1] == -1 && inInt(sp[0], sp[+1])) + *top++ = dot::make(p.x + 1, p.y); + + //left + if( p.x > 0 && dl[-1] == -1 && inInt(sp[0], sp[-1])) + *top++ = dot::make(p.x - 1, p.y); + + //bottom + if( p.y < (height - 1) && dl[+pitch] == -1 && inInt(sp[0], sp[+image.step1()])) + *top++ = dot::make(p.x, p.y + 1); + + //top + if( p.y > 0 && dl[-pitch] == -1 && inInt(sp[0], sp[-image.step1()])) + *top++ = dot::make(p.x, p.y - 1); + + p = *--top; + } + } + } + + cv::Mat image; + cv::Mat _labels; + dot* stack; + }; +} + +GPU_PERF_TEST(ConnectedComponents, cv::gpu::DeviceInfo, cv::Size) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Mat image = readImage("gpu/labeling/aloe-disp.png", cv::IMREAD_GRAYSCALE); + + GreedyLabeling host(image); + + host(host._labels); + + declare.time(1.0); + + TEST_CYCLE() + { + host(host._labels); + } +} + +INSTANTIATE_TEST_CASE_P(Labeling, ConnectedComponents, testing::Combine(ALL_DEVICES, testing::Values(cv::Size(261, 262)))); + +#endif \ No newline at end of file diff --git a/modules/gpu/src/cuda/ccomponetns.cu b/modules/gpu/src/cuda/ccomponetns.cu index 11d4742df8..1f9dc114ef 100644 --- a/modules/gpu/src/cuda/ccomponetns.cu +++ b/modules/gpu/src/cuda/ccomponetns.cu @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -255,8 +256,7 @@ namespace cv { namespace gpu { namespace device edgesTile[yloc][xloc] = c; } - - for (int i = 0; ; ++i) + for (int k = 0; ;++k) { //1. backup #pragma unroll @@ -312,11 +312,12 @@ namespace cv { namespace gpu { namespace device if (new_labels[i][j] < old_labels[i][j]) { changed = 1; - atomicMin(&labelsTile[0][0] + old_labels[i][j], new_labels[i][j]); + Emulation::smem::atomicMin(&labelsTile[0][0] + old_labels[i][j], new_labels[i][j]); } } - changed = __syncthreads_or(changed); + changed = Emulation::sycthOr(changed); + if (!changed) break; diff --git a/modules/gpu/src/graphcuts.cpp b/modules/gpu/src/graphcuts.cpp index e0a43f2cb8..58fcde8f09 100644 --- a/modules/gpu/src/graphcuts.cpp +++ b/modules/gpu/src/graphcuts.cpp @@ -1,284 +1,286 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other GpuMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or bpied warranties, including, but not limited to, the bpied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" - -#if !defined (HAVE_CUDA) - -void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } -void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } - -void cv::gpu::connectivityMask(const GpuMat&, GpuMat&, const cv::Scalar&, const cv::Scalar&, Stream&) { throw_nogpu(); } -void cv::gpu::labelComponents(const GpuMat& mask, GpuMat& components, int, Stream& stream) { throw_nogpu(); } - -#else /* !defined (HAVE_CUDA) */ - -namespace cv { namespace gpu { namespace device -{ - namespace ccl - { - void labelComponents(const DevMem2D& edges, DevMem2Di comps, int flags, cudaStream_t stream); - - template - void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); - } -}}} - - -float4 scalarToCudaType(const cv::Scalar& in) -{ - float4 res; - res.x = in[0]; res.y = in[1]; res.z = in[2]; res.w = in[3]; - return res; -} - - -void cv::gpu::connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& s) -{ - CV_Assert(!image.empty()); - - int ch = image.channels(); - CV_Assert(ch <= 4); - - int depth = image.depth(); - - typedef void (*func_t)(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); - - static const func_t suppotLookup[8][4] = - { // 1, 2, 3, 4 - { device::ccl::computeEdges, 0, device::ccl::computeEdges, device::ccl::computeEdges },// CV_8U - { 0, 0, 0, 0 },// CV_16U - { device::ccl::computeEdges, 0, device::ccl::computeEdges, device::ccl::computeEdges },// CV_8S - { 0, 0, 0, 0 },// CV_16S - { device::ccl::computeEdges, 0, 0, 0 },// CV_32S - { device::ccl::computeEdges, 0, 0, 0 },// CV_32F - { 0, 0, 0, 0 },// CV_64F - { 0, 0, 0, 0 } // CV_USRTYPE1 - }; - - func_t f = suppotLookup[depth][ch - 1]; - CV_Assert(f); - - if (image.size() != mask.size() || mask.type() != CV_8UC1) - mask.create(image.size(), CV_8UC1); - - cudaStream_t stream = StreamAccessor::getStream(s); - float4 culo = scalarToCudaType(lo), cuhi = scalarToCudaType(hi); - f(image, mask, culo, cuhi, stream); -} - -void cv::gpu::labelComponents(const GpuMat& mask, GpuMat& components, int flags, Stream& s) -{ - CV_Assert(!mask.empty() && mask.type() == CV_8U); - - if (mask.size() != components.size() || components.type() != CV_32SC1) - components.create(mask.size(), CV_32SC1); - - cudaStream_t stream = StreamAccessor::getStream(s); - device::ccl::labelComponents(mask, components, flags, stream); -} - -namespace -{ - typedef NppStatus (*init_func_t)(NppiSize oSize, NppiGraphcutState** ppState, Npp8u* pDeviceMem); - - class NppiGraphcutStateHandler - { - public: - NppiGraphcutStateHandler(NppiSize sznpp, Npp8u* pDeviceMem, const init_func_t func) - { - nppSafeCall( func(sznpp, &pState, pDeviceMem) ); - } - - ~NppiGraphcutStateHandler() - { - nppSafeCall( nppiGraphcutFree(pState) ); - } - - operator NppiGraphcutState*() - { - return pState; - } - - private: - NppiGraphcutState* pState; - }; -} - -void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s) -{ -#if (CUDA_VERSION < 5000) - CV_Assert(terminals.type() == CV_32S); -#else - CV_Assert(terminals.type() == CV_32S || terminals.type() == CV_32F); -#endif - - Size src_size = terminals.size(); - - CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width)); - CV_Assert(leftTransp.type() == terminals.type()); - - CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width)); - CV_Assert(rightTransp.type() == terminals.type()); - - CV_Assert(top.size() == src_size); - CV_Assert(top.type() == terminals.type()); - - CV_Assert(bottom.size() == src_size); - CV_Assert(bottom.type() == terminals.type()); - - labels.create(src_size, CV_8U); - - NppiSize sznpp; - sznpp.width = src_size.width; - sznpp.height = src_size.height; - - int bufsz; - nppSafeCall( nppiGraphcutGetSize(sznpp, &bufsz) ); - - ensureSizeIsEnough(1, bufsz, CV_8U, buf); - - cudaStream_t stream = StreamAccessor::getStream(s); - - NppStreamHandler h(stream); - - NppiGraphcutStateHandler state(sznpp, buf.ptr(), nppiGraphcutInitAlloc); - -#if (CUDA_VERSION < 5000) - nppSafeCall( nppiGraphcut_32s8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), top.ptr(), bottom.ptr(), - static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); -#else - if (terminals.type() == CV_32S) - { - nppSafeCall( nppiGraphcut_32s8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), top.ptr(), bottom.ptr(), - static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); - } - else - { - nppSafeCall( nppiGraphcut_32f8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), top.ptr(), bottom.ptr(), - static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); - } -#endif - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); -} - -void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight, - GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight, GpuMat& labels, GpuMat& buf, Stream& s) -{ -#if (CUDA_VERSION < 5000) - CV_Assert(terminals.type() == CV_32S); -#else - CV_Assert(terminals.type() == CV_32S || terminals.type() == CV_32F); -#endif - - Size src_size = terminals.size(); - - CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width)); - CV_Assert(leftTransp.type() == terminals.type()); - - CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width)); - CV_Assert(rightTransp.type() == terminals.type()); - - CV_Assert(top.size() == src_size); - CV_Assert(top.type() == terminals.type()); - - CV_Assert(topLeft.size() == src_size); - CV_Assert(topLeft.type() == terminals.type()); - - CV_Assert(topRight.size() == src_size); - CV_Assert(topRight.type() == terminals.type()); - - CV_Assert(bottom.size() == src_size); - CV_Assert(bottom.type() == terminals.type()); - - CV_Assert(bottomLeft.size() == src_size); - CV_Assert(bottomLeft.type() == terminals.type()); - - CV_Assert(bottomRight.size() == src_size); - CV_Assert(bottomRight.type() == terminals.type()); - - labels.create(src_size, CV_8U); - - NppiSize sznpp; - sznpp.width = src_size.width; - sznpp.height = src_size.height; - - int bufsz; - nppSafeCall( nppiGraphcut8GetSize(sznpp, &bufsz) ); - - ensureSizeIsEnough(1, bufsz, CV_8U, buf); - - cudaStream_t stream = StreamAccessor::getStream(s); - - NppStreamHandler h(stream); - - NppiGraphcutStateHandler state(sznpp, buf.ptr(), nppiGraphcut8InitAlloc); - -#if (CUDA_VERSION < 5000) - nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), - top.ptr(), topLeft.ptr(), topRight.ptr(), - bottom.ptr(), bottomLeft.ptr(), bottomRight.ptr(), - static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); -#else - if (terminals.type() == CV_32S) - { - nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), - top.ptr(), topLeft.ptr(), topRight.ptr(), - bottom.ptr(), bottomLeft.ptr(), bottomRight.ptr(), - static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); - } - else - { - nppSafeCall( nppiGraphcut8_32f8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), - top.ptr(), topLeft.ptr(), topRight.ptr(), - bottom.ptr(), bottomLeft.ptr(), bottomRight.ptr(), - static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); - } -#endif - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); -} - -#endif /* !defined (HAVE_CUDA) */ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or bpied warranties, including, but not limited to, the bpied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#if !defined (HAVE_CUDA) + +void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } +void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } + +void cv::gpu::connectivityMask(const GpuMat&, GpuMat&, const cv::Scalar&, const cv::Scalar&, Stream&) { throw_nogpu(); } +void cv::gpu::labelComponents(const GpuMat& mask, GpuMat& components, int, Stream& stream) { throw_nogpu(); } + +#else /* !defined (HAVE_CUDA) */ + +namespace cv { namespace gpu { namespace device +{ + namespace ccl + { + void labelComponents(const DevMem2D& edges, DevMem2Di comps, int flags, cudaStream_t stream); + + template + void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + } +}}} + + +float4 scalarToCudaType(const cv::Scalar& in) +{ + float4 res; + res.x = in[0]; res.y = in[1]; res.z = in[2]; res.w = in[3]; + return res; +} + + +void cv::gpu::connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& s) +{ + CV_Assert(!image.empty()); + + int ch = image.channels(); + CV_Assert(ch <= 4); + + int depth = image.depth(); + + typedef void (*func_t)(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + + static const func_t suppotLookup[8][4] = + { // 1, 2, 3, 4 + { device::ccl::computeEdges, 0, device::ccl::computeEdges, device::ccl::computeEdges },// CV_8U + { 0, 0, 0, 0 },// CV_16U + { device::ccl::computeEdges, 0, device::ccl::computeEdges, device::ccl::computeEdges },// CV_8S + { 0, 0, 0, 0 },// CV_16S + { device::ccl::computeEdges, 0, 0, 0 },// CV_32S + { device::ccl::computeEdges, 0, 0, 0 },// CV_32F + { 0, 0, 0, 0 },// CV_64F + { 0, 0, 0, 0 } // CV_USRTYPE1 + }; + + func_t f = suppotLookup[depth][ch - 1]; + CV_Assert(f); + + if (image.size() != mask.size() || mask.type() != CV_8UC1) + mask.create(image.size(), CV_8UC1); + + cudaStream_t stream = StreamAccessor::getStream(s); + float4 culo = scalarToCudaType(lo), cuhi = scalarToCudaType(hi); + f(image, mask, culo, cuhi, stream); +} + +void cv::gpu::labelComponents(const GpuMat& mask, GpuMat& components, int flags, Stream& s) +{ + if (!TargetArchs::builtWith(SHARED_ATOMICS) || !DeviceInfo().supports(SHARED_ATOMICS)) + CV_Error(CV_StsNotImplemented, "The device doesn't support shared atomics and communicative synchronization!"); + CV_Assert(!mask.empty() && mask.type() == CV_8U); + + if (mask.size() != components.size() || components.type() != CV_32SC1) + components.create(mask.size(), CV_32SC1); + + cudaStream_t stream = StreamAccessor::getStream(s); + device::ccl::labelComponents(mask, components, flags, stream); +} + +namespace +{ + typedef NppStatus (*init_func_t)(NppiSize oSize, NppiGraphcutState** ppState, Npp8u* pDeviceMem); + + class NppiGraphcutStateHandler + { + public: + NppiGraphcutStateHandler(NppiSize sznpp, Npp8u* pDeviceMem, const init_func_t func) + { + nppSafeCall( func(sznpp, &pState, pDeviceMem) ); + } + + ~NppiGraphcutStateHandler() + { + nppSafeCall( nppiGraphcutFree(pState) ); + } + + operator NppiGraphcutState*() + { + return pState; + } + + private: + NppiGraphcutState* pState; + }; +} + +void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s) +{ +#if (CUDA_VERSION < 5000) + CV_Assert(terminals.type() == CV_32S); +#else + CV_Assert(terminals.type() == CV_32S || terminals.type() == CV_32F); +#endif + + Size src_size = terminals.size(); + + CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width)); + CV_Assert(leftTransp.type() == terminals.type()); + + CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width)); + CV_Assert(rightTransp.type() == terminals.type()); + + CV_Assert(top.size() == src_size); + CV_Assert(top.type() == terminals.type()); + + CV_Assert(bottom.size() == src_size); + CV_Assert(bottom.type() == terminals.type()); + + labels.create(src_size, CV_8U); + + NppiSize sznpp; + sznpp.width = src_size.width; + sznpp.height = src_size.height; + + int bufsz; + nppSafeCall( nppiGraphcutGetSize(sznpp, &bufsz) ); + + ensureSizeIsEnough(1, bufsz, CV_8U, buf); + + cudaStream_t stream = StreamAccessor::getStream(s); + + NppStreamHandler h(stream); + + NppiGraphcutStateHandler state(sznpp, buf.ptr(), nppiGraphcutInitAlloc); + +#if (CUDA_VERSION < 5000) + nppSafeCall( nppiGraphcut_32s8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), top.ptr(), bottom.ptr(), + static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); +#else + if (terminals.type() == CV_32S) + { + nppSafeCall( nppiGraphcut_32s8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), top.ptr(), bottom.ptr(), + static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); + } + else + { + nppSafeCall( nppiGraphcut_32f8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), top.ptr(), bottom.ptr(), + static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); + } +#endif + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); +} + +void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight, + GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight, GpuMat& labels, GpuMat& buf, Stream& s) +{ +#if (CUDA_VERSION < 5000) + CV_Assert(terminals.type() == CV_32S); +#else + CV_Assert(terminals.type() == CV_32S || terminals.type() == CV_32F); +#endif + + Size src_size = terminals.size(); + + CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width)); + CV_Assert(leftTransp.type() == terminals.type()); + + CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width)); + CV_Assert(rightTransp.type() == terminals.type()); + + CV_Assert(top.size() == src_size); + CV_Assert(top.type() == terminals.type()); + + CV_Assert(topLeft.size() == src_size); + CV_Assert(topLeft.type() == terminals.type()); + + CV_Assert(topRight.size() == src_size); + CV_Assert(topRight.type() == terminals.type()); + + CV_Assert(bottom.size() == src_size); + CV_Assert(bottom.type() == terminals.type()); + + CV_Assert(bottomLeft.size() == src_size); + CV_Assert(bottomLeft.type() == terminals.type()); + + CV_Assert(bottomRight.size() == src_size); + CV_Assert(bottomRight.type() == terminals.type()); + + labels.create(src_size, CV_8U); + + NppiSize sznpp; + sznpp.width = src_size.width; + sznpp.height = src_size.height; + + int bufsz; + nppSafeCall( nppiGraphcut8GetSize(sznpp, &bufsz) ); + + ensureSizeIsEnough(1, bufsz, CV_8U, buf); + + cudaStream_t stream = StreamAccessor::getStream(s); + + NppStreamHandler h(stream); + + NppiGraphcutStateHandler state(sznpp, buf.ptr(), nppiGraphcut8InitAlloc); + +#if (CUDA_VERSION < 5000) + nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), + top.ptr(), topLeft.ptr(), topRight.ptr(), + bottom.ptr(), bottomLeft.ptr(), bottomRight.ptr(), + static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); +#else + if (terminals.type() == CV_32S) + { + nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), + top.ptr(), topLeft.ptr(), topRight.ptr(), + bottom.ptr(), bottomLeft.ptr(), bottomRight.ptr(), + static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); + } + else + { + nppSafeCall( nppiGraphcut8_32f8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), + top.ptr(), topLeft.ptr(), topRight.ptr(), + bottom.ptr(), bottomLeft.ptr(), bottomRight.ptr(), + static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); + } +#endif + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); +} + +#endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpu/src/opencv2/gpu/device/emulation.hpp b/modules/gpu/src/opencv2/gpu/device/emulation.hpp index fe5452b5cd..0999495aeb 100644 --- a/modules/gpu/src/opencv2/gpu/device/emulation.hpp +++ b/modules/gpu/src/opencv2/gpu/device/emulation.hpp @@ -1,126 +1,137 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or bpied warranties, including, but not limited to, the bpied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef OPENCV_GPU_EMULATION_HPP_ -#define OPENCV_GPU_EMULATION_HPP_ - -#include "warp_reduce.hpp" -#include - -namespace cv { namespace gpu { namespace device -{ - struct Emulation - { - template - static __forceinline__ __device__ int Ballot(int predicate) - { -#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200) - return __ballot(predicate); -#else - __shared__ volatile int cta_buffer[CTA_SIZE]; - - int tid = threadIdx.x; - cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0; - return warp_reduce(cta_buffer); -#endif - } - - struct smem - { - enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U }; - - template - static __device__ __forceinline__ T atomicInc(T* address, T val) - { -#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) - T count; - unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U); - do - { - count = *address & TAG_MASK; - count = tag | (count + 1); - *address = count; - } while (*address != count); - - return (count & TAG_MASK) - 1; -#else - return ::atomicInc(address, val); -#endif - } - - template - static __device__ __forceinline__ void atomicAdd(T* address, T val) - { -#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) - T count; - unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U); - do - { - count = *address & TAG_MASK; - count = tag | (count + val); - *address = count; - } while (*address != count); -#else - ::atomicAdd(address, val); -#endif - } - - template - static __device__ __forceinline__ T atomicMin(T* address, T val) - { -#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) - T count = min(*address, val); - do - { - *address = count; - } while (*address > count); - - return count; -#else - return ::atomicMin(address, val); -#endif - } - }; - }; -}}} // namespace cv { namespace gpu { namespace device - +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or bpied warranties, including, but not limited to, the bpied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_GPU_EMULATION_HPP_ +#define OPENCV_GPU_EMULATION_HPP_ + +#include "warp_reduce.hpp" +#include + +namespace cv { namespace gpu { namespace device +{ + struct Emulation + { + + static __device__ __forceinline__ int sycthOr(int pred) + { +#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) + // just campilation stab + return false; +#else + return __syncthreads_or(pred); +#endif + } + + template + static __forceinline__ __device__ int Ballot(int predicate) + { +#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200) + return __ballot(predicate); +#else + __shared__ volatile int cta_buffer[CTA_SIZE]; + + int tid = threadIdx.x; + cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0; + return warp_reduce(cta_buffer); +#endif + } + + struct smem + { + enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U }; + + template + static __device__ __forceinline__ T atomicInc(T* address, T val) + { +#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) + T count; + unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U); + do + { + count = *address & TAG_MASK; + count = tag | (count + 1); + *address = count; + } while (*address != count); + + return (count & TAG_MASK) - 1; +#else + return ::atomicInc(address, val); +#endif + } + + template + static __device__ __forceinline__ void atomicAdd(T* address, T val) + { +#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) + T count; + unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U); + do + { + count = *address & TAG_MASK; + count = tag | (count + val); + *address = count; + } while (*address != count); +#else + ::atomicAdd(address, val); +#endif + } + + template + static __device__ __forceinline__ T atomicMin(T* address, T val) + { +#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) + T count = min(*address, val); + do + { + *address = count; + } while (*address > count); + + return count; +#else + return ::atomicMin(address, val); +#endif + } + }; + }; +}}} // namespace cv { namespace gpu { namespace device + #endif /* OPENCV_GPU_EMULATION_HPP_ */ \ No newline at end of file diff --git a/modules/gpu/test/test_labeling.cpp b/modules/gpu/test/test_labeling.cpp index ae9bb8e91c..c88109af19 100644 --- a/modules/gpu/test/test_labeling.cpp +++ b/modules/gpu/test/test_labeling.cpp @@ -164,7 +164,7 @@ struct Labeling : testing::TestWithParam cv::Mat loat_image() { - return cv::imread(std::string( cvtest::TS::ptr()->get_data_path() ) + "labeling/label.png"); + return cv::imread(std::string( cvtest::TS::ptr()->get_data_path() ) + "labeling/IMG_0727.JPG"); } }; @@ -173,6 +173,8 @@ TEST_P(Labeling, ConnectedComponents) cv::Mat image; cvtColor(loat_image(), image, CV_BGR2GRAY); + cv::threshold(image, image, 150, 255, CV_THRESH_BINARY); + ASSERT_TRUE(image.type() == CV_8UC1); GreedyLabeling host(image); @@ -189,6 +191,10 @@ TEST_P(Labeling, ConnectedComponents) ASSERT_NO_THROW(cv::gpu::labelComponents(mask, components)); host.checkCorrectness(cv::Mat(components)); + cv::imshow("test", image); + cv::waitKey(0); + cv::imshow("test", host._labels); + cv::waitKey(0); } INSTANTIATE_TEST_CASE_P(ConnectedComponents, Labeling, ALL_DEVICES);