Merge pull request #1540 from jet47:gpuarithm-cudev
commit
21233656bd
64 changed files with 5049 additions and 8607 deletions
@ -1,145 +0,0 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __ARITHM_FUNC_TRAITS_HPP__ |
||||
#define __ARITHM_FUNC_TRAITS_HPP__ |
||||
|
||||
#include <cstddef> |
||||
|
||||
namespace arithm
{
    // Constants shared by every ArithmFuncTraits variant. All variants use the
    // same 32x8 "simple" and "smart" block dimensions; only smart_shift differs,
    // so it is the single template knob here.
    template <int shift> struct ArithmFuncTraitsBase
    {
        enum { simple_block_dim_x = 32, simple_block_dim_y = 8 };

        enum { smart_block_dim_x = 32, smart_block_dim_y = 8 };
        enum { smart_shift = shift };
    };

    // Generic case: any (source element size, destination element size) pair
    // that is not explicitly listed below gets smart_shift == 1.
    template <size_t src_size, size_t dst_size> struct ArithmFuncTraits : ArithmFuncTraitsBase<1> {};

    // Every combination of 1-, 2- and 4-byte source/destination element sizes
    // gets smart_shift == 4.
    template <> struct ArithmFuncTraits<1, 1> : ArithmFuncTraitsBase<4> {};
    template <> struct ArithmFuncTraits<1, 2> : ArithmFuncTraitsBase<4> {};
    template <> struct ArithmFuncTraits<1, 4> : ArithmFuncTraitsBase<4> {};

    template <> struct ArithmFuncTraits<2, 1> : ArithmFuncTraitsBase<4> {};
    template <> struct ArithmFuncTraits<2, 2> : ArithmFuncTraitsBase<4> {};
    template <> struct ArithmFuncTraits<2, 4> : ArithmFuncTraitsBase<4> {};

    template <> struct ArithmFuncTraits<4, 1> : ArithmFuncTraitsBase<4> {};
    template <> struct ArithmFuncTraits<4, 2> : ArithmFuncTraitsBase<4> {};
    template <> struct ArithmFuncTraits<4, 4> : ArithmFuncTraitsBase<4> {};
}
||||
|
||||
#endif // __ARITHM_FUNC_TRAITS_HPP__
|
@ -0,0 +1,207 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#include "opencv2/opencv_modules.hpp" |
||||
|
||||
#ifndef HAVE_OPENCV_CUDEV |
||||
|
||||
#error "opencv_cudev is required" |
||||
|
||||
#else |
||||
|
||||
#include "opencv2/cudaarithm.hpp" |
||||
#include "opencv2/cudev.hpp" |
||||
|
||||
using namespace cv; |
||||
using namespace cv::cudev; |
||||
|
||||
namespace |
||||
{ |
||||
// File-scope 1D texture reference over uchar elements. It is bound to the LUT
// buffer by LookUpTableImpl's constructor when the device does not report
// FEATURE_SET_COMPUTE_30, and fetched by the LutTablePtr* functors when
// CV_CUDEV_ARCH < 300.
// NOTE(review): being a single global, it is shared by every LookUpTableImpl
// instance in this translation unit - confirm only one instance is live at a
// time on the texture-reference path.
texture<uchar, cudaTextureType1D, cudaReadModeElementType> texLutTable; |
||||
|
||||
// LookUpTable implementation that keeps a 256-entry 8-bit table in device
// memory and reads it through a texture (a texture object or the file-scope
// texture reference, chosen once at construction time).
class LookUpTableImpl : public LookUpTable
{
public:
    // Stores the table on the device and sets up the texture used to read it.
    LookUpTableImpl(InputArray lut);

    // Releases the texture set up by the constructor.
    ~LookUpTableImpl();

    // Maps the elements of src through the table and writes the result to dst.
    void transform(InputArray src, OutputArray dst, Stream& stream = Stream::Null());

private:
    GpuMat              d_lut;           // the table, resident in device memory
    cudaTextureObject_t texLutTableObj;  // created by the constructor only when cc30 is true
    bool                cc30;            // deviceSupports(FEATURE_SET_COMPUTE_30), cached at construction
};
||||
|
||||
// Stores the look-up table on the device and sets up the texture used to read it.
//
// _lut : either a GpuMat (adopted as is) or a host array, which is uploaded as
//        a single 1x256 row. The table must have 8-bit depth and 256 entries.
//
// Raises through CV_Assert when the table is malformed and through
// CV_CUDEV_SAFE_CALL when a CUDA runtime call fails.
LookUpTableImpl::LookUpTableImpl(InputArray _lut)
    : texLutTableObj(0), cc30(false)   // transform() copies texLutTableObj even when it is never created
{
    if (_lut.kind() == _InputArray::GPU_MAT)
    {
        d_lut = _lut.getGpuMat();
    }
    else
    {
        Mat h_lut = _lut.getMat();

        // The upload below re-wraps the raw host buffer as a 1x256 row. Check
        // first that the buffer really holds 256 densely packed elements;
        // otherwise the copy would read past the end of the caller's data or
        // pick up row padding.
        CV_Assert( h_lut.total() == 256 && h_lut.isContinuous() );

        d_lut.upload(Mat(1, 256, h_lut.type(), h_lut.data));
    }

    CV_Assert( d_lut.depth() == CV_8U );
    CV_Assert( d_lut.rows == 1 && d_lut.cols == 256 );

    cc30 = deviceSupports(FEATURE_SET_COMPUTE_30);

    if (cc30)
    {
        // Use the texture object: describe the table as linear uchar memory
        // covering all channels of the 256 entries.
        cudaResourceDesc texRes;
        std::memset(&texRes, 0, sizeof(texRes));
        texRes.resType = cudaResourceTypeLinear;
        texRes.res.linear.devPtr = d_lut.data;
        texRes.res.linear.desc = cudaCreateChannelDesc<uchar>();
        texRes.res.linear.sizeInBytes = 256 * d_lut.channels() * sizeof(uchar);

        // Zeroed descriptor, i.e. all texture settings left at their zero values.
        cudaTextureDesc texDescr;
        std::memset(&texDescr, 0, sizeof(texDescr));

        CV_CUDEV_SAFE_CALL( cudaCreateTextureObject(&texLutTableObj, &texRes, &texDescr, 0) );
    }
    else
    {
        // Use the texture reference: bind the file-scope texLutTable to the table.
        cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar>();
        CV_CUDEV_SAFE_CALL( cudaBindTexture(0, &texLutTable, d_lut.data, &desc) );
    }
}
||||
|
||||
// Undoes whichever texture setup the constructor performed. The CUDA return
// codes are not inspected here.
LookUpTableImpl::~LookUpTableImpl()
{
    if (!cc30)
    {
        // Texture-reference path: detach the file-scope texture from the table.
        cudaUnbindTexture(texLutTable);
        return;
    }

    // Texture-object path: destroy the object created in the constructor.
    cudaDestroyTextureObject(texLutTableObj);
}
||||
|
||||
// Device-side accessor for a single-channel table: the functor is called with
// two uchar arguments and returns the table entry selected by the second one.
struct LutTablePtrC1
{
    typedef uchar value_type;
    typedef uchar index_type;

    // Handle used when compiling for CV_CUDEV_ARCH >= 300; unused otherwise.
    cudaTextureObject_t texLutTableObj;

    // The first argument is ignored; x is the index into the table.
    __device__ __forceinline__ uchar operator ()(uchar, uchar x) const
    {
    #if CV_CUDEV_ARCH >= 300
        // Use the texture object
        return tex1Dfetch<uchar>(texLutTableObj, x);
    #else
        // Use the texture reference
        return tex1Dfetch(texLutTable, x);
    #endif
    }
};
||||
// Device-side accessor for a three-channel table stored interleaved: channel c
// of entry i is fetched from element i * 3 + c.
struct LutTablePtrC3
{
    typedef uchar3 value_type;
    typedef uchar3 index_type;

    // Handle used when compiling for CV_CUDEV_ARCH >= 300; unused otherwise.
    cudaTextureObject_t texLutTableObj;

    // The first argument is ignored; each component of x indexes its own channel.
    __device__ __forceinline__ uchar3 operator ()(const uchar3&, const uchar3& x) const
    {
    #if CV_CUDEV_ARCH >= 300
        // Use the texture object
        const uchar c0 = tex1Dfetch<uchar>(texLutTableObj, x.x * 3);
        const uchar c1 = tex1Dfetch<uchar>(texLutTableObj, x.y * 3 + 1);
        const uchar c2 = tex1Dfetch<uchar>(texLutTableObj, x.z * 3 + 2);
    #else
        // Use the texture reference
        const uchar c0 = tex1Dfetch(texLutTable, x.x * 3);
        const uchar c1 = tex1Dfetch(texLutTable, x.y * 3 + 1);
        const uchar c2 = tex1Dfetch(texLutTable, x.z * 3 + 2);
    #endif

        return make_uchar3(c0, c1, c2);
    }
};
||||
|
||||
// Applies the table to _src and writes the result to _dst (created with the
// size and type of the source).
//
// _src   : CV_8UC1 or CV_8UC3 GpuMat.
// _dst   : output array.
// stream : stream handed to the assign() call.
//
// The table must have either one channel or as many channels as the source.
void LookUpTableImpl::transform(InputArray _src, OutputArray _dst, Stream& stream)
{
    GpuMat src = _src.getGpuMat();

    const int cn = src.channels();
    const int lut_cn = d_lut.channels();

    CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 );
    CV_Assert( lut_cn == 1 || lut_cn == cn );

    _dst.create(src.size(), src.type());
    GpuMat dst = _dst.getGpuMat();

    switch (lut_cn)
    {
    case 1:
        {
            // One-channel table: view both images as single-channel so the
            // same table is applied to every element.
            GpuMat_<uchar> srcFlat(src.reshape(1));
            GpuMat_<uchar> dstFlat(dst.reshape(1));

            LutTablePtrC1 table;
            table.texLutTableObj = texLutTableObj;

            dstFlat.assign(lut_(srcFlat, table), stream);
        }
        break;

    case 3:
        {
            // Three-channel table: treat the images as uchar3 pixels.
            GpuMat_<uchar3>& srcRgb = (GpuMat_<uchar3>&) src;
            GpuMat_<uchar3>& dstRgb = (GpuMat_<uchar3>&) dst;

            LutTablePtrC3 table;
            table.texLutTableObj = texLutTableObj;

            dstRgb.assign(lut_(srcRgb, table), stream);
        }
        break;

    default:
        // Any other channel count leaves dst allocated but unwritten.
        break;
    }
}
||||
} |
||||
|
||||
// Factory: wraps a new LookUpTableImpl built from lut in a Ptr<LookUpTable>.
Ptr<LookUpTable> cv::cuda::createLookUpTable(InputArray lut)
{
    Ptr<LookUpTable> table = makePtr<LookUpTableImpl>(lut);
    return table;
}
||||
|
||||
#endif |
@ -0,0 +1,119 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#include "opencv2/opencv_modules.hpp" |
||||
|
||||
#ifndef HAVE_OPENCV_CUDEV |
||||
|
||||
#error "opencv_cudev is required" |
||||
|
||||
#else |
||||
|
||||
#include "opencv2/cudaarithm.hpp" |
||||
#include "opencv2/cudev.hpp" |
||||
|
||||
using namespace cv::cudev; |
||||
|
||||
namespace |
||||
{ |
||||
// Runs gridFindMinMaxVal over |src1 - src2| (both read as uchar and widened to
// int), downloads the two resulting ints from _buf and returns the second one.
// NOTE(review): presumably the pair is laid out as {min, max}, which makes the
// second value the largest absolute difference - confirm against gridFindMinMaxVal.
double normDiffInf(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf)
{
    const GpuMat_<uchar>& lhs = (const GpuMat_<uchar>&) _src1;
    const GpuMat_<uchar>& rhs = (const GpuMat_<uchar>&) _src2;
    GpuMat_<int>& devResult = (GpuMat_<int>&) _buf;

    gridFindMinMaxVal(abs_(cvt_<int>(lhs) - cvt_<int>(rhs)), devResult);

    // Wrap a local array in a Mat header so the download lands directly in it.
    int minMax[2];
    cv::Mat hostResult(1, 2, devResult.type(), minMax);
    devResult.download(hostResult);

    return minMax[1];
}
||||
|
||||
// Runs gridCalcSum over |src1 - src2| (both read as uchar and widened to int),
// downloads the single int result from _buf and returns it as a double.
// The value passes through an int before being returned.
double normDiffL1(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf)
{
    const GpuMat_<uchar>& lhs = (const GpuMat_<uchar>&) _src1;
    const GpuMat_<uchar>& rhs = (const GpuMat_<uchar>&) _src2;
    GpuMat_<int>& devResult = (GpuMat_<int>&) _buf;

    gridCalcSum(abs_(cvt_<int>(lhs) - cvt_<int>(rhs)), devResult);

    // Wrap a local scalar in a Mat header so the download lands directly in it.
    int total;
    cv::Mat hostResult(1, 1, devResult.type(), &total);
    devResult.download(hostResult);

    return total;
}
||||
|
||||
// Runs gridCalcSum over (src1 - src2)^2 (both read as uchar and widened to
// double), downloads the single double result from _buf and returns its
// square root.
double normDiffL2(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf)
{
    const GpuMat_<uchar>& lhs = (const GpuMat_<uchar>&) _src1;
    const GpuMat_<uchar>& rhs = (const GpuMat_<uchar>&) _src2;
    GpuMat_<double>& devResult = (GpuMat_<double>&) _buf;

    gridCalcSum(sqr_(cvt_<double>(lhs) - cvt_<double>(rhs)), devResult);

    // Wrap a local scalar in a Mat header so the download lands directly in it.
    double sumOfSquares;
    cv::Mat hostResult(1, 1, devResult.type(), &sumOfSquares);
    devResult.download(hostResult);

    return std::sqrt(sumOfSquares);
}
||||
} |
||||
|
||||
// Norm of the difference between two CV_8UC1 GpuMats of identical size.
//
// _src1, _src2 : inputs; must both be CV_8UC1 and have the same size.
// buf          : device buffer handed to the selected helper for its result.
// normType     : NORM_INF, NORM_L1 or NORM_L2; anything else fails the assertion.
double cv::cuda::norm(InputArray _src1, InputArray _src2, GpuMat& buf, int normType)
{
    GpuMat src1 = _src1.getGpuMat();
    GpuMat src2 = _src2.getGpuMat();

    CV_Assert( src1.type() == CV_8UC1 );
    CV_Assert( src1.size() == src2.size() && src1.type() == src2.type() );
    CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );

    switch (normType)
    {
    case NORM_INF:
        return normDiffInf(src1, src2, buf);

    case NORM_L1:
        return normDiffL1(src1, src2, buf);

    default:
        // Only NORM_L2 can reach this point because of the assertion above.
        return normDiffL2(src1, src2, buf);
    }
}
||||
|
||||
#endif |
@ -1,135 +0,0 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __UNROLL_DETAIL_HPP__ |
||||
#define __UNROLL_DETAIL_HPP__ |
||||
|
||||
#include <thrust/tuple.h> |
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/vec_traits.hpp" |
||||
|
||||
namespace detail
{
    // Per-channel-count helpers. For cn > 1 each helper expands a single
    // argument into a cn-element thrust::tuple; for cn == 1 the argument is
    // passed straight through.
    //
    //  smem_tuple<BLOCK_SIZE>(smem) : smem, smem + BLOCK_SIZE, ... forwarded to
    //                                 cv::cuda::device::smem_tuple
    //  tie(val)                     : references to val.x, val.y, ...
    //  op(f)                        : cn copies of the functor f
    template <int cn> struct Unroll;

    template <> struct Unroll<1>
    {
        template <int BLOCK_SIZE, typename R>
        static __device__ __forceinline__ volatile R* smem_tuple(R* smem)
        {
            return smem;
        }

        template <typename R>
        static __device__ __forceinline__ R& tie(R& val)
        {
            return val;
        }

        template <class Op>
        static __device__ __forceinline__ const Op& op(const Op& f)
        {
            return f;
        }
    };

    template <> struct Unroll<2>
    {
        template <int BLOCK_SIZE, typename R>
        static __device__ __forceinline__
        thrust::tuple<volatile R*, volatile R*>
        smem_tuple(R* smem)
        {
            return cv::cuda::device::smem_tuple(smem,
                                                smem + BLOCK_SIZE);
        }

        template <typename R>
        static __device__ __forceinline__
        thrust::tuple<typename cv::cuda::device::VecTraits<R>::elem_type&,
                      typename cv::cuda::device::VecTraits<R>::elem_type&>
        tie(R& val)
        {
            return thrust::tie(val.x, val.y);
        }

        template <class Op>
        static __device__ __forceinline__
        const thrust::tuple<Op, Op>
        op(const Op& f)
        {
            return thrust::make_tuple(f, f);
        }
    };

    template <> struct Unroll<3>
    {
        template <int BLOCK_SIZE, typename R>
        static __device__ __forceinline__
        thrust::tuple<volatile R*, volatile R*, volatile R*>
        smem_tuple(R* smem)
        {
            return cv::cuda::device::smem_tuple(smem,
                                                smem + BLOCK_SIZE,
                                                smem + 2 * BLOCK_SIZE);
        }

        template <typename R>
        static __device__ __forceinline__
        thrust::tuple<typename cv::cuda::device::VecTraits<R>::elem_type&,
                      typename cv::cuda::device::VecTraits<R>::elem_type&,
                      typename cv::cuda::device::VecTraits<R>::elem_type&>
        tie(R& val)
        {
            return thrust::tie(val.x, val.y, val.z);
        }

        template <class Op>
        static __device__ __forceinline__
        const thrust::tuple<Op, Op, Op>
        op(const Op& f)
        {
            return thrust::make_tuple(f, f, f);
        }
    };

    template <> struct Unroll<4>
    {
        template <int BLOCK_SIZE, typename R>
        static __device__ __forceinline__
        thrust::tuple<volatile R*, volatile R*, volatile R*, volatile R*>
        smem_tuple(R* smem)
        {
            return cv::cuda::device::smem_tuple(smem,
                                                smem + BLOCK_SIZE,
                                                smem + 2 * BLOCK_SIZE,
                                                smem + 3 * BLOCK_SIZE);
        }

        template <typename R>
        static __device__ __forceinline__
        thrust::tuple<typename cv::cuda::device::VecTraits<R>::elem_type&,
                      typename cv::cuda::device::VecTraits<R>::elem_type&,
                      typename cv::cuda::device::VecTraits<R>::elem_type&,
                      typename cv::cuda::device::VecTraits<R>::elem_type&>
        tie(R& val)
        {
            return thrust::tie(val.x, val.y, val.z, val.w);
        }

        template <class Op>
        static __device__ __forceinline__
        const thrust::tuple<Op, Op, Op, Op>
        op(const Op& f)
        {
            return thrust::make_tuple(f, f, f, f);
        }
    };
}
||||
|
||||
#endif // __UNROLL_DETAIL_HPP__
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,177 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#pragma once |
||||
|
||||
#ifndef __OPENCV_CUDEV_GRID_MINMAXLOC_DETAIL_HPP__ |
||||
#define __OPENCV_CUDEV_GRID_MINMAXLOC_DETAIL_HPP__ |
||||
|
||||
#include "../../common.hpp" |
||||
#include "../../util/vec_traits.hpp" |
||||
#include "../../util/type_traits.hpp" |
||||
#include "../../util/limits.hpp" |
||||
#include "../../block/reduce.hpp" |
||||
|
||||
namespace cv { namespace cudev { |
||||
|
||||
namespace grid_minmaxloc_detail |
||||
{ |
||||
// First pass of the min/max-with-location search.
//
// Each thread visits up to patch_y x patch_x elements, starting at its own
// (y0, x0) and stepping by blockDim.y / blockDim.x, and skips elements for
// which mask(y, x) is false. It tracks the smallest and largest value seen
// together with their flattened positions (y * cols + x). The per-thread
// results are combined with blockReduceKeyVal, and thread 0 of every block
// writes the block result to slot blockIdx.y * gridDim.x + blockIdx.x of the
// four output arrays.
//
// BLOCK_SIZE sizes the shared arrays that are indexed by the flattened thread id.
// A location of -1 means "nothing selected".
// NOTE(review): the maximum starts at -numeric_limits<ResType>::max(); for an
// unsigned ResType that negation would wrap - confirm ResType is never unsigned.
template <int BLOCK_SIZE, class SrcPtr, typename ResType, class MaskPtr>
__global__ void minMaxLoc_pass_1(const SrcPtr src, ResType* minVal, ResType* maxVal, int* minLoc, int* maxLoc, const MaskPtr mask, const int rows, const int cols, const int patch_y, const int patch_x)
{
    __shared__ ResType sMinVal[BLOCK_SIZE];
    __shared__ ResType sMaxVal[BLOCK_SIZE];
    __shared__ uint sMinLoc[BLOCK_SIZE];
    __shared__ uint sMaxLoc[BLOCK_SIZE];

    // Top-left element handled by this thread.
    const int firstX = blockIdx.x * blockDim.x * patch_x + threadIdx.x;
    const int firstY = blockIdx.y * blockDim.y * patch_y + threadIdx.y;

    ResType bestMin = numeric_limits<ResType>::max();
    ResType bestMax = -numeric_limits<ResType>::max();
    int bestMinLoc = -1;
    int bestMaxLoc = -1;

    for (int i = 0, y = firstY; i < patch_y && y < rows; ++i, y += blockDim.y)
    {
        for (int j = 0, x = firstX; j < patch_x && x < cols; ++j, x += blockDim.x)
        {
            if (!mask(y, x))
                continue;

            const ResType value = src(y, x);
            const int flatPos = y * cols + x;

            if (value < bestMin)
            {
                bestMin = value;
                bestMinLoc = flatPos;
            }

            if (value > bestMax)
            {
                bestMax = value;
                bestMaxLoc = flatPos;
            }
        }
    }

    const int tid = threadIdx.y * blockDim.x + threadIdx.x;

    // Values are the keys, locations are carried along as the payload.
    blockReduceKeyVal<BLOCK_SIZE>(smem_tuple(sMinVal, sMaxVal), tie(bestMin, bestMax),
                                  smem_tuple(sMinLoc, sMaxLoc), tie(bestMinLoc, bestMaxLoc),
                                  tid,
                                  make_tuple(less<ResType>(), greater<ResType>()));

    if (tid == 0)
    {
        const int bid = blockIdx.y * gridDim.x + blockIdx.x;

        minVal[bid] = bestMin;
        maxVal[bid] = bestMax;
        minLoc[bid] = bestMinLoc;
        maxLoc[bid] = bestMaxLoc;
    }
}
||||
|
||||
// Second pass: folds the first `count` per-block results into element 0 of
// each array.
//
// Thread t loads entry min(t, count - 1), so threads beyond the valid range
// re-read the last valid entry. After blockReduceKeyVal, thread 0 stores the
// combined result in slot 0 of all four arrays.
//
// minMal holds the minimum values (the parameter name is kept as in the
// original declaration).
template <int BLOCK_SIZE, typename T>
__global__ void minMaxLoc_pass_2(T* minMal, T* maxVal, int* minLoc, int* maxLoc, int count)
{
    __shared__ T sMinVal[BLOCK_SIZE];
    __shared__ T sMaxVal[BLOCK_SIZE];
    __shared__ int sMinLoc[BLOCK_SIZE];
    __shared__ int sMaxLoc[BLOCK_SIZE];

    const int slot = ::min(threadIdx.x, count - 1);

    T curMin = minMal[slot];
    T curMax = maxVal[slot];
    int curMinLoc = minLoc[slot];
    int curMaxLoc = maxLoc[slot];

    // Values are the keys, locations are carried along as the payload.
    blockReduceKeyVal<BLOCK_SIZE>(smem_tuple(sMinVal, sMaxVal), tie(curMin, curMax),
                                  smem_tuple(sMinLoc, sMaxLoc), tie(curMinLoc, curMaxLoc),
                                  threadIdx.x,
                                  make_tuple(less<T>(), greater<T>()));

    if (threadIdx.x != 0)
        return;

    minMal[0] = curMin;
    maxVal[0] = curMax;
    minLoc[0] = curMinLoc;
    maxLoc[0] = curMaxLoc;
}
||||
|
||||
// Computes the launch geometry for a rows x cols input.
//
// block : Policy::block_size_x by Policy::block_size_y.
// grid  : the number of blocks needed when each thread handles
//         Policy::patch_size_x by Policy::patch_size_y elements, with each grid
//         dimension capped at the matching block dimension. The grid therefore
//         never holds more blocks than one block holds threads.
template <class Policy>
void getLaunchCfg(int rows, int cols, dim3& block, dim3& grid)
{
    block = dim3(Policy::block_size_x, Policy::block_size_y);

    const unsigned int blocksX = divUp(cols, block.x * Policy::patch_size_x);
    const unsigned int blocksY = divUp(rows, block.y * Policy::patch_size_y);

    grid = dim3(::min(blocksX, block.x), ::min(blocksY, block.y));
}
||||
|
||||
// Host driver for the two-pass min/max-with-location search.
//
// src, mask              : element accessors, called as src(y, x) / mask(y, x) on the device.
// minVal, maxVal,
// minLoc, maxLoc         : device arrays. Pass 1 writes one entry per block
//                          (grid.x * grid.y entries); pass 2 leaves the final
//                          result in element 0.
// rows, cols             : extent of the input.
// stream                 : stream both kernels are launched on. When it is the
//                          default stream (0), the call also waits for the
//                          device to finish.
//
// Raises through CV_CUDEV_SAFE_CALL when a launch or the synchronisation fails.
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void minMaxLoc(const SrcPtr& src, ResType* minVal, ResType* maxVal, int* minLoc, int* maxLoc, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
    dim3 block, grid;

    // getLaunchCfg is declared as (rows, cols, ...). Passing the extents in
    // that order keeps grid.x tied to the column count and grid.y to the row
    // count. It also makes grid.x * grid.y, the number of per-block results
    // written by pass 1, agree with what any other getLaunchCfg<Policy>(rows,
    // cols, ...) call yields.
    getLaunchCfg<Policy>(rows, cols, block, grid);

    // The grid may have been capped, so recompute how many elements each
    // thread must visit per axis for the grid to cover the whole input.
    const int patch_x = divUp(divUp(cols, grid.x), block.x);
    const int patch_y = divUp(divUp(rows, grid.y), block.y);

    minMaxLoc_pass_1<Policy::block_size_x * Policy::block_size_y><<<grid, block, 0, stream>>>(src, minVal, maxVal, minLoc, maxLoc, mask, rows, cols, patch_y, patch_x);
    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    // A single block folds the grid.x * grid.y partial results together.
    minMaxLoc_pass_2<Policy::block_size_x * Policy::block_size_y><<<1, Policy::block_size_x * Policy::block_size_y, 0, stream>>>(minVal, maxVal, minLoc, maxLoc, grid.x * grid.y);
    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    if (stream == 0)
        CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}
||||
} |
||||
|
||||
}} |
||||
|
||||
#endif |
Loading…
Reference in new issue