diff --git a/modules/gpu/src/cascadeclassifier.cpp b/modules/gpu/src/cascadeclassifier.cpp index 6bef7fbe68..0af5fa252f 100644 --- a/modules/gpu/src/cascadeclassifier.cpp +++ b/modules/gpu/src/cascadeclassifier.cpp @@ -129,7 +129,7 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl private: - static void NCVDebugOutputHandler(const char* msg) { CV_Error(CV_GpuApiCallError, msg); } + static void NCVDebugOutputHandler(const std::string &msg) { CV_Error(CV_GpuApiCallError, msg.c_str()); } NCVStatus load(const string& classifierFile) diff --git a/modules/gpu/src/nvidia/core/NCV.cu b/modules/gpu/src/nvidia/core/NCV.cu index 6b55740aba..a5af00a8ea 100644 --- a/modules/gpu/src/nvidia/core/NCV.cu +++ b/modules/gpu/src/nvidia/core/NCV.cu @@ -40,10 +40,9 @@ //M*/ -#include -#include +#include +#include #include -#include #include "NCV.hpp" using namespace std; @@ -56,24 +55,18 @@ using namespace std; //============================================================================== -static void stdioDebugOutput(const char *msg) +static void stdDebugOutput(const string &msg) { - printf("%s", msg); + cout << msg; } -static NCVDebugOutputHandler *debugOutputHandler = stdioDebugOutput; +static NCVDebugOutputHandler *debugOutputHandler = stdDebugOutput; -void ncvDebugOutput(const char *msg, ...) 
+void ncvDebugOutput(const string &msg) { - const int K_DEBUG_STRING_MAXLEN = 1024; - char buffer[K_DEBUG_STRING_MAXLEN]; - va_list args; - va_start(args, msg); - vsnprintf(buffer, K_DEBUG_STRING_MAXLEN, msg, args); - va_end (args); - debugOutputHandler(buffer); + debugOutputHandler(msg); } @@ -288,7 +281,7 @@ NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, allocBegin = NULL; - if (reusePtr == NULL) + if (reusePtr == NULL && capacity != 0) { bReusesMemory = false; switch (memT) @@ -329,7 +322,7 @@ NCVMemStackAllocator::~NCVMemStackAllocator() { ncvAssertPrintCheck(currentSize == 0, "NCVMemStackAllocator dtor:: not all objects were deallocated properly, forcing destruction"); - if (!bReusesMemory) + if (!bReusesMemory && (allocBegin != (Ncv8u *)(0x1))) { switch (_memType) { @@ -355,7 +348,7 @@ NCVStatus NCVMemStackAllocator::alloc(NCVMemSegment &seg, size_t size) seg.clear(); ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC); - size = alignUp(static_cast(size), this->_alignment); + size = alignUp(size, this->_alignment); this->currentSize += size; this->_maxSize = std::max(this->_maxSize, this->currentSize); @@ -464,7 +457,7 @@ NCVStatus NCVMemNativeAllocator::alloc(NCVMemSegment &seg, size_t size) break; } - this->currentSize += alignUp(static_cast(size), this->_alignment); + this->currentSize += alignUp(size, this->_alignment); this->_maxSize = std::max(this->_maxSize, this->currentSize); seg.begin.memtype = this->_memType; @@ -480,8 +473,8 @@ NCVStatus NCVMemNativeAllocator::dealloc(NCVMemSegment &seg) ncvAssertReturn(seg.begin.memtype == this->_memType, NCV_ALLOCATOR_BAD_DEALLOC); ncvAssertReturn(seg.begin.ptr != NULL, NCV_ALLOCATOR_BAD_DEALLOC); - ncvAssertReturn(currentSize >= alignUp(static_cast(seg.size), this->_alignment), NCV_ALLOCATOR_BAD_DEALLOC); - currentSize -= alignUp(static_cast(seg.size), this->_alignment); + ncvAssertReturn(currentSize >= alignUp(seg.size, this->_alignment), NCV_ALLOCATOR_BAD_DEALLOC); + 
currentSize -= alignUp(seg.size, this->_alignment); switch (this->_memType) { diff --git a/modules/gpu/src/nvidia/core/NCV.hpp b/modules/gpu/src/nvidia/core/NCV.hpp index f310e14c4b..530aa0b8e8 100644 --- a/modules/gpu/src/nvidia/core/NCV.hpp +++ b/modules/gpu/src/nvidia/core/NCV.hpp @@ -42,7 +42,7 @@ #ifndef _ncv_hpp_ #define _ncv_hpp_ -#if (defined WIN32 || defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS //&& !defined(__CUDACC__) +#if (defined WIN32 || defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS #define NCV_EXPORTS __declspec(dllexport) #else #define NCV_EXPORTS @@ -53,6 +53,8 @@ #endif #include +#include +#include //============================================================================== @@ -78,7 +80,7 @@ namespace NcvCTprep } -#define NCV_CT_PREP_PASTE_AUX(a,b) a##b ///< Concatenation indirection macro +#define NCV_CT_PREP_PASTE_AUX(a,b) a##b ///< Concatenation indirection macro #define NCV_CT_PREP_PASTE(a,b) NCV_CT_PREP_PASTE_AUX(a, b) ///< Concatenation macro @@ -181,6 +183,25 @@ struct NcvSize32u Ncv32u height; ///< Rectangle height. __host__ __device__ NcvSize32u() : width(0), height(0) {}; __host__ __device__ NcvSize32u(Ncv32u width, Ncv32u height) : width(width), height(height) {} + __host__ __device__ bool operator == (const NcvSize32u &another) const {return this->width == another.width && this->height == another.height;} +}; + + +struct NcvPoint2D32s +{ + Ncv32s x; ///< Point X. + Ncv32s y; ///< Point Y. + __host__ __device__ NcvPoint2D32s() : x(0), y(0) {}; + __host__ __device__ NcvPoint2D32s(Ncv32s x, Ncv32s y) : x(x), y(y) {} +}; + + +struct NcvPoint2D32u +{ + Ncv32u x; ///< Point X. + Ncv32u y; ///< Point Y. 
+ __host__ __device__ NcvPoint2D32u() : x(0), y(0) {}; + __host__ __device__ NcvPoint2D32u(Ncv32u x, Ncv32u y) : x(x), y(y) {} }; @@ -199,6 +220,7 @@ NCV_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u)); NCV_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s)); NCV_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u)); NCV_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u)); +NCV_CT_ASSERT(sizeof(NcvPoint2D32u) == 2 * sizeof(Ncv32u)); //============================================================================== @@ -219,49 +241,44 @@ const Ncv32u K_LOG2_WARP_SIZE = 5; //============================================================================== -#define NCV_CT_PREP_STRINGIZE_AUX(x) #x -#define NCV_CT_PREP_STRINGIZE(x) NCV_CT_PREP_STRINGIZE_AUX(x) - +NCV_EXPORTS void ncvDebugOutput(const std::string &msg); -NCV_EXPORTS void ncvDebugOutput(const char *msg, ...); - -typedef void NCVDebugOutputHandler(const char* msg); +typedef void NCVDebugOutputHandler(const std::string &msg); NCV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func); #define ncvAssertPrintCheck(pred, msg) \ - ((pred) ? 
true : (ncvDebugOutput("\n%s\n", \ - "NCV Assertion Failed: " msg ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__) \ - ), false)) - - -#define ncvAssertPrintReturn(pred, msg, err) \ - if (ncvAssertPrintCheck(pred, msg)) ; else return err - - -#define ncvAssertReturn(pred, err) \ do \ { \ if (!(pred)) \ { \ - ncvDebugOutput("\n%s%d%s\n", "NCV Assertion Failed: retcode=", (int)err, ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \ - return err; \ + std::ostringstream oss; \ + oss << "NCV Assertion Failed: " << msg << ", file=" << __FILE__ << ", line=" << __LINE__ << std::endl; \ + ncvDebugOutput(oss.str()); \ } \ } while (0) +#define ncvAssertPrintReturn(pred, msg, err) \ + do \ + { \ + ncvAssertPrintCheck(pred, msg); \ + if (!(pred)) return err; \ + } while (0) + + +#define ncvAssertReturn(pred, err) \ + ncvAssertPrintReturn(pred, "retcode=" << (int)err, err) + + #define ncvAssertReturnNcvStat(ncvOp) \ do \ { \ NCVStatus _ncvStat = ncvOp; \ - if (NCV_SUCCESS != _ncvStat) \ - { \ - ncvDebugOutput("\n%s%d%s\n", "NCV Assertion Failed: NcvStat=", (int)_ncvStat, ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \ - return _ncvStat; \ - } \ + ncvAssertPrintReturn(NCV_SUCCESS==_ncvStat, "NcvStat=" << (int)_ncvStat, _ncvStat); \ } while (0) @@ -270,18 +287,14 @@ NCV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func); { \ cudaError_t resCall = cudacall; \ cudaError_t resGLE = cudaGetLastError(); \ - if (cudaSuccess != resCall || cudaSuccess != resGLE) \ - { \ - ncvDebugOutput("\n%s%d%s\n", "NCV CUDA Assertion Failed: cudaError_t=", (int)(resCall | resGLE), ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \ - return errCode; \ - } \ + ncvAssertPrintReturn(cudaSuccess==resCall && cudaSuccess==resGLE, "cudaError_t=" << (int)(resCall | resGLE), errCode); \ } while (0) /** * Return-codes for status notification, errors and warnings */ -enum NCVStatus +enum { //NCV statuses NCV_SUCCESS, @@ -338,9 +351,14 @@ enum 
NCVStatus NPPST_MEM_INSUFFICIENT_BUFFER, ///< Insufficient user-allocated buffer NPPST_MEM_RESIDENCE_ERROR, ///< Memory residence error detected (check if pointers should be device or pinned) NPPST_MEM_INTERNAL_ERROR, ///< Internal memory management error + + NCV_LAST_STATUS ///< Marker to continue error numeration in other files }; +typedef Ncv32u NCVStatus; + + #define NCV_SET_SKIP_COND(x) \ bool __ncv_skip_cond = x @@ -774,9 +792,20 @@ public: return ncvStat; } + T &at(Ncv32u x, Ncv32u y) const + { + if (x >= this->_width || y >= this->_height) + { + printf("Error addressing matrix at [%d, %d]\n", x, y); + return *this->_ptr; + } + return ((T *)((Ncv8u *)this->_ptr + y * this->_pitch))[x]; + } + T *ptr() const {return this->_ptr;} Ncv32u width() const {return this->_width;} Ncv32u height() const {return this->_height;} + NcvSize32u size() const {return NcvSize32u(this->_width, this->_height);} Ncv32u pitch() const {return this->_pitch;} NCVMemoryType memType() const {return this->_memtype;} @@ -923,7 +952,7 @@ public: this->_width = roi.width; this->_height = roi.height; this->_pitch = mat.pitch(); - this->_ptr = mat.ptr() + roi.y * mat.stride() + roi.x; + this->_ptr = &mat.at(roi.x, roi.y); this->_memtype = mat.memType(); this->bReused = true; @@ -962,4 +991,24 @@ NCV_EXPORTS NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst, Ncv32u dstStride, Ncv NCV_EXPORTS NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight, NcvRect32u *d_rects, Ncv32u numRects, Ncv32u color, cudaStream_t cuStream); + +#define CLAMP(x,a,b) ( (x) > (b) ? (b) : ( (x) < (a) ? (a) : (x) ) ) +#define CLAMP_TOP(x, a) (((x) > (a)) ? (a) : (x)) +#define CLAMP_BOTTOM(x, a) (((x) < (a)) ? 
(a) : (x)) +#define CLAMP_0_255(x) CLAMP(x,0,255) + + +#define SUB_BEGIN(type, name) struct { __inline type name +#define SUB_END(name) } name; +#define SUB_CALL(name) name.name + +#define SQR(x) ((x)*(x)) + + +#define ncvSafeMatAlloc(name, type, alloc, width, height, err) \ + NCVMatrixAlloc name(alloc, width, height); \ + ncvAssertReturn(name.isMemAllocated(), err); + + + #endif // _ncv_hpp_ diff --git a/modules/gpu/src/nvidia/core/NCVColorConversion.hpp b/modules/gpu/src/nvidia/core/NCVColorConversion.hpp new file mode 100644 index 0000000000..e7b4afbd1b --- /dev/null +++ b/modules/gpu/src/nvidia/core/NCVColorConversion.hpp @@ -0,0 +1,96 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef _ncv_color_conversion_hpp_ +#define _ncv_color_conversion_hpp_ + +#include "NCVPixelOperations.hpp" + +enum NCVColorSpace +{ + NCVColorSpaceGray, + NCVColorSpaceRGBA, +}; + +template struct __pixColorConv { +static void _pixColorConv(const Tin &pixIn, Tout &pixOut); +}; + +template struct __pixColorConv { +static void _pixColorConv(const Tin &pixIn, Tout &pixOut) +{ + Ncv32f luma = 0.299f * pixIn.x + 0.587f * pixIn.y + 0.114f * pixIn.z; + _TDemoteClampNN(luma, pixOut.x); +}}; + +template struct __pixColorConv { +static void _pixColorConv(const Tin &pixIn, Tout &pixOut) +{ + _TDemoteClampNN(pixIn.x, pixOut.x); + _TDemoteClampNN(pixIn.x, pixOut.y); + _TDemoteClampNN(pixIn.x, pixOut.z); + pixOut.w = 0; +}}; + +template +static +NCVStatus _ncvColorConv_host(const NCVMatrix &h_imgIn, + const NCVMatrix &h_imgOut) +{ + ncvAssertReturn(h_imgIn.size() == h_imgOut.size(), NCV_DIMENSIONS_INVALID); + ncvAssertReturn(h_imgIn.memType() == h_imgOut.memType() && + (h_imgIn.memType() == NCVMemoryTypeHostPinned || h_imgIn.memType() == NCVMemoryTypeNone), NCV_MEM_RESIDENCE_ERROR); + NCV_SET_SKIP_COND(h_imgIn.memType() == NCVMemoryTypeNone); + NCV_SKIP_COND_BEGIN + + for 
(Ncv32u i=0; i::_pixColorConv(h_imgIn.at(j,i), h_imgOut.at(j,i)); + } + } + + NCV_SKIP_COND_END + return NCV_SUCCESS; +} + +#endif //_ncv_color_conversion_hpp_ diff --git a/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp b/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp new file mode 100644 index 0000000000..3951a2f435 --- /dev/null +++ b/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp @@ -0,0 +1,350 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef _ncv_pixel_operations_hpp_ +#define _ncv_pixel_operations_hpp_ + +#include +#include +#include "NCV.hpp" + +template inline TBase _pixMaxVal(); +template<> static inline Ncv8u _pixMaxVal() {return UCHAR_MAX;} +template<> static inline Ncv16u _pixMaxVal() {return USHRT_MAX;} +template<> static inline Ncv32u _pixMaxVal() {return UINT_MAX;} +template<> static inline Ncv8s _pixMaxVal() {return CHAR_MAX;} +template<> static inline Ncv16s _pixMaxVal() {return SHRT_MAX;} +template<> static inline Ncv32s _pixMaxVal() {return INT_MAX;} +template<> static inline Ncv32f _pixMaxVal() {return FLT_MAX;} +template<> static inline Ncv64f _pixMaxVal() {return DBL_MAX;} + +template inline TBase _pixMinVal(); +template<> static inline Ncv8u _pixMinVal() {return 0;} +template<> static inline Ncv16u _pixMinVal() {return 0;} +template<> static inline Ncv32u _pixMinVal() {return 0;} +template<> static inline Ncv8s _pixMinVal() {return CHAR_MIN;} +template<> static inline Ncv16s _pixMinVal() {return SHRT_MIN;} +template<> static inline Ncv32s _pixMinVal() {return INT_MIN;} +template<> static inline Ncv32f _pixMinVal() {return FLT_MIN;} +template<> static inline Ncv64f _pixMinVal() {return DBL_MIN;} + +template struct TConvVec2Base; +template<> struct TConvVec2Base {typedef Ncv8u TBase;}; +template<> struct TConvVec2Base {typedef Ncv8u TBase;}; +template<> struct TConvVec2Base {typedef Ncv8u TBase;}; +template<> struct TConvVec2Base 
{typedef Ncv16u TBase;}; +template<> struct TConvVec2Base {typedef Ncv16u TBase;}; +template<> struct TConvVec2Base {typedef Ncv16u TBase;}; +template<> struct TConvVec2Base {typedef Ncv32u TBase;}; +template<> struct TConvVec2Base {typedef Ncv32u TBase;}; +template<> struct TConvVec2Base {typedef Ncv32u TBase;}; +template<> struct TConvVec2Base {typedef Ncv32f TBase;}; +template<> struct TConvVec2Base {typedef Ncv32f TBase;}; +template<> struct TConvVec2Base {typedef Ncv32f TBase;}; +template<> struct TConvVec2Base {typedef Ncv64f TBase;}; +template<> struct TConvVec2Base {typedef Ncv64f TBase;}; +template<> struct TConvVec2Base {typedef Ncv64f TBase;}; + +#define NC(T) (sizeof(T) / sizeof(TConvVec2Base::TBase)) + +template struct TConvBase2Vec; +template<> struct TConvBase2Vec {typedef uchar1 TVec;}; +template<> struct TConvBase2Vec {typedef uchar3 TVec;}; +template<> struct TConvBase2Vec {typedef uchar4 TVec;}; +template<> struct TConvBase2Vec {typedef ushort1 TVec;}; +template<> struct TConvBase2Vec {typedef ushort3 TVec;}; +template<> struct TConvBase2Vec {typedef ushort4 TVec;}; +template<> struct TConvBase2Vec {typedef uint1 TVec;}; +template<> struct TConvBase2Vec {typedef uint3 TVec;}; +template<> struct TConvBase2Vec {typedef uint4 TVec;}; +template<> struct TConvBase2Vec {typedef float1 TVec;}; +template<> struct TConvBase2Vec {typedef float3 TVec;}; +template<> struct TConvBase2Vec {typedef float4 TVec;}; +template<> struct TConvBase2Vec {typedef double1 TVec;}; +template<> struct TConvBase2Vec {typedef double3 TVec;}; +template<> struct TConvBase2Vec {typedef double4 TVec;}; + +//TODO: consider using CUDA intrinsics to avoid branching +template static inline void _TDemoteClampZ(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a);}; +template static inline void _TDemoteClampZ(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a, 0, USHRT_MAX);} +template static inline void _TDemoteClampZ(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a, 0, UINT_MAX);} +template static 
inline void _TDemoteClampZ(Tin &a, Ncv32f &out) {out = (Ncv32f)a;} + +//TODO: consider using CUDA intrinsics to avoid branching +template static inline void _TDemoteClampNN(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a+0.5f);} +template static inline void _TDemoteClampNN(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a+0.5f, 0, USHRT_MAX);} +template static inline void _TDemoteClampNN(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a+0.5f, 0, UINT_MAX);} +template static inline void _TDemoteClampNN(Tin &a, Ncv32f &out) {out = (Ncv32f)a;} + +template inline Tout _pixMakeZero(); +template<> static inline uchar1 _pixMakeZero() {return make_uchar1(0);} +template<> static inline uchar3 _pixMakeZero() {return make_uchar3(0,0,0);} +template<> static inline uchar4 _pixMakeZero() {return make_uchar4(0,0,0,0);} +template<> static inline ushort1 _pixMakeZero() {return make_ushort1(0);} +template<> static inline ushort3 _pixMakeZero() {return make_ushort3(0,0,0);} +template<> static inline ushort4 _pixMakeZero() {return make_ushort4(0,0,0,0);} +template<> static inline uint1 _pixMakeZero() {return make_uint1(0);} +template<> static inline uint3 _pixMakeZero() {return make_uint3(0,0,0);} +template<> static inline uint4 _pixMakeZero() {return make_uint4(0,0,0,0);} +template<> static inline float1 _pixMakeZero() {return make_float1(0.f);} +template<> static inline float3 _pixMakeZero() {return make_float3(0.f,0.f,0.f);} +template<> static inline float4 _pixMakeZero() {return make_float4(0.f,0.f,0.f,0.f);} +template<> static inline double1 _pixMakeZero() {return make_double1(0.);} +template<> static inline double3 _pixMakeZero() {return make_double3(0.,0.,0.);} +template<> static inline double4 _pixMakeZero() {return make_double4(0.,0.,0.,0.);} + +static inline uchar1 _pixMake(Ncv8u x) {return make_uchar1(x);} +static inline uchar3 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z) {return make_uchar3(x,y,z);} +static inline uchar4 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z, Ncv8u w) {return 
make_uchar4(x,y,z,w);} +static inline ushort1 _pixMake(Ncv16u x) {return make_ushort1(x);} +static inline ushort3 _pixMake(Ncv16u x, Ncv16u y, Ncv16u z) {return make_ushort3(x,y,z);} +static inline ushort4 _pixMake(Ncv16u x, Ncv16u y, Ncv16u z, Ncv16u w) {return make_ushort4(x,y,z,w);} +static inline uint1 _pixMake(Ncv32u x) {return make_uint1(x);} +static inline uint3 _pixMake(Ncv32u x, Ncv32u y, Ncv32u z) {return make_uint3(x,y,z);} +static inline uint4 _pixMake(Ncv32u x, Ncv32u y, Ncv32u z, Ncv32u w) {return make_uint4(x,y,z,w);} +static inline float1 _pixMake(Ncv32f x) {return make_float1(x);} +static inline float3 _pixMake(Ncv32f x, Ncv32f y, Ncv32f z) {return make_float3(x,y,z);} +static inline float4 _pixMake(Ncv32f x, Ncv32f y, Ncv32f z, Ncv32f w) {return make_float4(x,y,z,w);} +static inline double1 _pixMake(Ncv64f x) {return make_double1(x);} +static inline double3 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z) {return make_double3(x,y,z);} +static inline double4 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z, Ncv64f w) {return make_double4(x,y,z,w);} + + +template struct __pixDemoteClampZ_CN {static Tout _pixDemoteClampZ_CN(Tin &pix);}; + +template struct __pixDemoteClampZ_CN { +static Tout _pixDemoteClampZ_CN(Tin &pix) +{ + Tout out; + _TDemoteClampZ(pix.x, out.x); + return out; +}}; + +template struct __pixDemoteClampZ_CN { +static Tout _pixDemoteClampZ_CN(Tin &pix) +{ + Tout out; + _TDemoteClampZ(pix.x, out.x); + _TDemoteClampZ(pix.y, out.y); + _TDemoteClampZ(pix.z, out.z); + return out; +}}; + +template struct __pixDemoteClampZ_CN { +static Tout _pixDemoteClampZ_CN(Tin &pix) +{ + Tout out; + _TDemoteClampZ(pix.x, out.x); + _TDemoteClampZ(pix.y, out.y); + _TDemoteClampZ(pix.z, out.z); + _TDemoteClampZ(pix.w, out.w); + return out; +}}; + +template static inline Tout _pixDemoteClampZ(Tin &pix) +{ + return __pixDemoteClampZ_CN::_pixDemoteClampZ_CN(pix); +} + + +template struct __pixDemoteClampNN_CN {static Tout _pixDemoteClampNN_CN(Tin &pix);}; + +template struct 
__pixDemoteClampNN_CN { +static Tout _pixDemoteClampNN_CN(Tin &pix) +{ + Tout out; + _TDemoteClampNN(pix.x, out.x); + return out; +}}; + +template struct __pixDemoteClampNN_CN { +static Tout _pixDemoteClampNN_CN(Tin &pix) +{ + Tout out; + _TDemoteClampNN(pix.x, out.x); + _TDemoteClampNN(pix.y, out.y); + _TDemoteClampNN(pix.z, out.z); + return out; +}}; + +template struct __pixDemoteClampNN_CN { +static Tout _pixDemoteClampNN_CN(Tin &pix) +{ + Tout out; + _TDemoteClampNN(pix.x, out.x); + _TDemoteClampNN(pix.y, out.y); + _TDemoteClampNN(pix.z, out.z); + _TDemoteClampNN(pix.w, out.w); + return out; +}}; + +template static inline Tout _pixDemoteClampNN(Tin &pix) +{ + return __pixDemoteClampNN_CN::_pixDemoteClampNN_CN(pix); +} + + +template struct __pixScale_CN {static Tout _pixScale_CN(Tin &pix, Tw w);}; + +template struct __pixScale_CN { +static Tout _pixScale_CN(Tin &pix, Tw w) +{ + Tout out; + typedef typename TConvVec2Base::TBase TBout; + out.x = (TBout)(pix.x * w); + return out; +}}; + +template struct __pixScale_CN { +static Tout _pixScale_CN(Tin &pix, Tw w) +{ + Tout out; + typedef typename TConvVec2Base::TBase TBout; + out.x = (TBout)(pix.x * w); + out.y = (TBout)(pix.y * w); + out.z = (TBout)(pix.z * w); + return out; +}}; + +template struct __pixScale_CN { +static Tout _pixScale_CN(Tin &pix, Tw w) +{ + Tout out; + typedef typename TConvVec2Base::TBase TBout; + out.x = (TBout)(pix.x * w); + out.y = (TBout)(pix.y * w); + out.z = (TBout)(pix.z * w); + out.w = (TBout)(pix.w * w); + return out; +}}; + +template static Tout _pixScale(Tin &pix, Tw w) +{ + return __pixScale_CN::_pixScale_CN(pix, w); +} + + +template struct __pixAdd_CN {static Tout _pixAdd_CN(Tout &pix1, Tin &pix2);}; + +template struct __pixAdd_CN { +static Tout _pixAdd_CN(Tout &pix1, Tin &pix2) +{ + Tout out; + out.x = pix1.x + pix2.x; + return out; +}}; + +template struct __pixAdd_CN { +static Tout _pixAdd_CN(Tout &pix1, Tin &pix2) +{ + Tout out; + out.x = pix1.x + pix2.x; + out.y = pix1.y + 
pix2.y; + out.z = pix1.z + pix2.z; + return out; +}}; + +template struct __pixAdd_CN { +static Tout _pixAdd_CN(Tout &pix1, Tin &pix2) +{ + Tout out; + out.x = pix1.x + pix2.x; + out.y = pix1.y + pix2.y; + out.z = pix1.z + pix2.z; + out.w = pix1.w + pix2.w; + return out; +}}; + +template static Tout _pixAdd(Tout &pix1, Tin &pix2) +{ + return __pixAdd_CN::_pixAdd_CN(pix1, pix2); +} + + +template struct __pixDist_CN {static Tout _pixDist_CN(Tin &pix1, Tin &pix2);}; + +template struct __pixDist_CN { +static Tout _pixDist_CN(Tin &pix1, Tin &pix2) +{ + return Tout(SQR(pix1.x - pix2.x)); +}}; + +template struct __pixDist_CN { +static Tout _pixDist_CN(Tin &pix1, Tin &pix2) +{ + return Tout(SQR(pix1.x - pix2.x) + SQR(pix1.y - pix2.y) + SQR(pix1.z - pix2.z)); +}}; + +template struct __pixDist_CN { +static Tout _pixDist_CN(Tin &pix1, Tin &pix2) +{ + return Tout(SQR(pix1.x - pix2.x) + SQR(pix1.y - pix2.y) + SQR(pix1.z - pix2.z) + SQR(pix1.w - pix2.w)); +}}; + +template static Tout _pixDist(Tin &pix1, Tin &pix2) +{ + return __pixDist_CN::_pixDist_CN(pix1, pix2); +} + + +template struct TAccPixWeighted; +template<> struct TAccPixWeighted {typedef double1 type;}; +template<> struct TAccPixWeighted {typedef double3 type;}; +template<> struct TAccPixWeighted {typedef double4 type;}; +template<> struct TAccPixWeighted {typedef double1 type;}; +template<> struct TAccPixWeighted {typedef double3 type;}; +template<> struct TAccPixWeighted {typedef double4 type;}; +template<> struct TAccPixWeighted {typedef double1 type;}; +template<> struct TAccPixWeighted {typedef double3 type;}; +template<> struct TAccPixWeighted {typedef double4 type;}; + +template struct TAccPixDist {}; +template<> struct TAccPixDist {typedef Ncv32u type;}; +template<> struct TAccPixDist {typedef Ncv32u type;}; +template<> struct TAccPixDist {typedef Ncv32u type;}; +template<> struct TAccPixDist {typedef Ncv32u type;}; +template<> struct TAccPixDist {typedef Ncv32u type;}; +template<> struct TAccPixDist {typedef 
Ncv32u type;}; +template<> struct TAccPixDist {typedef Ncv32f type;}; +template<> struct TAccPixDist {typedef Ncv32f type;}; +template<> struct TAccPixDist {typedef Ncv32f type;}; + +#endif //_ncv_pixel_operations_hpp_ diff --git a/modules/gpu/src/nvidia/core/NCVPyramid.cu b/modules/gpu/src/nvidia/core/NCVPyramid.cu new file mode 100644 index 0000000000..20e52e580b --- /dev/null +++ b/modules/gpu/src/nvidia/core/NCVPyramid.cu @@ -0,0 +1,397 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + + +#include +#include +#include "NCV.hpp" +#include "NCVPyramid.hpp" +#include "NCVPixelOperations.hpp" + + +template struct __average4_CN {static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11);}; + +template struct __average4_CN { +static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11) +{ + T out; + out.x = ((Ncv32s)p00.x + p01.x + p10.x + p11.x + 2) / 4; + return out; +}}; + +template<> struct __average4_CN { +static float1 _average4_CN(const float1 &p00, const float1 &p01, const float1 &p10, const float1 &p11) +{ + float1 out; + out.x = (p00.x + p01.x + p10.x + p11.x) / 4; + return out; +}}; + +template<> struct __average4_CN { +static double1 _average4_CN(const double1 &p00, const double1 &p01, const double1 &p10, const double1 &p11) +{ + double1 out; + out.x = (p00.x + p01.x + p10.x + p11.x) / 4; + return out; +}}; + +template struct __average4_CN { +static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11) +{ + T out; + out.x = ((Ncv32s)p00.x + p01.x + p10.x + p11.x + 2) / 4; + out.y = ((Ncv32s)p00.y + p01.y + p10.y + p11.y + 2) / 4; + out.z = ((Ncv32s)p00.z + p01.z + p10.z + p11.z + 2) / 4; + return out; +}}; + +template<> struct __average4_CN { +static float3 _average4_CN(const float3 &p00, const float3 &p01, const float3 &p10, const float3 &p11) +{ + float3 out; + out.x = (p00.x + p01.x + p10.x + p11.x) / 4; + out.y = (p00.y + p01.y + p10.y + 
p11.y) / 4; + out.z = (p00.z + p01.z + p10.z + p11.z) / 4; + return out; +}}; + +template<> struct __average4_CN { +static double3 _average4_CN(const double3 &p00, const double3 &p01, const double3 &p10, const double3 &p11) +{ + double3 out; + out.x = (p00.x + p01.x + p10.x + p11.x) / 4; + out.y = (p00.y + p01.y + p10.y + p11.y) / 4; + out.z = (p00.z + p01.z + p10.z + p11.z) / 4; + return out; +}}; + +template struct __average4_CN { +static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11) +{ + T out; + out.x = ((Ncv32s)p00.x + p01.x + p10.x + p11.x + 2) / 4; + out.y = ((Ncv32s)p00.y + p01.y + p10.y + p11.y + 2) / 4; + out.z = ((Ncv32s)p00.z + p01.z + p10.z + p11.z + 2) / 4; + out.w = ((Ncv32s)p00.w + p01.w + p10.w + p11.w + 2) / 4; + return out; +}}; + +template<> struct __average4_CN { +static float4 _average4_CN(const float4 &p00, const float4 &p01, const float4 &p10, const float4 &p11) +{ + float4 out; + out.x = (p00.x + p01.x + p10.x + p11.x) / 4; + out.y = (p00.y + p01.y + p10.y + p11.y) / 4; + out.z = (p00.z + p01.z + p10.z + p11.z) / 4; + out.w = (p00.w + p01.w + p10.w + p11.w) / 4; + return out; +}}; + +template<> struct __average4_CN { +static double4 _average4_CN(const double4 &p00, const double4 &p01, const double4 &p10, const double4 &p11) +{ + double4 out; + out.x = (p00.x + p01.x + p10.x + p11.x) / 4; + out.y = (p00.y + p01.y + p10.y + p11.y) / 4; + out.z = (p00.z + p01.z + p10.z + p11.z) / 4; + out.w = (p00.w + p01.w + p10.w + p11.w) / 4; + return out; +}}; + +template static T _average4(const T &p00, const T &p01, const T &p10, const T &p11) +{ + return __average4_CN::_average4_CN(p00, p01, p10, p11); +} + + +template struct __lerp_CN {static Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d);}; + +template struct __lerp_CN { +static Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d) +{ + typedef typename TConvVec2Base::TBase TB; + return _pixMake(TB(b.x * d + a.x * (1 - d))); +}}; + +template struct __lerp_CN { +static Tout 
_lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
+{
+    typedef typename TConvVec2Base<Tout>::TBase TB;
+    return _pixMake(TB(b.x * d + a.x * (1 - d)),
+                    TB(b.y * d + a.y * (1 - d)),
+                    TB(b.z * d + a.z * (1 - d)));
+}};
+
+template<typename Tin, typename Tout> struct __lerp_CN<Tin, Tout, 4> {
+static Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
+{
+    typedef typename TConvVec2Base<Tout>::TBase TB;
+    return _pixMake(TB(b.x * d + a.x * (1 - d)),
+                    TB(b.y * d + a.y * (1 - d)),
+                    TB(b.z * d + a.z * (1 - d)),
+                    TB(b.w * d + a.w * (1 - d)));
+}};
+
+template<typename Tin, typename Tout> static Tout _lerp(const Tin &a, const Tin &b, Ncv32f d)
+{
+    return __lerp_CN<Tin, Tout, NC(Tin)>::_lerp_CN(a, b, d);
+}
+
+
+template<typename T>
+static T _interpLinear(const T &a, const T &b, Ncv32f d)
+{
+    typedef typename TConvBase2Vec<Ncv32f, NC(T)>::TVec TVFlt;
+    TVFlt tmp = _lerp<T, TVFlt>(a, b, d);
+    return _pixDemoteClampZ<TVFlt, T>(tmp);
+}
+
+
+template<typename T>
+static T _interpBilinear(const NCVMatrix<T> &refLayer, Ncv32f x, Ncv32f y)
+{
+    Ncv32u xl = (Ncv32u)x;
+    Ncv32u xh = xl+1;
+    Ncv32f dx = x - xl;
+    Ncv32u yl = (Ncv32u)y;
+    Ncv32u yh = yl+1;
+    Ncv32f dy = y - yl;
+    T p00, p01, p10, p11;
+    p00 = refLayer.at(xl, yl);
+    p01 = xh < refLayer.width() ? refLayer.at(xh, yl) : p00;
+    p10 = yh < refLayer.height() ? refLayer.at(xl, yh) : p00;
+    p11 = (xh < refLayer.width() && yh < refLayer.height()) ?
refLayer.at(xh, yh) : p00;
+    typedef typename TConvBase2Vec<Ncv32f, NC(T)>::TVec TVFlt;
+    TVFlt m_00_01 = _lerp<T, TVFlt>(p00, p01, dx);
+    TVFlt m_10_11 = _lerp<T, TVFlt>(p10, p11, dx);
+    TVFlt mixture = _lerp<TVFlt, TVFlt>(m_00_01, m_10_11, dy);
+    return _pixDemoteClampZ<TVFlt, T>(mixture);
+}
+
+
+template <class T>
+NCVImagePyramid<T>::NCVImagePyramid(const NCVMatrix<T> &img,
+                                    Ncv8u numLayers,
+                                    INCVMemAllocator &alloc,
+                                    cudaStream_t cuStream)
+{
+    this->_isInitialized = false;
+    ncvAssertPrintReturn(img.memType() == alloc.memType(), "NCVImagePyramid_host::ctor error", );
+
+    this->layer0 = &img;
+    NcvSize32u szLastLayer(img.width(), img.height());
+    this->nLayers = 1;
+
+    NCV_SET_SKIP_COND(alloc.isCounting());
+    NcvBool bDeviceCode = alloc.memType() == NCVMemoryTypeDevice;
+
+    if (numLayers == 0)
+    {
+        numLayers = 255; //it will cut-off when any of the dimensions goes 1
+    }
+
+    for (Ncv32u i=0; i<(Ncv32u)numLayers-1; i++)
+    {
+        NcvSize32u szCurLayer(szLastLayer.width / 2, szLastLayer.height / 2);
+        if (szCurLayer.width == 0 || szCurLayer.height == 0)
+        {
+            break;
+        }
+
+        this->pyramid.push_back(new NCVMatrixAlloc<T>(alloc, szCurLayer.width, szCurLayer.height));
+        ncvAssertPrintReturn(((NCVMatrixAlloc<T> *)(this->pyramid[i]))->isMemAllocated(), "NCVImagePyramid_host::ctor error", );
+        this->nLayers++;
+
+        //fill in the layer
+        NCV_SKIP_COND_BEGIN
+
+        const NCVMatrix<T> *prevLayer = i == 0 ?
this->layer0 : this->pyramid[i-1];
+        NCVMatrix<T> *curLayer = this->pyramid[i];
+
+        if (bDeviceCode)
+        {
+            //TODO: in cuStream
+        }
+        else
+        {
+            for (Ncv32u i=0; i<szCurLayer.height; i++)
+            {
+                for (Ncv32u j=0; j<szCurLayer.width; j++)
+                {
+                    T p00 = prevLayer->at(2*j+0, 2*i+0);
+                    T p01 = prevLayer->at(2*j+1, 2*i+0);
+                    T p10 = prevLayer->at(2*j+0, 2*i+1);
+                    T p11 = prevLayer->at(2*j+1, 2*i+1);
+                    curLayer->at(j, i) = _average4(p00, p01, p10, p11);
+                }
+            }
+        }
+
+        NCV_SKIP_COND_END
+
+        szLastLayer = szCurLayer;
+    }
+
+    this->_isInitialized = true;
+}
+
+
+template <class T>
+NCVImagePyramid<T>::~NCVImagePyramid()
+{
+}
+
+
+template <class T>
+NcvBool NCVImagePyramid<T>::isInitialized() const
+{
+    return this->_isInitialized;
+}
+
+
+template <class T>
+NCVStatus NCVImagePyramid<T>::getLayer(NCVMatrix<T> &outImg,
+                                       NcvSize32u outRoi,
+                                       NcvBool bTrilinear,
+                                       cudaStream_t cuStream) const
+{
+    ncvAssertReturn(this->isInitialized(), NCV_UNKNOWN_ERROR);
+    ncvAssertReturn(outImg.memType() == this->layer0->memType(), NCV_MEM_RESIDENCE_ERROR);
+    ncvAssertReturn(outRoi.width <= this->layer0->width() && outRoi.height <= this->layer0->height() &&
+                    outRoi.width > 0 && outRoi.height > 0, NCV_DIMENSIONS_INVALID);
+
+    if (outRoi.width == this->layer0->width() && outRoi.height == this->layer0->height())
+    {
+        ncvAssertReturnNcvStat(this->layer0->copy2D(outImg, NcvSize32u(this->layer0->width(), this->layer0->height()), cuStream));
+        return NCV_SUCCESS;
+    }
+
+    Ncv32f lastScale = 1.0f;
+    Ncv32f curScale;
+    const NCVMatrix<T> *lastLayer = this->layer0;
+    const NCVMatrix<T> *curLayer = NULL;
+    NcvBool bUse2Refs = false;
+
+    for (Ncv32u i=0; i<this->nLayers-1; i++)
+    {
+        curScale = lastScale * 0.5f;
+        curLayer = this->pyramid[i];
+
+        if (outRoi.width == curLayer->width() && outRoi.height == curLayer->height())
+        {
+            ncvAssertReturnNcvStat(this->pyramid[i]->copy2D(outImg, NcvSize32u(this->pyramid[i]->width(), this->pyramid[i]->height()), cuStream));
+            return NCV_SUCCESS;
+        }
+
+        if (outRoi.width >= curLayer->width() && outRoi.height >= curLayer->height())
+        {
+            if (outRoi.width < lastLayer->width() && outRoi.height < lastLayer->height())
+            {
+                bUse2Refs = true;
+            }
+            break;
+        }
+
+        lastScale = curScale;
+        lastLayer = curLayer;
+    }
+
+    bUse2Refs = bUse2Refs && bTrilinear;
+
+    NCV_SET_SKIP_COND(outImg.memType() == NCVMemoryTypeNone);
+    NcvBool bDeviceCode = this->layer0->memType() == NCVMemoryTypeDevice;
+
+    NCV_SKIP_COND_BEGIN
+
+    if (bDeviceCode)
+    {
+        //TODO: in cuStream
+    }
+    else
+    {
+        for (Ncv32u i=0; i<outRoi.height; i++)
+        {
+            for (Ncv32u j=0; j<outRoi.width; j++)
+            {
+                //top layer pixel (always exists)
+                NcvSize32u szTopLayer(lastLayer->width(), lastLayer->height());
+                Ncv32f ptTopX = 1.0f * (szTopLayer.width - 1) * j / (outRoi.width - 1);
+                Ncv32f ptTopY = 1.0f * (szTopLayer.height - 1) * i / (outRoi.height - 1);
+                T topPix = _interpBilinear(*lastLayer, ptTopX, ptTopY);
+                T trilinearPix = topPix;
+
+                if (bUse2Refs)
+                {
+                    //bottom layer pixel (exists only if the requested scale is greater than the smallest layer scale)
+                    NcvSize32u szBottomLayer(curLayer->width(), curLayer->height());
+                    Ncv32f ptBottomX = 1.0f * (szBottomLayer.width - 1) * j / (outRoi.width - 1);
+                    Ncv32f ptBottomY = 1.0f * (szBottomLayer.height - 1) * i / (outRoi.height - 1);
+                    T bottomPix = _interpBilinear(*curLayer, ptBottomX, ptBottomY);
+
+                    Ncv32f scale = (1.0f * outRoi.width / layer0->width() + 1.0f * outRoi.height / layer0->height()) / 2;
+                    Ncv32f dl = (scale - curScale) / (lastScale - curScale);
+                    dl = CLAMP(dl, 0.0f, 1.0f);
+                    trilinearPix = _interpLinear(bottomPix, topPix, dl);
+                }
+
+                outImg.at(j, i) = trilinearPix;
+            }
+        }
+    }
+
+    NCV_SKIP_COND_END
+
+    return NCV_SUCCESS;
+}
+
+
+template class NCVImagePyramid<uchar1>;
+template class NCVImagePyramid<uchar3>;
+template class NCVImagePyramid<uchar4>;
+template class NCVImagePyramid<ushort1>;
+template class NCVImagePyramid<ushort3>;
+template class NCVImagePyramid<ushort4>;
+template class NCVImagePyramid<uint1>;
+template class NCVImagePyramid<uint3>;
+template class NCVImagePyramid<uint4>;
+template class NCVImagePyramid<float1>;
+template class NCVImagePyramid<float3>;
+template class NCVImagePyramid<float4>;
diff --git a/modules/gpu/src/nvidia/core/NCVPyramid.hpp b/modules/gpu/src/nvidia/core/NCVPyramid.hpp
new file mode 100644
index 0000000000..92cf90fab9
--- /dev/null
+++
b/modules/gpu/src/nvidia/core/NCVPyramid.hpp @@ -0,0 +1,97 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#ifndef _ncvpyramid_hpp_
+#define _ncvpyramid_hpp_
+
+#include <memory>
+#include <vector>
+#include "NCV.hpp"
+
+
+template <class T>
+class NCV_EXPORTS NCVMatrixStack
+{
+public:
+    NCVMatrixStack() {this->_arr.clear();}
+    ~NCVMatrixStack()
+    {
+        const Ncv32u nElem = this->_arr.size();
+        for (Ncv32u i=0; i<nElem; i++)
+        {
+            pop_back();
+        }
+    }
+    void push_back(NCVMatrix<T> *elem) {this->_arr.push_back(std::tr1::shared_ptr< NCVMatrix<T> >(elem));}
+    void pop_back() {this->_arr.pop_back();}
+    NCVMatrix<T> * operator [] (int i) const {return this->_arr[i].get();}
+private:
+    std::vector< std::tr1::shared_ptr< NCVMatrix<T> > > _arr;
+};
+
+
+template <class T>
+class NCV_EXPORTS NCVImagePyramid
+{
+public:
+
+    NCVImagePyramid(const NCVMatrix<T> &img,
+                    Ncv8u nLayers,
+                    INCVMemAllocator &alloc,
+                    cudaStream_t cuStream);
+    ~NCVImagePyramid();
+    NcvBool isInitialized() const;
+    NCVStatus getLayer(NCVMatrix<T> &outImg,
+                       NcvSize32u outRoi,
+                       NcvBool bTrilinear,
+                       cudaStream_t cuStream) const;
+
+private:
+
+    NcvBool _isInitialized;
+    const NCVMatrix<T> *layer0;
+    NCVMatrixStack<T> pyramid;
+    Ncv32u nLayers;
+};
+
+
+#endif //_ncvpyramid_hpp_
diff --git a/modules/gpu/src/optical_flow.cpp b/modules/gpu/src/optical_flow.cpp
index 19754c0578..9c6513a87c 100644
--- a/modules/gpu/src/optical_flow.cpp
+++ b/modules/gpu/src/optical_flow.cpp
@@ -68,10 +68,7 @@ namespace
 namespace
 {
-    void outputHandler(const char* msg)
-    {
-        CV_Error(CV_GpuApiCallError, msg);
-    }
+    static void outputHandler(const std::string &msg) { CV_Error(CV_GpuApiCallError,
msg.c_str()); } } void cv::gpu::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& s)