[+] Pixel types via templates

[+] Color conversions stub via pixel types
[+] Pyramid calculation (required for mipmaps in CUDA 4.1)
[~] Changed C strings to C++ throughout NCV
[~] Fixed a couple of bugs in NCV
pull/13383/head
Anton Obukhov 13 years ago
parent 0b192cb4ea
commit 5afb445283
  1. 2
      modules/gpu/src/cascadeclassifier.cpp
  2. 33
      modules/gpu/src/nvidia/core/NCV.cu
  3. 113
      modules/gpu/src/nvidia/core/NCV.hpp
  4. 96
      modules/gpu/src/nvidia/core/NCVColorConversion.hpp
  5. 350
      modules/gpu/src/nvidia/core/NCVPixelOperations.hpp
  6. 397
      modules/gpu/src/nvidia/core/NCVPyramid.cu
  7. 97
      modules/gpu/src/nvidia/core/NCVPyramid.hpp
  8. 5
      modules/gpu/src/optical_flow.cpp

@ -129,7 +129,7 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
private:
static void NCVDebugOutputHandler(const char* msg) { CV_Error(CV_GpuApiCallError, msg); }
static void NCVDebugOutputHandler(const std::string &msg) { CV_Error(CV_GpuApiCallError, msg.c_str()); }
NCVStatus load(const string& classifierFile)

@ -40,10 +40,9 @@
//M*/
#include <ios>
#include <stdarg.h>
#include <iostream>
#include <string>
#include <vector>
#include <cstdio>
#include "NCV.hpp"
using namespace std;
@ -56,24 +55,18 @@ using namespace std;
//==============================================================================
// Default debug-output sink: writes the message text to standard output.
// NOTE(review): this span shows two variants of the signature and of the body
// back to back (a C-string/printf form and a std::string/cout form) --
// presumably the before/after lines of one change; confirm which is live.
static void stdioDebugOutput(const char *msg)
static void stdDebugOutput(const string &msg)
{
printf("%s", msg);
cout << msg;
}
// Currently installed debug-output handler; initialised to the default sink above.
static NCVDebugOutputHandler *debugOutputHandler = stdioDebugOutput;
static NCVDebugOutputHandler *debugOutputHandler = stdDebugOutput;
// Debug-output entry point: hands the message to the handler currently stored
// in debugOutputHandler.
// NOTE(review): two variants appear interleaved here -- a printf-style variadic
// form that first formats into a K_DEBUG_STRING_MAXLEN-byte stack buffer with
// vsnprintf, and a std::string form that forwards msg unchanged. Presumably
// these are the before/after lines of the same change; confirm.
void ncvDebugOutput(const char *msg, ...)
void ncvDebugOutput(const string &msg)
{
const int K_DEBUG_STRING_MAXLEN = 1024;
char buffer[K_DEBUG_STRING_MAXLEN];
va_list args;
va_start(args, msg);
vsnprintf(buffer, K_DEBUG_STRING_MAXLEN, msg, args);
va_end (args);
debugOutputHandler(buffer);
debugOutputHandler(msg);
}
@ -288,7 +281,7 @@ NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity,
allocBegin = NULL;
if (reusePtr == NULL)
if (reusePtr == NULL && capacity != 0)
{
bReusesMemory = false;
switch (memT)
@ -329,7 +322,7 @@ NCVMemStackAllocator::~NCVMemStackAllocator()
{
ncvAssertPrintCheck(currentSize == 0, "NCVMemStackAllocator dtor:: not all objects were deallocated properly, forcing destruction");
if (!bReusesMemory)
if (!bReusesMemory && (allocBegin != (Ncv8u *)(0x1)))
{
switch (_memType)
{
@ -355,7 +348,7 @@ NCVStatus NCVMemStackAllocator::alloc(NCVMemSegment &seg, size_t size)
seg.clear();
ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);
size = alignUp(static_cast<Ncv32u>(size), this->_alignment);
size = alignUp(size, this->_alignment);
this->currentSize += size;
this->_maxSize = std::max(this->_maxSize, this->currentSize);
@ -464,7 +457,7 @@ NCVStatus NCVMemNativeAllocator::alloc(NCVMemSegment &seg, size_t size)
break;
}
this->currentSize += alignUp(static_cast<Ncv32u>(size), this->_alignment);
this->currentSize += alignUp(size, this->_alignment);
this->_maxSize = std::max(this->_maxSize, this->currentSize);
seg.begin.memtype = this->_memType;
@ -480,8 +473,8 @@ NCVStatus NCVMemNativeAllocator::dealloc(NCVMemSegment &seg)
ncvAssertReturn(seg.begin.memtype == this->_memType, NCV_ALLOCATOR_BAD_DEALLOC);
ncvAssertReturn(seg.begin.ptr != NULL, NCV_ALLOCATOR_BAD_DEALLOC);
ncvAssertReturn(currentSize >= alignUp(static_cast<Ncv32u>(seg.size), this->_alignment), NCV_ALLOCATOR_BAD_DEALLOC);
currentSize -= alignUp(static_cast<Ncv32u>(seg.size), this->_alignment);
ncvAssertReturn(currentSize >= alignUp(seg.size, this->_alignment), NCV_ALLOCATOR_BAD_DEALLOC);
currentSize -= alignUp(seg.size, this->_alignment);
switch (this->_memType)
{

@ -42,7 +42,7 @@
#ifndef _ncv_hpp_
#define _ncv_hpp_
#if (defined WIN32 || defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS //&& !defined(__CUDACC__)
#if (defined WIN32 || defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS
#define NCV_EXPORTS __declspec(dllexport)
#else
#define NCV_EXPORTS
@ -53,6 +53,8 @@
#endif
#include <cuda_runtime.h>
#include <sstream>
#include <iostream>
//==============================================================================
@ -78,7 +80,7 @@ namespace NcvCTprep
}
#define NCV_CT_PREP_PASTE_AUX(a,b) a##b ///< Concatenation indirection macro
#define NCV_CT_PREP_PASTE_AUX(a,b) a##b ///< Concatenation indirection macro
#define NCV_CT_PREP_PASTE(a,b) NCV_CT_PREP_PASTE_AUX(a, b) ///< Concatenation macro
@ -181,6 +183,25 @@ struct NcvSize32u
Ncv32u height; ///< Rectangle height.
__host__ __device__ NcvSize32u() : width(0), height(0) {};
__host__ __device__ NcvSize32u(Ncv32u width, Ncv32u height) : width(width), height(height) {}
__host__ __device__ bool operator == (const NcvSize32u &another) const {return this->width == another.width && this->height == another.height;}
};
/**
 * 2D point with signed 32-bit coordinates.
 * Both constructors are callable from host and device code.
 */
struct NcvPoint2D32s
{
    Ncv32s x; ///< Point X.
    Ncv32s y; ///< Point Y.

    /// Builds the point (0, 0).
    __host__ __device__ NcvPoint2D32s()
        : x(0)
        , y(0)
    {
    }

    /// Builds the point from explicit coordinates.
    __host__ __device__ NcvPoint2D32s(Ncv32s x, Ncv32s y)
        : x(x)
        , y(y)
    {
    }
};
/**
 * 2D point with unsigned 32-bit coordinates.
 * Both constructors are callable from host and device code.
 */
struct NcvPoint2D32u
{
    Ncv32u x; ///< Point X.
    Ncv32u y; ///< Point Y.

    /// Builds the point (0, 0).
    __host__ __device__ NcvPoint2D32u()
        : x(0)
        , y(0)
    {
    }

    /// Builds the point from explicit coordinates.
    __host__ __device__ NcvPoint2D32u(Ncv32u x, Ncv32u y)
        : x(x)
        , y(y)
    {
    }
};
@ -199,6 +220,7 @@ NCV_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u));
// Compile-time layout checks: each aggregate must be exactly as large as the
// sum of its fields (no padding). NcvPoint2D32s is checked alongside its
// unsigned sibling so both point types carry the same guarantee.
NCV_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s));
NCV_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u));
NCV_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u));
NCV_CT_ASSERT(sizeof(NcvPoint2D32s) == 2 * sizeof(Ncv32s));
NCV_CT_ASSERT(sizeof(NcvPoint2D32u) == 2 * sizeof(Ncv32u));
//==============================================================================
@ -219,49 +241,44 @@ const Ncv32u K_LOG2_WARP_SIZE = 5;
//==============================================================================
#define NCV_CT_PREP_STRINGIZE_AUX(x) #x
#define NCV_CT_PREP_STRINGIZE(x) NCV_CT_PREP_STRINGIZE_AUX(x)
NCV_EXPORTS void ncvDebugOutput(const std::string &msg);
NCV_EXPORTS void ncvDebugOutput(const char *msg, ...);
typedef void NCVDebugOutputHandler(const char* msg);
typedef void NCVDebugOutputHandler(const std::string &msg);
NCV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
#define ncvAssertPrintCheck(pred, msg) \
((pred) ? true : (ncvDebugOutput("\n%s\n", \
"NCV Assertion Failed: " msg ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__) \
), false))
#define ncvAssertPrintReturn(pred, msg, err) \
if (ncvAssertPrintCheck(pred, msg)) ; else return err
#define ncvAssertReturn(pred, err) \
do \
{ \
if (!(pred)) \
{ \
ncvDebugOutput("\n%s%d%s\n", "NCV Assertion Failed: retcode=", (int)err, ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \
return err; \
std::ostringstream oss; \
oss << "NCV Assertion Failed: " << msg << ", file=" << __FILE__ << ", line=" << __LINE__ << std::endl; \
ncvDebugOutput(oss.str()); \
} \
} while (0)
#define ncvAssertPrintReturn(pred, msg, err) \
do \
{ \
ncvAssertPrintCheck(pred, msg); \
if (!(pred)) return err; \
} while (0)
#define ncvAssertReturn(pred, err) \
ncvAssertPrintReturn(pred, "retcode=" << (int)err, err)
#define ncvAssertReturnNcvStat(ncvOp) \
do \
{ \
NCVStatus _ncvStat = ncvOp; \
if (NCV_SUCCESS != _ncvStat) \
{ \
ncvDebugOutput("\n%s%d%s\n", "NCV Assertion Failed: NcvStat=", (int)_ncvStat, ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \
return _ncvStat; \
} \
ncvAssertPrintReturn(NCV_SUCCESS==_ncvStat, "NcvStat=" << (int)_ncvStat, _ncvStat); \
} while (0)
@ -270,18 +287,14 @@ NCV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
{ \
cudaError_t resCall = cudacall; \
cudaError_t resGLE = cudaGetLastError(); \
if (cudaSuccess != resCall || cudaSuccess != resGLE) \
{ \
ncvDebugOutput("\n%s%d%s\n", "NCV CUDA Assertion Failed: cudaError_t=", (int)(resCall | resGLE), ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \
return errCode; \
} \
ncvAssertPrintReturn(cudaSuccess==resCall && cudaSuccess==resGLE, "cudaError_t=" << (int)(resCall | resGLE), errCode); \
} while (0)
/**
* Return-codes for status notification, errors and warnings
*/
enum NCVStatus
enum
{
//NCV statuses
NCV_SUCCESS,
@ -338,9 +351,14 @@ enum NCVStatus
NPPST_MEM_INSUFFICIENT_BUFFER, ///< Insufficient user-allocated buffer
NPPST_MEM_RESIDENCE_ERROR, ///< Memory residence error detected (check if pointers should be device or pinned)
NPPST_MEM_INTERNAL_ERROR, ///< Internal memory management error
NCV_LAST_STATUS ///< Marker to continue error numeration in other files
};
typedef Ncv32u NCVStatus;
#define NCV_SET_SKIP_COND(x) \
bool __ncv_skip_cond = x
@ -774,9 +792,20 @@ public:
return ncvStat;
}
/**
 * Returns a reference to the element in column x of row y.
 *
 * Rows are _pitch bytes apart, so the row start is found by byte arithmetic
 * on _ptr before indexing by x in units of T.
 *
 * If (x, y) lies outside [0, _width) x [0, _height) an error line is printed
 * and a reference to the first element (*_ptr) is returned instead.
 * NOTE(review): that fallback still dereferences _ptr, so it is only safe
 * when the matrix has storage attached -- confirm callers guarantee this.
 */
T &at(Ncv32u x, Ncv32u y) const
{
    if (x >= this->_width || y >= this->_height)
    {
        // Coordinates are unsigned: print them with %u so values above
        // INT_MAX are not shown as negative numbers.
        printf("Error addressing matrix at [%u, %u]\n", (unsigned int)x, (unsigned int)y);
        return *this->_ptr;
    }
    // Widen before multiplying so the byte offset of the row cannot wrap
    // around in 32-bit arithmetic for very large matrices.
    return ((T *)((Ncv8u *)this->_ptr + (size_t)y * this->_pitch))[x];
}
/// Pointer to the first element of the matrix.
T *ptr() const
{
    return this->_ptr;
}

/// Stored width of the matrix.
Ncv32u width() const
{
    return this->_width;
}

/// Stored height of the matrix.
Ncv32u height() const
{
    return this->_height;
}

/// Width and height packed into an NcvSize32u.
NcvSize32u size() const
{
    return NcvSize32u(this->_width, this->_height);
}

/// Stored row pitch (at() applies it as a byte offset between rows).
Ncv32u pitch() const
{
    return this->_pitch;
}

/// Memory type tag recorded for the underlying storage.
NCVMemoryType memType() const
{
    return this->_memtype;
}
@ -923,7 +952,7 @@ public:
this->_width = roi.width;
this->_height = roi.height;
this->_pitch = mat.pitch();
this->_ptr = mat.ptr() + roi.y * mat.stride() + roi.x;
this->_ptr = &mat.at(roi.x, roi.y);
this->_memtype = mat.memType();
this->bReused = true;
@ -962,4 +991,24 @@ NCV_EXPORTS NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst, Ncv32u dstStride, Ncv
NCV_EXPORTS NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
NcvRect32u *d_rects, Ncv32u numRects, Ncv32u color, cudaStream_t cuStream);
// Clamps x to the closed range [a, b]. Function-like macro: every argument may
// be evaluated more than once, so do not pass expressions with side effects.
#define CLAMP(x,a,b) ( (x) > (b) ? (b) : ( (x) < (a) ? (a) : (x) ) )
// Upper-bound clamp: yields a when x > a, otherwise x.
#define CLAMP_TOP(x, a) (((x) > (a)) ? (a) : (x))
// Lower-bound clamp: yields a when x < a, otherwise x.
#define CLAMP_BOTTOM(x, a) (((x) < (a)) ? (a) : (x))
// Clamps x to [0, 255].
#define CLAMP_0_255(x) CLAMP(x,0,255)
// SUB_BEGIN/SUB_END wrap a function body in an unnamed struct whose single
// instance is called `name`; the wrapped member function is also called `name`.
// SUB_CALL(name) expands to name.name, i.e. a reference to that member.
#define SUB_BEGIN(type, name) struct { __inline type name
#define SUB_END(name) } name;
#define SUB_CALL(name) name.name
// Square of x; x is evaluated twice.
#define SQR(x) ((x)*(x))
// Declares an NCVMatrixAlloc<type> called `name` built from (alloc, width,
// height) and, through ncvAssertReturn, returns `err` from the enclosing
// function when name.isMemAllocated() is false. Expands to two statements, so
// it cannot be used as the sole body of an unbraced if/else/loop.
#define ncvSafeMatAlloc(name, type, alloc, width, height, err) \
NCVMatrixAlloc<type> name(alloc, width, height); \
ncvAssertReturn(name.isMemAllocated(), err);
#endif // _ncv_hpp_

@ -0,0 +1,96 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef _ncv_color_conversion_hpp_
#define _ncv_color_conversion_hpp_
#include "NCVPixelOperations.hpp"
/**
 * Color spaces understood by the conversion templates in this header.
 * The enumerators are used as compile-time template arguments.
 */
enum NCVColorSpace
{
    NCVColorSpaceGray, ///< Single-channel pixels (only .x is read or written by the converters).
    NCVColorSpaceRGBA  ///< Four-channel pixels (.x, .y, .z and .w). No trailing comma: C++03 rejects it.
};
/**
 * Per-pixel color conversion selected at compile time by the
 * (source, destination) color-space pair. The primary template only declares
 * the converter; the partial specializations below define the supported pairs.
 */
template<NCVColorSpace CSin, NCVColorSpace CSout, typename Tin, typename Tout>
struct __pixColorConv
{
    static void _pixColorConv(const Tin &pixIn, Tout &pixOut);
};

/**
 * RGBA -> Gray: weighted sum of the first three input channels, stored into
 * pixOut.x through _TDemoteClampNN. The .w channel of the input is ignored.
 */
template<typename Tin, typename Tout>
struct __pixColorConv<NCVColorSpaceRGBA, NCVColorSpaceGray, Tin, Tout>
{
    static void _pixColorConv(const Tin &pixIn, Tout &pixOut)
    {
        const Ncv32f weightX = 0.299f;
        const Ncv32f weightY = 0.587f;
        const Ncv32f weightZ = 0.114f;

        Ncv32f luma = weightX * pixIn.x + weightY * pixIn.y + weightZ * pixIn.z;
        _TDemoteClampNN(luma, pixOut.x);
    }
};

/**
 * Gray -> RGBA: the single input channel is replicated into .x, .y and .z of
 * the output through _TDemoteClampNN; the .w channel is set to zero.
 */
template<typename Tin, typename Tout>
struct __pixColorConv<NCVColorSpaceGray, NCVColorSpaceRGBA, Tin, Tout>
{
    static void _pixColorConv(const Tin &pixIn, Tout &pixOut)
    {
        _TDemoteClampNN(pixIn.x, pixOut.x);
        _TDemoteClampNN(pixIn.x, pixOut.y);
        _TDemoteClampNN(pixIn.x, pixOut.z);
        pixOut.w = 0;
    }
};
/**
 * Host-side color conversion of a whole image, one pixel at a time.
 *
 * @param h_imgIn   source image
 * @param h_imgOut  destination image; its pixels are overwritten through at()
 * @return NCV_DIMENSIONS_INVALID if the two sizes differ,
 *         NCV_MEM_RESIDENCE_ERROR unless both images share a memory type that
 *         is either NCVMemoryTypeHostPinned or NCVMemoryTypeNone,
 *         NCV_SUCCESS otherwise.
 *
 * The pixel loop sits between NCV_SKIP_COND_BEGIN / NCV_SKIP_COND_END with the
 * skip condition set when the input memory type is NCVMemoryTypeNone.
 */
template<NCVColorSpace CSin, NCVColorSpace CSout, typename Tin, typename Tout>
static NCVStatus _ncvColorConv_host(const NCVMatrix<Tin> &h_imgIn,
                                    const NCVMatrix<Tout> &h_imgOut)
{
    ncvAssertReturn(h_imgIn.size() == h_imgOut.size(), NCV_DIMENSIONS_INVALID);
    ncvAssertReturn(h_imgIn.memType() == h_imgOut.memType() &&
                    (h_imgIn.memType() == NCVMemoryTypeHostPinned || h_imgIn.memType() == NCVMemoryTypeNone), NCV_MEM_RESIDENCE_ERROR);
    NCV_SET_SKIP_COND(h_imgIn.memType() == NCVMemoryTypeNone);
    NCV_SKIP_COND_BEGIN

    typedef __pixColorConv<CSin, CSout, Tin, Tout> PixelConverter;

    for (Ncv32u row = 0; row < h_imgIn.height(); ++row)
    {
        for (Ncv32u col = 0; col < h_imgIn.width(); ++col)
        {
            PixelConverter::_pixColorConv(h_imgIn.at(col, row), h_imgOut.at(col, row));
        }
    }

    NCV_SKIP_COND_END
    return NCV_SUCCESS;
}
#endif //_ncv_color_conversion_hpp_

@ -0,0 +1,350 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef _ncv_pixel_operations_hpp_
#define _ncv_pixel_operations_hpp_
#include <limits.h>
#include <float.h>
#include "NCV.hpp"
/**
 * Largest value of a scalar pixel base type, taken from <limits.h>/<float.h>.
 * Only the explicit specializations below are defined.
 *
 * The specializations are declared without `static`: a storage-class
 * specifier is not permitted on an explicit specialization and conforming
 * compilers reject it. `inline` alone keeps them safe to define in a header.
 */
template<typename TBase> inline TBase _pixMaxVal();
template<> inline Ncv8u  _pixMaxVal<Ncv8u>()  {return UCHAR_MAX;}
template<> inline Ncv16u _pixMaxVal<Ncv16u>() {return USHRT_MAX;}
template<> inline Ncv32u _pixMaxVal<Ncv32u>() {return UINT_MAX;}
template<> inline Ncv8s  _pixMaxVal<Ncv8s>()  {return CHAR_MAX;}
template<> inline Ncv16s _pixMaxVal<Ncv16s>() {return SHRT_MAX;}
template<> inline Ncv32s _pixMaxVal<Ncv32s>() {return INT_MAX;}
template<> inline Ncv32f _pixMaxVal<Ncv32f>() {return FLT_MAX;}
template<> inline Ncv64f _pixMaxVal<Ncv64f>() {return DBL_MAX;}
/**
 * Lowest value of a scalar pixel base type.
 * Only the explicit specializations below are defined.
 *
 * For the floating-point types the lowest value is -FLT_MAX / -DBL_MAX.
 * FLT_MIN and DBL_MIN are the smallest *positive* normalized values, so using
 * them here would disagree with the integer cases (0, CHAR_MIN, SHRT_MIN,
 * INT_MIN), which all return the most negative representable value.
 *
 * The specializations are declared without `static`: a storage-class
 * specifier is not permitted on an explicit specialization.
 */
template<typename TBase> inline TBase _pixMinVal();
template<> inline Ncv8u  _pixMinVal<Ncv8u>()  {return 0;}
template<> inline Ncv16u _pixMinVal<Ncv16u>() {return 0;}
template<> inline Ncv32u _pixMinVal<Ncv32u>() {return 0;}
template<> inline Ncv8s  _pixMinVal<Ncv8s>()  {return CHAR_MIN;}
template<> inline Ncv16s _pixMinVal<Ncv16s>() {return SHRT_MIN;}
template<> inline Ncv32s _pixMinVal<Ncv32s>() {return INT_MIN;}
template<> inline Ncv32f _pixMinVal<Ncv32f>() {return -FLT_MAX;}
template<> inline Ncv64f _pixMinVal<Ncv64f>() {return -DBL_MAX;}
/**
 * Type trait: maps a vector pixel type to the scalar type of its channels,
 * exposed as the nested typedef TBase. Declared only; each supported vector
 * type has an explicit specialization.
 */
template<typename Tvec> struct TConvVec2Base;

// Ncv8u channels
template<> struct TConvVec2Base<uchar1>  { typedef Ncv8u  TBase; };
template<> struct TConvVec2Base<uchar3>  { typedef Ncv8u  TBase; };
template<> struct TConvVec2Base<uchar4>  { typedef Ncv8u  TBase; };

// Ncv16u channels
template<> struct TConvVec2Base<ushort1> { typedef Ncv16u TBase; };
template<> struct TConvVec2Base<ushort3> { typedef Ncv16u TBase; };
template<> struct TConvVec2Base<ushort4> { typedef Ncv16u TBase; };

// Ncv32u channels
template<> struct TConvVec2Base<uint1>   { typedef Ncv32u TBase; };
template<> struct TConvVec2Base<uint3>   { typedef Ncv32u TBase; };
template<> struct TConvVec2Base<uint4>   { typedef Ncv32u TBase; };

// Ncv32f channels
template<> struct TConvVec2Base<float1>  { typedef Ncv32f TBase; };
template<> struct TConvVec2Base<float3>  { typedef Ncv32f TBase; };
template<> struct TConvVec2Base<float4>  { typedef Ncv32f TBase; };

// Ncv64f channels
template<> struct TConvVec2Base<double1> { typedef Ncv64f TBase; };
template<> struct TConvVec2Base<double3> { typedef Ncv64f TBase; };
template<> struct TConvVec2Base<double4> { typedef Ncv64f TBase; };

// Channel count of vector type T: size of the vector divided by the size of
// one channel.
// NOTE(review): when T is a template parameter the nested name is dependent
// and is written here without `typename`; some compilers may reject that --
// confirm against the toolchains this header must build with.
#define NC(T) (sizeof(T) / sizeof(TConvVec2Base<T>::TBase))
/**
 * Type trait: maps a (scalar channel type, channel count) pair to the
 * matching vector pixel type, exposed as the nested typedef TVec. Declared
 * only; specializations exist for 1, 3 and 4 channels of each listed scalar.
 */
template<typename TBase, Ncv32u NC> struct TConvBase2Vec;

// Ncv8u channels
template<> struct TConvBase2Vec<Ncv8u, 1>  { typedef uchar1  TVec; };
template<> struct TConvBase2Vec<Ncv8u, 3>  { typedef uchar3  TVec; };
template<> struct TConvBase2Vec<Ncv8u, 4>  { typedef uchar4  TVec; };

// Ncv16u channels
template<> struct TConvBase2Vec<Ncv16u, 1> { typedef ushort1 TVec; };
template<> struct TConvBase2Vec<Ncv16u, 3> { typedef ushort3 TVec; };
template<> struct TConvBase2Vec<Ncv16u, 4> { typedef ushort4 TVec; };

// Ncv32u channels
template<> struct TConvBase2Vec<Ncv32u, 1> { typedef uint1   TVec; };
template<> struct TConvBase2Vec<Ncv32u, 3> { typedef uint3   TVec; };
template<> struct TConvBase2Vec<Ncv32u, 4> { typedef uint4   TVec; };

// Ncv32f channels
template<> struct TConvBase2Vec<Ncv32f, 1> { typedef float1  TVec; };
template<> struct TConvBase2Vec<Ncv32f, 3> { typedef float3  TVec; };
template<> struct TConvBase2Vec<Ncv32f, 4> { typedef float4  TVec; };

// Ncv64f channels
template<> struct TConvBase2Vec<Ncv64f, 1> { typedef double1 TVec; };
template<> struct TConvBase2Vec<Ncv64f, 3> { typedef double3 TVec; };
template<> struct TConvBase2Vec<Ncv64f, 4> { typedef double4 TVec; };
//TODO: consider using CUDA intrinsics to avoid branching
template<typename Tin> static inline void _TDemoteClampZ(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a);};
template<typename Tin> static inline void _TDemoteClampZ(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a, 0, USHRT_MAX);}
template<typename Tin> static inline void _TDemoteClampZ(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a, 0, UINT_MAX);}
template<typename Tin> static inline void _TDemoteClampZ(Tin &a, Ncv32f &out) {out = (Ncv32f)a;}
//TODO: consider using CUDA intrinsics to avoid branching
template<typename Tin> static inline void _TDemoteClampNN(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a+0.5f);}
template<typename Tin> static inline void _TDemoteClampNN(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a+0.5f, 0, USHRT_MAX);}
template<typename Tin> static inline void _TDemoteClampNN(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a+0.5f, 0, UINT_MAX);}
template<typename Tin> static inline void _TDemoteClampNN(Tin &a, Ncv32f &out) {out = (Ncv32f)a;}
/**
 * Returns a vector pixel of type Tout with every channel set to zero.
 * Only the explicit specializations below are defined.
 *
 * The specializations are declared without `static`: a storage-class
 * specifier is not permitted on an explicit specialization and conforming
 * compilers reject it. `inline` alone keeps them safe to define in a header.
 */
template<typename Tout> inline Tout _pixMakeZero();
template<> inline uchar1  _pixMakeZero<uchar1>()  {return make_uchar1(0);}
template<> inline uchar3  _pixMakeZero<uchar3>()  {return make_uchar3(0,0,0);}
template<> inline uchar4  _pixMakeZero<uchar4>()  {return make_uchar4(0,0,0,0);}
template<> inline ushort1 _pixMakeZero<ushort1>() {return make_ushort1(0);}
template<> inline ushort3 _pixMakeZero<ushort3>() {return make_ushort3(0,0,0);}
template<> inline ushort4 _pixMakeZero<ushort4>() {return make_ushort4(0,0,0,0);}
template<> inline uint1   _pixMakeZero<uint1>()   {return make_uint1(0);}
template<> inline uint3   _pixMakeZero<uint3>()   {return make_uint3(0,0,0);}
template<> inline uint4   _pixMakeZero<uint4>()   {return make_uint4(0,0,0,0);}
template<> inline float1  _pixMakeZero<float1>()  {return make_float1(0.f);}
template<> inline float3  _pixMakeZero<float3>()  {return make_float3(0.f,0.f,0.f);}
template<> inline float4  _pixMakeZero<float4>()  {return make_float4(0.f,0.f,0.f,0.f);}
template<> inline double1 _pixMakeZero<double1>() {return make_double1(0.);}
template<> inline double3 _pixMakeZero<double3>() {return make_double3(0.,0.,0.);}
template<> inline double4 _pixMakeZero<double4>() {return make_double4(0.,0.,0.,0.);}
// _pixMake: overload set that builds a vector pixel from its channel values.
// The scalar argument type and the number of arguments select the result
// type; each overload forwards to the corresponding make_* function.

// Ncv8u channels
static inline uchar1  _pixMake(Ncv8u x)                               { return make_uchar1(x); }
static inline uchar3  _pixMake(Ncv8u x, Ncv8u y, Ncv8u z)             { return make_uchar3(x, y, z); }
static inline uchar4  _pixMake(Ncv8u x, Ncv8u y, Ncv8u z, Ncv8u w)    { return make_uchar4(x, y, z, w); }

// Ncv16u channels
static inline ushort1 _pixMake(Ncv16u x)                              { return make_ushort1(x); }
static inline ushort3 _pixMake(Ncv16u x, Ncv16u y, Ncv16u z)          { return make_ushort3(x, y, z); }
static inline ushort4 _pixMake(Ncv16u x, Ncv16u y, Ncv16u z, Ncv16u w) { return make_ushort4(x, y, z, w); }

// Ncv32u channels
static inline uint1   _pixMake(Ncv32u x)                              { return make_uint1(x); }
static inline uint3   _pixMake(Ncv32u x, Ncv32u y, Ncv32u z)          { return make_uint3(x, y, z); }
static inline uint4   _pixMake(Ncv32u x, Ncv32u y, Ncv32u z, Ncv32u w) { return make_uint4(x, y, z, w); }

// Ncv32f channels
static inline float1  _pixMake(Ncv32f x)                              { return make_float1(x); }
static inline float3  _pixMake(Ncv32f x, Ncv32f y, Ncv32f z)          { return make_float3(x, y, z); }
static inline float4  _pixMake(Ncv32f x, Ncv32f y, Ncv32f z, Ncv32f w) { return make_float4(x, y, z, w); }

// Ncv64f channels
static inline double1 _pixMake(Ncv64f x)                              { return make_double1(x); }
static inline double3 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z)          { return make_double3(x, y, z); }
static inline double4 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z, Ncv64f w) { return make_double4(x, y, z, w); }
template<typename Tin, typename Tout, Ncv32u CN> struct __pixDemoteClampZ_CN {static Tout _pixDemoteClampZ_CN(Tin &pix);};
template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 1> {
static Tout _pixDemoteClampZ_CN(Tin &pix)
{
Tout out;
_TDemoteClampZ(pix.x, out.x);
return out;
}};
template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 3> {
static Tout _pixDemoteClampZ_CN(Tin &pix)
{
Tout out;
_TDemoteClampZ(pix.x, out.x);
_TDemoteClampZ(pix.y, out.y);
_TDemoteClampZ(pix.z, out.z);
return out;
}};
template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 4> {
static Tout _pixDemoteClampZ_CN(Tin &pix)
{
Tout out;
_TDemoteClampZ(pix.x, out.x);
_TDemoteClampZ(pix.y, out.y);
_TDemoteClampZ(pix.z, out.z);
_TDemoteClampZ(pix.w, out.w);
return out;
}};
template<typename Tin, typename Tout> static inline Tout _pixDemoteClampZ(Tin &pix)
{
return __pixDemoteClampZ_CN<Tin, Tout, NC(Tin)>::_pixDemoteClampZ_CN(pix);
}
template<typename Tin, typename Tout, Ncv32u CN> struct __pixDemoteClampNN_CN {static Tout _pixDemoteClampNN_CN(Tin &pix);};
template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 1> {
static Tout _pixDemoteClampNN_CN(Tin &pix)
{
Tout out;
_TDemoteClampNN(pix.x, out.x);
return out;
}};
template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 3> {
static Tout _pixDemoteClampNN_CN(Tin &pix)
{
Tout out;
_TDemoteClampNN(pix.x, out.x);
_TDemoteClampNN(pix.y, out.y);
_TDemoteClampNN(pix.z, out.z);
return out;
}};
template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 4> {
static Tout _pixDemoteClampNN_CN(Tin &pix)
{
Tout out;
_TDemoteClampNN(pix.x, out.x);
_TDemoteClampNN(pix.y, out.y);
_TDemoteClampNN(pix.z, out.z);
_TDemoteClampNN(pix.w, out.w);
return out;
}};
template<typename Tin, typename Tout> static inline Tout _pixDemoteClampNN(Tin &pix)
{
return __pixDemoteClampNN_CN<Tin, Tout, NC(Tin)>::_pixDemoteClampNN_CN(pix);
}
template<typename Tin, typename Tout, typename Tw, Ncv32u CN> struct __pixScale_CN {static Tout _pixScale_CN(Tin &pix, Tw w);};
template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 1> {
static Tout _pixScale_CN(Tin &pix, Tw w)
{
Tout out;
typedef typename TConvVec2Base<Tout>::TBase TBout;
out.x = (TBout)(pix.x * w);
return out;
}};
template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 3> {
static Tout _pixScale_CN(Tin &pix, Tw w)
{
Tout out;
typedef typename TConvVec2Base<Tout>::TBase TBout;
out.x = (TBout)(pix.x * w);
out.y = (TBout)(pix.y * w);
out.z = (TBout)(pix.z * w);
return out;
}};
template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 4> {
static Tout _pixScale_CN(Tin &pix, Tw w)
{
Tout out;
typedef typename TConvVec2Base<Tout>::TBase TBout;
out.x = (TBout)(pix.x * w);
out.y = (TBout)(pix.y * w);
out.z = (TBout)(pix.z * w);
out.w = (TBout)(pix.w * w);
return out;
}};
template<typename Tin, typename Tout, typename Tw> static Tout _pixScale(Tin &pix, Tw w)
{
return __pixScale_CN<Tin, Tout, Tw, NC(Tin)>::_pixScale_CN(pix, w);
}
template<typename Tin, typename Tout, Ncv32u CN> struct __pixAdd_CN {static Tout _pixAdd_CN(Tout &pix1, Tin &pix2);};
template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 1> {
static Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
{
Tout out;
out.x = pix1.x + pix2.x;
return out;
}};
template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 3> {
static Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
{
Tout out;
out.x = pix1.x + pix2.x;
out.y = pix1.y + pix2.y;
out.z = pix1.z + pix2.z;
return out;
}};
template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 4> {
static Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
{
Tout out;
out.x = pix1.x + pix2.x;
out.y = pix1.y + pix2.y;
out.z = pix1.z + pix2.z;
out.w = pix1.w + pix2.w;
return out;
}};
template<typename Tin, typename Tout> static Tout _pixAdd(Tout &pix1, Tin &pix2)
{
return __pixAdd_CN<Tin, Tout, NC(Tin)>::_pixAdd_CN(pix1, pix2);
}
template<typename Tin, typename Tout, Ncv32u CN> struct __pixDist_CN {static Tout _pixDist_CN(Tin &pix1, Tin &pix2);};
template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 1> {
static Tout _pixDist_CN(Tin &pix1, Tin &pix2)
{
return Tout(SQR(pix1.x - pix2.x));
}};
template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 3> {
static Tout _pixDist_CN(Tin &pix1, Tin &pix2)
{
return Tout(SQR(pix1.x - pix2.x) + SQR(pix1.y - pix2.y) + SQR(pix1.z - pix2.z));
}};
template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 4> {
static Tout _pixDist_CN(Tin &pix1, Tin &pix2)
{
return Tout(SQR(pix1.x - pix2.x) + SQR(pix1.y - pix2.y) + SQR(pix1.z - pix2.z) + SQR(pix1.w - pix2.w));
}};
template<typename Tin, typename Tout> static Tout _pixDist(Tin &pix1, Tin &pix2)
{
return __pixDist_CN<Tin, Tout, NC(Tin)>::_pixDist_CN(pix1, pix2);
}
/**
 * Type trait: for a vector pixel type, names (as `type`) the double-precision
 * vector with the same channel count. Declared only; specializations exist
 * for the uchar, ushort and float vectors listed below.
 * NOTE(review): the name suggests it is the accumulator for weighted pixel
 * sums -- confirm against its users.
 */
template <typename T> struct TAccPixWeighted;

// uchar vectors
template<> struct TAccPixWeighted<uchar1>  { typedef double1 type; };
template<> struct TAccPixWeighted<uchar3>  { typedef double3 type; };
template<> struct TAccPixWeighted<uchar4>  { typedef double4 type; };

// ushort vectors
template<> struct TAccPixWeighted<ushort1> { typedef double1 type; };
template<> struct TAccPixWeighted<ushort3> { typedef double3 type; };
template<> struct TAccPixWeighted<ushort4> { typedef double4 type; };

// float vectors
template<> struct TAccPixWeighted<float1>  { typedef double1 type; };
template<> struct TAccPixWeighted<float3>  { typedef double3 type; };
template<> struct TAccPixWeighted<float4>  { typedef double4 type; };
/**
 * Type trait: for a vector pixel type, names (as `type`) a scalar type --
 * Ncv32u for the uchar and ushort vectors, Ncv32f for the float vectors.
 * The primary template is an empty struct, so unlisted types have no `type`.
 * NOTE(review): the name suggests it is the accumulator for pixel distances
 * -- confirm against its users.
 */
template<typename Tfrom> struct TAccPixDist {};

// uchar vectors
template<> struct TAccPixDist<uchar1>  { typedef Ncv32u type; };
template<> struct TAccPixDist<uchar3>  { typedef Ncv32u type; };
template<> struct TAccPixDist<uchar4>  { typedef Ncv32u type; };

// ushort vectors
template<> struct TAccPixDist<ushort1> { typedef Ncv32u type; };
template<> struct TAccPixDist<ushort3> { typedef Ncv32u type; };
template<> struct TAccPixDist<ushort4> { typedef Ncv32u type; };

// float vectors
template<> struct TAccPixDist<float1>  { typedef Ncv32f type; };
template<> struct TAccPixDist<float3>  { typedef Ncv32f type; };
template<> struct TAccPixDist<float4>  { typedef Ncv32f type; };
#endif //_ncv_pixel_operations_hpp_

@ -0,0 +1,397 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <cuda_runtime.h>
#include <stdio.h>
#include "NCV.hpp"
#include "NCVPyramid.hpp"
#include "NCVPixelOperations.hpp"
template<typename T, Ncv32u CN> struct __average4_CN {static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11);};
template<typename T> struct __average4_CN<T, 1> {
static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11)
{
T out;
out.x = ((Ncv32s)p00.x + p01.x + p10.x + p11.x + 2) / 4;
return out;
}};
template<> struct __average4_CN<float1, 1> {
static float1 _average4_CN(const float1 &p00, const float1 &p01, const float1 &p10, const float1 &p11)
{
float1 out;
out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
return out;
}};
// double1 pixels: plain floating-point mean of the four samples (no rounding bias).
template<> struct __average4_CN<double1, 1> {
    static double1 _average4_CN(const double1 &p00, const double1 &p01, const double1 &p10, const double1 &p11)
    {
        const double sumX = p00.x + p01.x + p10.x + p11.x;

        double1 out;
        out.x = sumX / 4;
        return out;
    }
};
// Three-channel integer pixels: per-channel mean of the four samples of a 2x2
// block, rounded to nearest (the +2 bias applied before the division by 4).
template<typename T> struct __average4_CN<T, 3> {
    static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11)
    {
        T out;
        // Accumulate in 64 bits: for 32-bit channels (e.g. uint3) the sum of four
        // samples plus the bias does not fit into 32 bits, and a 32-bit
        // accumulator would silently wrap around. Narrower channel types give
        // the same result as with a 32-bit accumulator.
        out.x = ((long long)p00.x + p01.x + p10.x + p11.x + 2) / 4;
        out.y = ((long long)p00.y + p01.y + p10.y + p11.y + 2) / 4;
        out.z = ((long long)p00.z + p01.z + p10.z + p11.z + 2) / 4;
        return out;
    }
};
// float3 pixels: per-channel floating-point mean of the four samples.
template<> struct __average4_CN<float3, 3> {
    static float3 _average4_CN(const float3 &p00, const float3 &p01, const float3 &p10, const float3 &p11)
    {
        const float sumX = p00.x + p01.x + p10.x + p11.x;
        const float sumY = p00.y + p01.y + p10.y + p11.y;
        const float sumZ = p00.z + p01.z + p10.z + p11.z;

        float3 out;
        out.x = sumX / 4;
        out.y = sumY / 4;
        out.z = sumZ / 4;
        return out;
    }
};
// double3 pixels: per-channel floating-point mean of the four samples.
template<> struct __average4_CN<double3, 3> {
    static double3 _average4_CN(const double3 &p00, const double3 &p01, const double3 &p10, const double3 &p11)
    {
        const double sumX = p00.x + p01.x + p10.x + p11.x;
        const double sumY = p00.y + p01.y + p10.y + p11.y;
        const double sumZ = p00.z + p01.z + p10.z + p11.z;

        double3 out;
        out.x = sumX / 4;
        out.y = sumY / 4;
        out.z = sumZ / 4;
        return out;
    }
};
// Four-channel integer pixels: per-channel mean of the four samples of a 2x2
// block, rounded to nearest (the +2 bias applied before the division by 4).
template<typename T> struct __average4_CN<T, 4> {
    static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11)
    {
        T out;
        // Accumulate in 64 bits: for 32-bit channels (e.g. uint4) the sum of four
        // samples plus the bias does not fit into 32 bits, and a 32-bit
        // accumulator would silently wrap around. Narrower channel types give
        // the same result as with a 32-bit accumulator.
        out.x = ((long long)p00.x + p01.x + p10.x + p11.x + 2) / 4;
        out.y = ((long long)p00.y + p01.y + p10.y + p11.y + 2) / 4;
        out.z = ((long long)p00.z + p01.z + p10.z + p11.z + 2) / 4;
        out.w = ((long long)p00.w + p01.w + p10.w + p11.w + 2) / 4;
        return out;
    }
};
// float4 pixels: per-channel floating-point mean of the four samples.
template<> struct __average4_CN<float4, 4> {
    static float4 _average4_CN(const float4 &p00, const float4 &p01, const float4 &p10, const float4 &p11)
    {
        const float sumX = p00.x + p01.x + p10.x + p11.x;
        const float sumY = p00.y + p01.y + p10.y + p11.y;
        const float sumZ = p00.z + p01.z + p10.z + p11.z;
        const float sumW = p00.w + p01.w + p10.w + p11.w;

        float4 out;
        out.x = sumX / 4;
        out.y = sumY / 4;
        out.z = sumZ / 4;
        out.w = sumW / 4;
        return out;
    }
};
// double4 pixels: per-channel floating-point mean of the four samples.
template<> struct __average4_CN<double4, 4> {
    static double4 _average4_CN(const double4 &p00, const double4 &p01, const double4 &p10, const double4 &p11)
    {
        const double sumX = p00.x + p01.x + p10.x + p11.x;
        const double sumY = p00.y + p01.y + p10.y + p11.y;
        const double sumZ = p00.z + p01.z + p10.z + p11.z;
        const double sumW = p00.w + p01.w + p10.w + p11.w;

        double4 out;
        out.x = sumX / 4;
        out.y = sumY / 4;
        out.z = sumZ / 4;
        out.w = sumW / 4;
        return out;
    }
};
// Averages four pixels of type T by forwarding to the __average4_CN
// specialization chosen from T and its channel count NC(T).
template<typename T>
static T _average4(const T &p00, const T &p01, const T &p10, const T &p11)
{
    const T averaged = __average4_CN<T, NC(T)>::_average4_CN(p00, p01, p10, p11);
    return averaged;
}
// Primary template of the linear-interpolation helper, dispatched on the input
// pixel type Tin, the output pixel type Tout and the channel count CN. Only the
// declaration lives here; the per-channel-count specializations implement it.
template<typename Tin, typename Tout, Ncv32u CN>
struct __lerp_CN
{
    static Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d);
};
template<typename Tin, typename Tout> struct __lerp_CN<Tin, Tout, 1> {
static Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
{
typedef typename TConvVec2Base<Tout>::TBase TB;
return _pixMake(TB(b.x * d + a.x * (1 - d)));
}};
template<typename Tin, typename Tout> struct __lerp_CN<Tin, Tout, 3> {
static Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
{
typedef typename TConvVec2Base<Tout>::TBase TB;
return _pixMake(TB(b.x * d + a.x * (1 - d)),
TB(b.y * d + a.y * (1 - d)),
TB(b.z * d + a.z * (1 - d)));
}};
template<typename Tin, typename Tout> struct __lerp_CN<Tin, Tout, 4> {
static Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
{
typedef typename TConvVec2Base<Tout>::TBase TB;
return _pixMake(TB(b.x * d + a.x * (1 - d)),
TB(b.y * d + a.y * (1 - d)),
TB(b.z * d + a.z * (1 - d)),
TB(b.w * d + a.w * (1 - d)));
}};
// Linearly interpolates between pixels a and b with weight d, producing a Tout
// pixel. Forwards to the __lerp_CN specialization matching the channel count
// of Tin.
template<typename Tin, typename Tout>
static Tout _lerp(const Tin &a, const Tin &b, Ncv32f d)
{
    const Tout blended = __lerp_CN<Tin, Tout, NC(Tin)>::_lerp_CN(a, b, d);
    return blended;
}
template<typename T>
static T _interpLinear(const T &a, const T &b, Ncv32f d)
{
typedef typename TConvBase2Vec<Ncv32f, NC(T)>::TVec TVFlt;
TVFlt tmp = _lerp<T, TVFlt>(a, b, d);
return _pixDemoteClampZ<TVFlt, T>(tmp);
}
// Samples refLayer at the fractional position (x, y) using bilinear
// interpolation of the four surrounding pixels. Neighbours that fall outside
// the layer on the right or bottom side are replaced by the nearest pixel
// inside it (edge replication). The blend is computed in a float vector and
// converted back to T through _pixDemoteClampZ.
// NOTE(review): the unsigned casts assume x >= 0 and y >= 0 and that the
// truncated position (xl, yl) lies inside refLayer - the caller must ensure it.
template<typename T>
static T _interpBilinear(const NCVMatrix<T> &refLayer, Ncv32f x, Ncv32f y)
{
    //integer cell containing the sample and the fractional offsets inside it
    Ncv32u xl = (Ncv32u)x;
    Ncv32u xh = xl+1;
    Ncv32f dx = x - xl;
    Ncv32u yl = (Ncv32u)y;
    Ncv32u yh = yl+1;
    Ncv32f dy = y - yl;

    const bool bHasRight = xh < refLayer.width();
    const bool bHasBottom = yh < refLayer.height();

    T p00 = refLayer.at(xl, yl);
    T p01 = bHasRight ? refLayer.at(xh, yl) : p00;
    T p10 = bHasBottom ? refLayer.at(xl, yh) : p00;
    T p11;
    if (bHasRight && bHasBottom)
    {
        p11 = refLayer.at(xh, yh);
    }
    else if (bHasRight)
    {
        //bottom row is missing: replicate the pixel directly above
        p11 = p01;
    }
    else
    {
        //right column is missing: replicate the pixel directly to the left
        //(p10 itself already equals p00 when the bottom row is missing too)
        p11 = p10;
    }

    typedef typename TConvBase2Vec<Ncv32f, NC(T)>::TVec TVFlt;
    //interpolate along x on both rows, then along y between the two results
    TVFlt m_00_01 = _lerp<T, TVFlt>(p00, p01, dx);
    TVFlt m_10_11 = _lerp<T, TVFlt>(p10, p11, dx);
    TVFlt mixture = _lerp<TVFlt, TVFlt>(m_00_01, m_10_11, dy);
    return _pixDemoteClampZ<TVFlt, T>(mixture);
}
// Builds an image pyramid on top of img. img itself serves as layer 0 and is
// only referenced (its address is stored), every further layer has half the
// width and height of the previous one and is allocated from alloc.
//   img       - base image; its memory type must match the allocator's
//   numLayers - requested number of layers including the base; 0 means
//               "as many as possible"
//   alloc     - allocator used for the reduced layers
//   cuStream  - not used yet (the device path is still a TODO)
// _isInitialized is set to true only when the end of the constructor is
// reached; a failing ncvAssertPrintReturn check leaves it false
// (presumably via an early return from the macro - confirm in NCV.hpp).
template <class T>
NCVImagePyramid<T>::NCVImagePyramid(const NCVMatrix<T> &img,
                                    Ncv8u numLayers,
                                    INCVMemAllocator &alloc,
                                    cudaStream_t cuStream)
{
    this->_isInitialized = false;
    ncvAssertPrintReturn(img.memType() == alloc.memType(), "NCVImagePyramid_host::ctor error", );

    this->layer0 = &img;
    this->nLayers = 1;
    NcvSize32u szLastLayer(img.width(), img.height());

    NCV_SET_SKIP_COND(alloc.isCounting());
    NcvBool bDeviceCode = alloc.memType() == NCVMemoryTypeDevice;

    if (numLayers == 0)
    {
        //no explicit limit: the loop below stops once halving gives an empty layer
        numLayers = 255;
    }

    const Ncv32u nExtraLayersMax = (Ncv32u)numLayers - 1;
    for (Ncv32u level = 0; level < nExtraLayersMax; level++)
    {
        NcvSize32u szCurLayer(szLastLayer.width / 2, szLastLayer.height / 2);
        if (szCurLayer.width == 0 || szCurLayer.height == 0)
        {
            break;
        }

        this->pyramid.push_back(new NCVMatrixAlloc<T>(alloc, szCurLayer.width, szCurLayer.height));
        ncvAssertPrintReturn(((NCVMatrixAlloc<T> *)(this->pyramid[level]))->isMemAllocated(), "NCVImagePyramid_host::ctor error", );
        this->nLayers++;

        //fill the freshly allocated layer from the previous one
        NCV_SKIP_COND_BEGIN

        const NCVMatrix<T> *prevLayer = level == 0 ? this->layer0 : this->pyramid[level-1];
        NCVMatrix<T> *curLayer = this->pyramid[level];

        if (!bDeviceCode)
        {
            //host path: every output pixel is the average of a 2x2 block of the previous layer
            for (Ncv32u row = 0; row < szCurLayer.height; row++)
            {
                for (Ncv32u col = 0; col < szCurLayer.width; col++)
                {
                    T p00 = prevLayer->at(2*col+0, 2*row+0);
                    T p01 = prevLayer->at(2*col+1, 2*row+0);
                    T p10 = prevLayer->at(2*col+0, 2*row+1);
                    T p11 = prevLayer->at(2*col+1, 2*row+1);
                    curLayer->at(col, row) = _average4(p00, p01, p10, p11);
                }
            }
        }
        else
        {
            //TODO: in cuStream
        }

        NCV_SKIP_COND_END

        szLastLayer = szCurLayer;
    }

    this->_isInitialized = true;
}
// Destructor. Nothing is released explicitly here: layer0 only points at the
// image passed to the constructor, and the reduced layers are held by the
// `pyramid` member, whose own destructor disposes of them.
template <class T>
NCVImagePyramid<T>::~NCVImagePyramid()
{
}
// Reports whether the constructor ran to completion, i.e. whether the
// _isInitialized flag was raised at its end.
template <class T>
NcvBool NCVImagePyramid<T>::isInitialized() const
{
    const NcvBool bReady = this->_isInitialized;
    return bReady;
}
// Produces an image of size outRoi from the pyramid and writes it to outImg.
//   outImg     - destination; must have the same memory type as the base layer
//   outRoi     - requested size; must be non-empty and not larger than the base
//                layer in either dimension
//   bTrilinear - when true and the requested size lies strictly between two
//                neighbouring layers, the result is blended from both layers
//   cuStream   - forwarded to copy2D for exact-size requests; the interpolating
//                device path is still a TODO
// Returns NCV_SUCCESS, or the status of a failed precondition / copy.
// If outRoi matches a stored layer exactly, that layer is copied. Otherwise
// each output pixel is sampled bilinearly from the smallest layer that is still
// larger than the request (falling back to the smallest available layer).
template <class T>
NCVStatus NCVImagePyramid<T>::getLayer(NCVMatrix<T> &outImg,
                                       NcvSize32u outRoi,
                                       NcvBool bTrilinear,
                                       cudaStream_t cuStream) const
{
    ncvAssertReturn(this->isInitialized(), NCV_UNKNOWN_ERROR);
    ncvAssertReturn(outImg.memType() == this->layer0->memType(), NCV_MEM_RESIDENCE_ERROR);
    ncvAssertReturn(outRoi.width <= this->layer0->width() && outRoi.height <= this->layer0->height() &&
                    outRoi.width > 0 && outRoi.height > 0, NCV_DIMENSIONS_INVALID);

    //requested size equals the base image: plain copy
    if (outRoi.width == this->layer0->width() && outRoi.height == this->layer0->height())
    {
        ncvAssertReturnNcvStat(this->layer0->copy2D(outImg, NcvSize32u(this->layer0->width(), this->layer0->height()), cuStream));
        return NCV_SUCCESS;
    }

    //walk down the pyramid: lastLayer is the layer to sample from ("top"),
    //curLayer the next smaller one ("bottom")
    Ncv32f lastScale = 1.0f;
    Ncv32f curScale = 1.0f; //only meaningful once the loop below has run
    const NCVMatrix<T> *lastLayer = this->layer0;
    const NCVMatrix<T> *curLayer = NULL;
    NcvBool bUse2Refs = false;

    for (Ncv32u i=0; i<this->nLayers-1; i++)
    {
        curScale = lastScale * 0.5f;
        curLayer = this->pyramid[i];

        //requested size equals a stored layer: plain copy
        if (outRoi.width == curLayer->width() && outRoi.height == curLayer->height())
        {
            ncvAssertReturnNcvStat(this->pyramid[i]->copy2D(outImg, NcvSize32u(this->pyramid[i]->width(), this->pyramid[i]->height()), cuStream));
            return NCV_SUCCESS;
        }

        if (outRoi.width >= curLayer->width() && outRoi.height >= curLayer->height())
        {
            //the request lies between lastLayer and curLayer
            if (outRoi.width < lastLayer->width() && outRoi.height < lastLayer->height())
            {
                bUse2Refs = true;
            }
            break;
        }

        lastScale = curScale;
        lastLayer = curLayer;
    }

    bUse2Refs = bUse2Refs && bTrilinear;

    NCV_SET_SKIP_COND(outImg.memType() == NCVMemoryTypeNone);
    NcvBool bDeviceCode = this->layer0->memType() == NCVMemoryTypeDevice;

    NCV_SKIP_COND_BEGIN

    if (bDeviceCode)
    {
        //TODO: in cuStream
    }
    else
    {
        //everything below is constant for the whole output image, so it is
        //computed once instead of once per pixel
        const NcvSize32u szTopLayer(lastLayer->width(), lastLayer->height());

        //A one pixel wide (or high) request has no extent to map onto the source:
        //the mapping would evaluate 0/0, yield NaN and then hit an undefined
        //float-to-unsigned conversion. Such a request samples coordinate 0.
        const NcvBool bSpanX = outRoi.width > 1;
        const NcvBool bSpanY = outRoi.height > 1;

        //blend weight between bottom and top layer (used only with two references)
        Ncv32f dl = 0.0f;
        if (bUse2Refs)
        {
            Ncv32f scale = (1.0f * outRoi.width / this->layer0->width() + 1.0f * outRoi.height / this->layer0->height()) / 2;
            dl = (scale - curScale) / (lastScale - curScale);
            dl = CLAMP(dl, 0.0f, 1.0f);
        }

        for (Ncv32u i=0; i<outRoi.height; i++)
        {
            for (Ncv32u j=0; j<outRoi.width; j++)
            {
                //top layer pixel (always exists)
                Ncv32f ptTopX = bSpanX ? 1.0f * (szTopLayer.width - 1) * j / (outRoi.width - 1) : 0.0f;
                Ncv32f ptTopY = bSpanY ? 1.0f * (szTopLayer.height - 1) * i / (outRoi.height - 1) : 0.0f;
                T topPix = _interpBilinear(*lastLayer, ptTopX, ptTopY);
                T trilinearPix = topPix;

                if (bUse2Refs)
                {
                    //bottom layer pixel (exists only if the requested scale is greater than the smallest layer scale)
                    NcvSize32u szBottomLayer(curLayer->width(), curLayer->height());
                    Ncv32f ptBottomX = bSpanX ? 1.0f * (szBottomLayer.width - 1) * j / (outRoi.width - 1) : 0.0f;
                    Ncv32f ptBottomY = bSpanY ? 1.0f * (szBottomLayer.height - 1) * i / (outRoi.height - 1) : 0.0f;
                    T bottomPix = _interpBilinear(*curLayer, ptBottomX, ptBottomY);

                    trilinearPix = _interpLinear(bottomPix, topPix, dl);
                }

                outImg.at(j, i) = trilinearPix;
            }
        }
    }

    NCV_SKIP_COND_END

    return NCV_SUCCESS;
}
// Explicit instantiations. The member functions of NCVImagePyramid are defined
// in this source file rather than in the header, so these are the pixel types
// for which the class is emitted from this translation unit: 1-, 3- and
// 4-channel variants of uchar, ushort, uint and float.
template class NCVImagePyramid<uchar1>;
template class NCVImagePyramid<uchar3>;
template class NCVImagePyramid<uchar4>;
template class NCVImagePyramid<ushort1>;
template class NCVImagePyramid<ushort3>;
template class NCVImagePyramid<ushort4>;
template class NCVImagePyramid<uint1>;
template class NCVImagePyramid<uint3>;
template class NCVImagePyramid<uint4>;
template class NCVImagePyramid<float1>;
template class NCVImagePyramid<float3>;
template class NCVImagePyramid<float4>;

@ -0,0 +1,97 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef _ncvpyramid_hpp_
#define _ncvpyramid_hpp_
#include <memory>
#include <vector>
#include "NCV.hpp"
// Owning stack of heap-allocated NCVMatrix<T> objects. Every pointer handed to
// push_back() is wrapped in a shared_ptr and therefore deleted by the stack;
// operator[] hands out a non-owning raw pointer and performs no bounds check.
template <class T>
class NCV_EXPORTS NCVMatrixStack
{
public:
    NCVMatrixStack() : _arr() {}

    // Drops the elements one by one starting from the most recently pushed.
    // NOTE(review): the back-to-front order presumably matters for stack-style
    // allocators that expect LIFO release - confirm before simplifying.
    ~NCVMatrixStack()
    {
        while (!this->_arr.empty())
        {
            this->pop_back();
        }
    }

    // Takes ownership of elem and appends it to the stack.
    void push_back(NCVMatrix<T> *elem)
    {
        std::tr1::shared_ptr< NCVMatrix<T> > owned(elem);
        this->_arr.push_back(owned);
    }

    // Removes the most recently pushed element, releasing this stack's reference to it.
    void pop_back()
    {
        this->_arr.pop_back();
    }

    // Raw pointer to the i-th element; ownership stays with the stack.
    NCVMatrix<T> * operator [] (int i) const
    {
        return this->_arr[i].get();
    }

private:
    std::vector< std::tr1::shared_ptr< NCVMatrix<T> > > _arr;
};
// Image pyramid over pixel type T: the base image plus a chain of layers, each
// half the width and height of the previous one. The member functions are
// defined in NCVPyramid.cu.
template <class T>
class NCV_EXPORTS NCVImagePyramid
{
public:
// Builds the pyramid for img. nLayers is the requested number of layers
// including the base image (0 = as many as possible); the reduced layers are
// allocated from alloc. The base image is referenced, not copied, so img must
// stay alive for as long as the pyramid is used.
NCVImagePyramid(const NCVMatrix<T> &img,
Ncv8u nLayers,
INCVMemAllocator &alloc,
cudaStream_t cuStream);
~NCVImagePyramid();
// True when the constructor completed successfully.
NcvBool isInitialized() const;
// Writes an image of size outRoi to outImg: an exact copy when a layer of that
// size exists, otherwise an interpolation from the neighbouring layer(s);
// bTrilinear additionally enables blending between two layers.
NCVStatus getLayer(NCVMatrix<T> &outImg,
NcvSize32u outRoi,
NcvBool bTrilinear,
cudaStream_t cuStream) const;
private:
// set at the very end of the constructor
NcvBool _isInitialized;
// base image (layer 0); not owned
const NCVMatrix<T> *layer0;
// reduced layers 1..nLayers-1, owned by the stack
NCVMatrixStack<T> pyramid;
// total number of layers including the base image
Ncv32u nLayers;
};
#endif //_ncvpyramid_hpp_

@ -68,10 +68,7 @@ namespace
namespace
{
// Message handler taking a C string: reports msg through CV_Error with the
// code CV_GpuApiCallError.
// NOTE(review): this appears to be the pre-change form shown by the diff, with
// the std::string overload below as its replacement - confirm which one the
// final file keeps.
void outputHandler(const char* msg)
{
CV_Error(CV_GpuApiCallError, msg);
}
// Message handler taking a std::string: reports msg through CV_Error with the
// code CV_GpuApiCallError. Presumably installed as the NCV debug output
// handler - verify against the caller.
static void outputHandler(const std::string &msg) { CV_Error(CV_GpuApiCallError, msg.c_str()); }
}
void cv::gpu::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& s)

Loading…
Cancel
Save