added ImagePyramid class to gpu module

pull/13383/head
Vladislav Vinogradov 13 years ago
parent 92a682093a
commit b398ac7a96
  1. modules/gpu/include/opencv2/gpu/gpu.hpp (26 changed lines)
  2. modules/gpu/src/imgproc.cpp (133 changed lines)
  3. modules/gpu/src/nvidia/core/NCVPyramid.cu (65 changed lines)
  4. modules/gpu/src/nvidia/core/NCVPyramid.hpp (6 changed lines)

@@ -826,6 +826,32 @@ struct CV_EXPORTS CannyBuf
    Ptr<FilterEngine_GPU> filterDX, filterDY;
};

class CV_EXPORTS ImagePyramid
{
public:
    inline ImagePyramid() : nLayers_(0) {}

    inline ImagePyramid(const GpuMat& img, int nLayers, Stream& stream = Stream::Null())
    {
        build(img, nLayers, stream);
    }

    void build(const GpuMat& img, int nLayers, Stream& stream = Stream::Null());

    void getLayer(GpuMat& outImg, Size outRoi, Stream& stream = Stream::Null()) const;

    inline void release()
    {
        layer0_.release();
        pyramid_.clear();
        nLayers_ = 0;
    }

private:
    GpuMat layer0_;
    std::vector<GpuMat> pyramid_;
    int nLayers_;
};

////////////////////////////// Matrix reductions //////////////////////////////

//! computes mean value and standard deviation of all or selected array elements
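For reviewers, a minimal usage sketch of the new class (hypothetical file name and layer count, not part of the patch; assumes a CUDA device and the 2.x-style gpu headers):

    #include "opencv2/gpu/gpu.hpp"
    #include "opencv2/highgui/highgui.hpp"

    // Build a 5-level pyramid from an uploaded frame and fetch an in-between resolution.
    cv::Mat frame = cv::imread("frame.png");   // hypothetical CV_8UC3 input
    cv::gpu::GpuMat d_frame;
    d_frame.upload(frame);

    cv::gpu::ImagePyramid pyr(d_frame, 5);     // downsampled layers at 1/2, 1/4, 1/8, 1/16 scale

    cv::gpu::GpuMat d_small;
    pyr.getLayer(d_small, cv::Size(d_frame.cols / 3, d_frame.rows / 3));   // interpolated output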

@@ -101,6 +101,8 @@ void cv::gpu::Canny(const GpuMat&, const GpuMat&, CannyBuf&, GpuMat&, double, do
cv::gpu::CannyBuf::CannyBuf(const GpuMat&, const GpuMat&) { throw_nogpu(); }
void cv::gpu::CannyBuf::create(const Size&, int) { throw_nogpu(); }
void cv::gpu::CannyBuf::release() { throw_nogpu(); }
void cv::gpu::ImagePyramid::build(const GpuMat&, int, Stream&) { throw_nogpu(); }
void cv::gpu::ImagePyramid::getLayer(GpuMat&, Size, Stream&) const { throw_nogpu(); }
#else /* !defined (HAVE_CUDA) */
@@ -2017,6 +2019,137 @@ void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& d
    CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
}

//////////////////////////////////////////////////////////////////////////////
// ImagePyramid

namespace cv { namespace gpu { namespace device
{
    namespace pyramid
    {
        template <typename T> void kernelDownsampleX2_gpu(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
        template <typename T> void kernelInterpolateFrom1_gpu(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
    }
}}}

void cv::gpu::ImagePyramid::build(const GpuMat& img, int numLayers, Stream& stream)
{
#ifdef _WIN32
    using namespace cv::gpu::device::pyramid;

    typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);

    static const func_t funcs[7][4] =
    {
        {kernelDownsampleX2_gpu<uchar1>, /*kernelDownsampleX2_gpu<uchar2>*/ 0, kernelDownsampleX2_gpu<uchar3>, kernelDownsampleX2_gpu<uchar4>},
        {/*kernelDownsampleX2_gpu<char1>*/ 0, /*kernelDownsampleX2_gpu<char2>*/ 0, /*kernelDownsampleX2_gpu<char3>*/ 0, /*kernelDownsampleX2_gpu<char4>*/ 0},
        {kernelDownsampleX2_gpu<ushort1>, /*kernelDownsampleX2_gpu<ushort2>*/ 0, kernelDownsampleX2_gpu<ushort3>, kernelDownsampleX2_gpu<ushort4>},
        {/*kernelDownsampleX2_gpu<short1>*/ 0, /*kernelDownsampleX2_gpu<short2>*/ 0, /*kernelDownsampleX2_gpu<short3>*/ 0, /*kernelDownsampleX2_gpu<short4>*/ 0},
        {/*kernelDownsampleX2_gpu<int1>*/ 0, /*kernelDownsampleX2_gpu<int2>*/ 0, /*kernelDownsampleX2_gpu<int3>*/ 0, /*kernelDownsampleX2_gpu<int4>*/ 0},
        {kernelDownsampleX2_gpu<float1>, /*kernelDownsampleX2_gpu<float2>*/ 0, kernelDownsampleX2_gpu<float3>, kernelDownsampleX2_gpu<float4>},
        {/*kernelDownsampleX2_gpu<double1>*/ 0, /*kernelDownsampleX2_gpu<double2>*/ 0, /*kernelDownsampleX2_gpu<double3>*/ 0, /*kernelDownsampleX2_gpu<double4>*/ 0}
    };

    CV_Assert(img.channels() == 1 || img.channels() == 3 || img.channels() == 4);
    CV_Assert(img.depth() == CV_8U || img.depth() == CV_16U || img.depth() == CV_32F);

    layer0_ = img;
    Size szLastLayer = img.size();
    nLayers_ = 1;

    if (numLayers <= 0)
        numLayers = 255; // it will cut off when any of the dimensions reaches 1

    pyramid_.resize(numLayers);

    for (int i = 0; i < numLayers - 1; ++i)
    {
        Size szCurLayer(szLastLayer.width / 2, szLastLayer.height / 2);

        if (szCurLayer.width == 0 || szCurLayer.height == 0)
            break;

        ensureSizeIsEnough(szCurLayer, img.type(), pyramid_[i]);
        nLayers_++;

        const GpuMat& prevLayer = i == 0 ? layer0_ : pyramid_[i - 1];

        func_t func = funcs[img.depth()][img.channels() - 1];
        CV_Assert(func != 0);

        func(prevLayer, pyramid_[i], StreamAccessor::getStream(stream));

        szLastLayer = szCurLayer;
    }
#else
    throw_nogpu();
#endif
}
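As a sanity check on the halving loop above, a host-only sketch of the layer sizes build() would allocate for a hypothetical 640x480 input (illustration only, mirrors the loop, not code from the patch):

    #include <cstdio>

    int main()
    {
        int w = 640, h = 480;                       // hypothetical layer0_ size
        for (int layer = 1; w / 2 > 0 && h / 2 > 0; ++layer)
        {
            w /= 2; h /= 2;                         // same halving rule as build()
            std::printf("layer %d: %dx%d\n", layer, w, h);
        }
        return 0;                                   // prints 320x240, 160x120, ..., 2x1
    }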
void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream) const
{
#ifdef _WIN32
    using namespace cv::gpu::device::pyramid;

    typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);

    static const func_t funcs[7][4] =
    {
        {kernelInterpolateFrom1_gpu<uchar1>, /*kernelInterpolateFrom1_gpu<uchar2>*/ 0, kernelInterpolateFrom1_gpu<uchar3>, kernelInterpolateFrom1_gpu<uchar4>},
        {/*kernelInterpolateFrom1_gpu<char1>*/ 0, /*kernelInterpolateFrom1_gpu<char2>*/ 0, /*kernelInterpolateFrom1_gpu<char3>*/ 0, /*kernelInterpolateFrom1_gpu<char4>*/ 0},
        {kernelInterpolateFrom1_gpu<ushort1>, /*kernelInterpolateFrom1_gpu<ushort2>*/ 0, kernelInterpolateFrom1_gpu<ushort3>, kernelInterpolateFrom1_gpu<ushort4>},
        {/*kernelInterpolateFrom1_gpu<short1>*/ 0, /*kernelInterpolateFrom1_gpu<short2>*/ 0, /*kernelInterpolateFrom1_gpu<short3>*/ 0, /*kernelInterpolateFrom1_gpu<short4>*/ 0},
        {/*kernelInterpolateFrom1_gpu<int1>*/ 0, /*kernelInterpolateFrom1_gpu<int2>*/ 0, /*kernelInterpolateFrom1_gpu<int3>*/ 0, /*kernelInterpolateFrom1_gpu<int4>*/ 0},
        {kernelInterpolateFrom1_gpu<float1>, /*kernelInterpolateFrom1_gpu<float2>*/ 0, kernelInterpolateFrom1_gpu<float3>, kernelInterpolateFrom1_gpu<float4>},
        {/*kernelInterpolateFrom1_gpu<double1>*/ 0, /*kernelInterpolateFrom1_gpu<double2>*/ 0, /*kernelInterpolateFrom1_gpu<double3>*/ 0, /*kernelInterpolateFrom1_gpu<double4>*/ 0}
    };

    CV_Assert(outRoi.width <= layer0_.cols && outRoi.height <= layer0_.rows && outRoi.width > 0 && outRoi.height > 0);

    ensureSizeIsEnough(outRoi, layer0_.type(), outImg);

    if (outRoi.width == layer0_.cols && outRoi.height == layer0_.rows)
    {
        if (stream)
            stream.enqueueCopy(layer0_, outImg);
        else
            layer0_.copyTo(outImg);
        return; // the requested ROI is the base layer itself, no interpolation needed
    }

    float lastScale = 1.0f;
    float curScale;
    GpuMat lastLayer = layer0_;
    GpuMat curLayer;

    for (int i = 0; i < nLayers_ - 1; ++i)
    {
        curScale = lastScale * 0.5f;
        curLayer = pyramid_[i];

        if (outRoi.width == curLayer.cols && outRoi.height == curLayer.rows)
        {
            if (stream)
                stream.enqueueCopy(curLayer, outImg);
            else
                curLayer.copyTo(outImg);
        }

        if (outRoi.width >= curLayer.cols && outRoi.height >= curLayer.rows)
            break;

        lastScale = curScale;
        lastLayer = curLayer;
    }

    func_t func = funcs[outImg.depth()][outImg.channels() - 1];
    CV_Assert(func != 0);

    func(lastLayer, outImg, StreamAccessor::getStream(stream));
#else
    throw_nogpu();
#endif
}
#endif /* !defined (HAVE_CUDA) */
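A brief walk-through of the layer selection in getLayer, with example numbers that are not from the patch: for a 640x480 pyramid (layers 320x240, 160x120, ...) and outRoi = 200x150, the loop breaks once curLayer (160x120) fits inside the ROI, so lastLayer is the 320x240 layer, the finest layer still at least as large as the ROI in both dimensions, and kernelInterpolateFrom1_gpu resamples it down to 200x150. Reusing the hypothetical pyr object from the earlier sketch (and assuming its frame is 640x480):

    cv::gpu::GpuMat d_roi;
    pyr.getLayer(d_roi, cv::Size(200, 150));   // sourced from the 320x240 layer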

@@ -46,6 +46,7 @@
#include "NCVAlg.hpp"
#include "NCVPyramid.hpp"
#include "NCVPixelOperations.hpp"
#include "opencv2/gpu/device/common.hpp"
#ifdef _WIN32
@@ -234,6 +235,39 @@ __global__ void kernelDownsampleX2(T *d_src,
    }
}
namespace cv { namespace gpu { namespace device
{
    namespace pyramid
    {
        template <typename T> void kernelDownsampleX2_gpu(DevMem2Db src, DevMem2Db dst, cudaStream_t stream)
        {
            dim3 bDim(16, 8);
            dim3 gDim(divUp(src.cols, bDim.x), divUp(src.rows, bDim.y));

            kernelDownsampleX2<<<gDim, bDim, 0, stream>>>((T*)src.data, src.step, (T*)dst.data, dst.step, NcvSize32u(dst.cols, dst.rows));
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template void kernelDownsampleX2_gpu<uchar1>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
        template void kernelDownsampleX2_gpu<uchar3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
        template void kernelDownsampleX2_gpu<uchar4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
        template void kernelDownsampleX2_gpu<ushort1>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
        template void kernelDownsampleX2_gpu<ushort3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
        template void kernelDownsampleX2_gpu<ushort4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
        template void kernelDownsampleX2_gpu<float1>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
        template void kernelDownsampleX2_gpu<float3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
        template void kernelDownsampleX2_gpu<float4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
    }
}}}
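For reference, the launch configuration above is the usual ceil-division pattern over the block size; a hedged host-side sketch with a hypothetical 640x480 source (values not from the patch; divUp(a, b) is (a + b - 1) / b in the gpu device utilities):

    dim3 bDim(16, 8);                                 // 128 threads per block
    dim3 gDim((640 + bDim.x - 1) / bDim.x,            // 40 blocks across
              (480 + bDim.y - 1) / bDim.y);           // 60 blocks down
    // 40 x 60 = 2400 blocks; the kernel is expected to bounds-check against dstSize.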
template<typename T>
__global__ void kernelInterpolateFrom1(T *d_srcTop,
@@ -275,6 +309,37 @@ __global__ void kernelInterpolateFrom1(T *d_srcTop,
        d_dst_line[j] = outPix;
    }
}
namespace cv { namespace gpu { namespace device
{
    namespace pyramid
    {
        template <typename T> void kernelInterpolateFrom1_gpu(DevMem2Db src, DevMem2Db dst, cudaStream_t stream)
        {
            dim3 bDim(16, 8);
            dim3 gDim(divUp(dst.cols, bDim.x), divUp(dst.rows, bDim.y));

            kernelInterpolateFrom1<<<gDim, bDim, 0, stream>>>((T*) src.data, src.step, NcvSize32u(src.cols, src.rows),
                (T*) dst.data, dst.step, NcvSize32u(dst.cols, dst.rows));
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template void kernelInterpolateFrom1_gpu<uchar1>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
        template void kernelInterpolateFrom1_gpu<uchar3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
        template void kernelInterpolateFrom1_gpu<uchar4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
        template void kernelInterpolateFrom1_gpu<ushort1>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
        template void kernelInterpolateFrom1_gpu<ushort3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
        template void kernelInterpolateFrom1_gpu<ushort4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
        template void kernelInterpolateFrom1_gpu<float1>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
        template void kernelInterpolateFrom1_gpu<float3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
        template void kernelInterpolateFrom1_gpu<float4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
    }
}}}
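One note on the `if (stream == 0) cudaDeviceSynchronize()` pattern shared by both wrappers: with the default (null) stream the call blocks until the kernel finishes, while a user-supplied stream makes the call asynchronous and the caller must synchronize. A hedged sketch reusing the hypothetical d_frame and d_small from earlier (2.x-style Stream API, frame assumed at least 320x240):

    cv::gpu::Stream stream;                              // non-default CUDA stream
    cv::gpu::ImagePyramid pyr;

    pyr.build(d_frame, 5, stream);                       // work is only enqueued here
    pyr.getLayer(d_small, cv::Size(320, 240), stream);   // same stream keeps ordering

    stream.waitForCompletion();                          // block until the pyramid work is done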
template <class T>

@@ -46,7 +46,7 @@
#include <memory>
#include <vector>
#include "NCV.hpp"
#ifdef _WIN32
template <class T>
@@ -92,8 +92,8 @@ private:
    const NCVMatrix<T> *layer0;
    NCVMatrixStack<T> pyramid;
    Ncv32u nLayers;
};

#endif //_WIN32

#endif //_ncvpyramid_hpp_
