removed buffered versions of histogram functions

used BufferPool mechanism instead
pull/3566/head
Vladislav Vinogradov 11 years ago
parent 61991a3330
commit 220d937d9a
  1. 6
      modules/core/include/opencv2/core/private.cuda.hpp
  2. 50
      modules/cudaimgproc/include/opencv2/cudaimgproc.hpp
  3. 9
      modules/cudaimgproc/perf/perf_histogram.cpp
  4. 60
      modules/cudaimgproc/src/histogram.cpp
  5. 5
      samples/gpu/performance/tests.cpp

@ -130,6 +130,12 @@ namespace cv { namespace cuda
class NppStreamHandler class NppStreamHandler
{ {
public: public:
inline explicit NppStreamHandler(Stream& newStream)
{
oldStream = nppGetStream();
nppSetStream(StreamAccessor::getStream(newStream));
}
inline explicit NppStreamHandler(cudaStream_t newStream) inline explicit NppStreamHandler(cudaStream_t newStream)
{ {
oldStream = nppGetStream(); oldStream = nppGetStream();

@ -205,19 +205,11 @@ CV_EXPORTS void calcHist(InputArray src, OutputArray hist, Stream& stream = Stre
@param src Source image with CV_8UC1 type. @param src Source image with CV_8UC1 type.
@param dst Destination image. @param dst Destination image.
@param buf Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
@param stream Stream for the asynchronous version. @param stream Stream for the asynchronous version.
@sa equalizeHist @sa equalizeHist
*/ */
CV_EXPORTS void equalizeHist(InputArray src, OutputArray dst, InputOutputArray buf, Stream& stream = Stream::Null()); CV_EXPORTS void equalizeHist(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
/** @overload */
static inline void equalizeHist(InputArray src, OutputArray dst, Stream& stream = Stream::Null())
{
GpuMat buf;
cuda::equalizeHist(src, dst, buf, stream);
}
/** @brief Base class for Contrast Limited Adaptive Histogram Equalization. : /** @brief Base class for Contrast Limited Adaptive Histogram Equalization. :
*/ */
@ -259,27 +251,11 @@ a four-channel image, all channels are processed separately.
@param histSize Size of the histogram. @param histSize Size of the histogram.
@param lowerLevel Lower boundary of lowest-level bin. @param lowerLevel Lower boundary of lowest-level bin.
@param upperLevel Upper boundary of highest-level bin. @param upperLevel Upper boundary of highest-level bin.
@param buf Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
@param stream Stream for the asynchronous version. @param stream Stream for the asynchronous version.
*/ */
CV_EXPORTS void histEven(InputArray src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null()); CV_EXPORTS void histEven(InputArray src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
/** @overload */ /** @overload */
static inline void histEven(InputArray src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null()) CV_EXPORTS void histEven(InputArray src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
{
GpuMat buf;
cuda::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);
}
/** @overload */
CV_EXPORTS void histEven(InputArray src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
/** @overload */
static inline void histEven(InputArray src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null())
{
GpuMat buf;
cuda::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);
}
/** @brief Calculates a histogram with bins determined by the levels array. /** @brief Calculates a histogram with bins determined by the levels array.
@ -287,27 +263,11 @@ static inline void histEven(InputArray src, GpuMat hist[4], int histSize[4], int
For a four-channel image, all channels are processed separately. For a four-channel image, all channels are processed separately.
@param hist Destination histogram with one row, (levels.cols-1) columns, and the CV_32SC1 type. @param hist Destination histogram with one row, (levels.cols-1) columns, and the CV_32SC1 type.
@param levels Number of levels in the histogram. @param levels Number of levels in the histogram.
@param buf Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
@param stream Stream for the asynchronous version. @param stream Stream for the asynchronous version.
*/ */
CV_EXPORTS void histRange(InputArray src, OutputArray hist, InputArray levels, InputOutputArray buf, Stream& stream = Stream::Null()); CV_EXPORTS void histRange(InputArray src, OutputArray hist, InputArray levels, Stream& stream = Stream::Null());
/** @overload */
static inline void histRange(InputArray src, OutputArray hist, InputArray levels, Stream& stream = Stream::Null())
{
GpuMat buf;
cuda::histRange(src, hist, levels, buf, stream);
}
/** @overload */
CV_EXPORTS void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, Stream& stream = Stream::Null());
/** @overload */ /** @overload */
static inline void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null()) CV_EXPORTS void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null());
{
GpuMat buf;
cuda::histRange(src, hist, levels, buf, stream);
}
//! @} cudaimgproc_hist //! @} cudaimgproc_hist

@ -63,9 +63,8 @@ PERF_TEST_P(Sz_Depth, HistEvenC1,
{ {
const cv::cuda::GpuMat d_src(src); const cv::cuda::GpuMat d_src(src);
cv::cuda::GpuMat dst; cv::cuda::GpuMat dst;
cv::cuda::GpuMat d_buf;
TEST_CYCLE() cv::cuda::histEven(d_src, dst, d_buf, 30, 0, 180); TEST_CYCLE() cv::cuda::histEven(d_src, dst, 30, 0, 180);
CUDA_SANITY_CHECK(dst); CUDA_SANITY_CHECK(dst);
} }
@ -106,9 +105,8 @@ PERF_TEST_P(Sz_Depth, HistEvenC4,
{ {
const cv::cuda::GpuMat d_src(src); const cv::cuda::GpuMat d_src(src);
cv::cuda::GpuMat d_hist[4]; cv::cuda::GpuMat d_hist[4];
cv::cuda::GpuMat d_buf;
TEST_CYCLE() cv::cuda::histEven(d_src, d_hist, d_buf, histSize, lowerLevel, upperLevel); TEST_CYCLE() cv::cuda::histEven(d_src, d_hist, histSize, lowerLevel, upperLevel);
cv::Mat cpu_hist0, cpu_hist1, cpu_hist2, cpu_hist3; cv::Mat cpu_hist0, cpu_hist1, cpu_hist2, cpu_hist3;
d_hist[0].download(cpu_hist0); d_hist[0].download(cpu_hist0);
@ -167,9 +165,8 @@ PERF_TEST_P(Sz, EqualizeHist,
{ {
const cv::cuda::GpuMat d_src(src); const cv::cuda::GpuMat d_src(src);
cv::cuda::GpuMat dst; cv::cuda::GpuMat dst;
cv::cuda::GpuMat d_buf;
TEST_CYCLE() cv::cuda::equalizeHist(d_src, dst, d_buf); TEST_CYCLE() cv::cuda::equalizeHist(d_src, dst);
CUDA_SANITY_CHECK(dst); CUDA_SANITY_CHECK(dst);
} }

@ -49,7 +49,7 @@ using namespace cv::cuda;
void cv::cuda::calcHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); } void cv::cuda::calcHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::equalizeHist(InputArray, OutputArray, InputOutputArray, Stream&) { throw_no_cuda(); } void cv::cuda::equalizeHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
cv::Ptr<cv::cuda::CLAHE> cv::cuda::createCLAHE(double, cv::Size) { throw_no_cuda(); return cv::Ptr<cv::cuda::CLAHE>(); } cv::Ptr<cv::cuda::CLAHE> cv::cuda::createCLAHE(double, cv::Size) { throw_no_cuda(); return cv::Ptr<cv::cuda::CLAHE>(); }
@ -93,7 +93,7 @@ namespace hist
void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream); void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream);
} }
void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, InputOutputArray _buf, Stream& _stream) void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, Stream& _stream)
{ {
GpuMat src = _src.getGpuMat(); GpuMat src = _src.getGpuMat();
@ -107,8 +107,8 @@ void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, InputOutputArray
size_t bufSize = intBufSize + 2 * 256 * sizeof(int); size_t bufSize = intBufSize + 2 * 256 * sizeof(int);
ensureSizeIsEnough(1, static_cast<int>(bufSize), CV_8UC1, _buf); BufferPool pool(_stream);
GpuMat buf = _buf.getGpuMat(); GpuMat buf = pool.getBuffer(1, static_cast<int>(bufSize), CV_8UC1);
GpuMat hist(1, 256, CV_32SC1, buf.data); GpuMat hist(1, 256, CV_32SC1, buf.data);
GpuMat lut(1, 256, CV_32SC1, buf.data + 256 * sizeof(int)); GpuMat lut(1, 256, CV_32SC1, buf.data + 256 * sizeof(int));
@ -288,7 +288,7 @@ namespace
{ {
typedef typename NppHistogramEvenFuncC1<SDEPTH>::src_t src_t; typedef typename NppHistogramEvenFuncC1<SDEPTH>::src_t src_t;
static void hist(const GpuMat& src, OutputArray _hist, InputOutputArray _buf, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream) static void hist(const GpuMat& src, OutputArray _hist, int histSize, int lowerLevel, int upperLevel, Stream& stream)
{ {
const int levels = histSize + 1; const int levels = histSize + 1;
@ -302,15 +302,15 @@ namespace
int buf_size; int buf_size;
get_buf_size(sz, levels, &buf_size); get_buf_size(sz, levels, &buf_size);
ensureSizeIsEnough(1, buf_size, CV_8UC1, _buf); BufferPool pool(stream);
GpuMat buf = _buf.getGpuMat(); GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1);
NppStreamHandler h(stream); NppStreamHandler h(stream);
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels, nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels,
lowerLevel, upperLevel, buf.ptr<Npp8u>()) ); lowerLevel, upperLevel, buf.ptr<Npp8u>()) );
if (stream == 0) if (!stream)
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
}; };
@ -319,7 +319,7 @@ namespace
{ {
typedef typename NppHistogramEvenFuncC4<SDEPTH>::src_t src_t; typedef typename NppHistogramEvenFuncC4<SDEPTH>::src_t src_t;
static void hist(const GpuMat& src, GpuMat hist[4],InputOutputArray _buf, int histSize[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream) static void hist(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)
{ {
int levels[] = {histSize[0] + 1, histSize[1] + 1, histSize[2] + 1, histSize[3] + 1}; int levels[] = {histSize[0] + 1, histSize[1] + 1, histSize[2] + 1, histSize[3] + 1};
hist[0].create(1, histSize[0], CV_32S); hist[0].create(1, histSize[0], CV_32S);
@ -336,14 +336,14 @@ namespace
int buf_size; int buf_size;
get_buf_size(sz, levels, &buf_size); get_buf_size(sz, levels, &buf_size);
ensureSizeIsEnough(1, buf_size, CV_8U, _buf); BufferPool pool(stream);
GpuMat buf = _buf.getGpuMat(); GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1);
NppStreamHandler h(stream); NppStreamHandler h(stream);
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, levels, lowerLevel, upperLevel, buf.ptr<Npp8u>()) ); nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, levels, lowerLevel, upperLevel, buf.ptr<Npp8u>()) );
if (stream == 0) if (!stream)
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
}; };
@ -392,7 +392,7 @@ namespace
typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t; typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE}; enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};
static void hist(const GpuMat& src, OutputArray _hist, const GpuMat& levels, InputOutputArray _buf, cudaStream_t stream) static void hist(const GpuMat& src, OutputArray _hist, const GpuMat& levels, Stream& stream)
{ {
CV_Assert( levels.type() == LEVEL_TYPE_CODE && levels.rows == 1 ); CV_Assert( levels.type() == LEVEL_TYPE_CODE && levels.rows == 1 );
@ -406,8 +406,8 @@ namespace
int buf_size; int buf_size;
get_buf_size(sz, levels.cols, &buf_size); get_buf_size(sz, levels.cols, &buf_size);
ensureSizeIsEnough(1, buf_size, CV_8U, _buf); BufferPool pool(stream);
GpuMat buf = _buf.getGpuMat(); GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1);
NppStreamHandler h(stream); NppStreamHandler h(stream);
@ -424,7 +424,7 @@ namespace
typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t; typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE}; enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};
static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4],InputOutputArray _buf, cudaStream_t stream) static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream)
{ {
CV_Assert( levels[0].type() == LEVEL_TYPE_CODE && levels[0].rows == 1 ); CV_Assert( levels[0].type() == LEVEL_TYPE_CODE && levels[0].rows == 1 );
CV_Assert( levels[1].type() == LEVEL_TYPE_CODE && levels[1].rows == 1 ); CV_Assert( levels[1].type() == LEVEL_TYPE_CODE && levels[1].rows == 1 );
@ -447,8 +447,8 @@ namespace
int buf_size; int buf_size;
get_buf_size(sz, nLevels, &buf_size); get_buf_size(sz, nLevels, &buf_size);
ensureSizeIsEnough(1, buf_size, CV_8U, _buf); BufferPool pool(stream);
GpuMat buf = _buf.getGpuMat(); GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1);
NppStreamHandler h(stream); NppStreamHandler h(stream);
@ -493,9 +493,9 @@ namespace
} }
} }
void cv::cuda::histEven(InputArray _src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream) void cv::cuda::histEven(InputArray _src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream)
{ {
typedef void (*hist_t)(const GpuMat& src, OutputArray hist, InputOutputArray buf, int levels, int lowerLevel, int upperLevel, cudaStream_t stream); typedef void (*hist_t)(const GpuMat& src, OutputArray hist, int levels, int lowerLevel, int upperLevel, Stream& stream);
static const hist_t hist_callers[] = static const hist_t hist_callers[] =
{ {
NppHistogramEvenC1<CV_8U , nppiHistogramEven_8u_C1R , nppiHistogramEvenGetBufferSize_8u_C1R >::hist, NppHistogramEvenC1<CV_8U , nppiHistogramEven_8u_C1R , nppiHistogramEvenGetBufferSize_8u_C1R >::hist,
@ -514,12 +514,12 @@ void cv::cuda::histEven(InputArray _src, OutputArray hist, InputOutputArray buf,
CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 ); CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 );
hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream)); hist_callers[src.depth()](src, hist, histSize, lowerLevel, upperLevel, stream);
} }
void cv::cuda::histEven(InputArray _src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream) void cv::cuda::histEven(InputArray _src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)
{ {
typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], InputOutputArray buf, int levels[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream); typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], int levels[4], int lowerLevel[4], int upperLevel[4], Stream& stream);
static const hist_t hist_callers[] = static const hist_t hist_callers[] =
{ {
NppHistogramEvenC4<CV_8U , nppiHistogramEven_8u_C4R , nppiHistogramEvenGetBufferSize_8u_C4R >::hist, NppHistogramEvenC4<CV_8U , nppiHistogramEven_8u_C4R , nppiHistogramEvenGetBufferSize_8u_C4R >::hist,
@ -532,12 +532,12 @@ void cv::cuda::histEven(InputArray _src, GpuMat hist[4], InputOutputArray buf, i
CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 ); CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 );
hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream)); hist_callers[src.depth()](src, hist, histSize, lowerLevel, upperLevel, stream);
} }
void cv::cuda::histRange(InputArray _src, OutputArray hist, InputArray _levels, InputOutputArray buf, Stream& stream) void cv::cuda::histRange(InputArray _src, OutputArray hist, InputArray _levels, Stream& stream)
{ {
typedef void (*hist_t)(const GpuMat& src, OutputArray hist, const GpuMat& levels, InputOutputArray buf, cudaStream_t stream); typedef void (*hist_t)(const GpuMat& src, OutputArray hist, const GpuMat& levels, Stream& stream);
static const hist_t hist_callers[] = static const hist_t hist_callers[] =
{ {
NppHistogramRangeC1<CV_8U , nppiHistogramRange_8u_C1R , nppiHistogramRangeGetBufferSize_8u_C1R >::hist, NppHistogramRangeC1<CV_8U , nppiHistogramRange_8u_C1R , nppiHistogramRangeGetBufferSize_8u_C1R >::hist,
@ -553,12 +553,12 @@ void cv::cuda::histRange(InputArray _src, OutputArray hist, InputArray _levels,
CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1 ); CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1 );
hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream)); hist_callers[src.depth()](src, hist, levels, stream);
} }
void cv::cuda::histRange(InputArray _src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, Stream& stream) void cv::cuda::histRange(InputArray _src, GpuMat hist[4], const GpuMat levels[4], Stream& stream)
{ {
typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, cudaStream_t stream); typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream);
static const hist_t hist_callers[] = static const hist_t hist_callers[] =
{ {
NppHistogramRangeC4<CV_8U , nppiHistogramRange_8u_C4R , nppiHistogramRangeGetBufferSize_8u_C4R >::hist, NppHistogramRangeC4<CV_8U , nppiHistogramRange_8u_C4R , nppiHistogramRangeGetBufferSize_8u_C4R >::hist,
@ -573,7 +573,7 @@ void cv::cuda::histRange(InputArray _src, GpuMat hist[4], const GpuMat levels[4]
CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4 ); CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4 );
hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream)); hist_callers[src.depth()](src, hist, levels, stream);
} }
#endif /* !defined (HAVE_CUDA) */ #endif /* !defined (HAVE_CUDA) */

@ -1053,12 +1053,11 @@ TEST(equalizeHist)
cuda::GpuMat d_src(src); cuda::GpuMat d_src(src);
cuda::GpuMat d_dst; cuda::GpuMat d_dst;
cuda::GpuMat d_buf;
cuda::equalizeHist(d_src, d_dst, d_buf); cuda::equalizeHist(d_src, d_dst);
CUDA_ON; CUDA_ON;
cuda::equalizeHist(d_src, d_dst, d_buf); cuda::equalizeHist(d_src, d_dst);
CUDA_OFF; CUDA_OFF;
} }
} }

Loading…
Cancel
Save