From f17b836d246c0754149d2e9b55ffca655738db11 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 23 Apr 2013 17:11:45 +0400 Subject: [PATCH] added Event class (wrapper for cudaEvent) --- modules/core/include/opencv2/core/base.hpp | 1 + modules/core/include/opencv2/core/gpu.hpp | 37 ++++++ .../opencv2/core/gpu_stream_accessor.hpp | 6 + modules/core/src/gpu_stream.cpp | 120 ++++++++++++++++++ 4 files changed, 164 insertions(+) diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp index 1bcaf4ef4e..637ecdf513 100644 --- a/modules/core/include/opencv2/core/base.hpp +++ b/modules/core/include/opencv2/core/base.hpp @@ -495,6 +495,7 @@ namespace gpu class CV_EXPORTS GpuMat; class CV_EXPORTS CudaMem; class CV_EXPORTS Stream; + class CV_EXPORTS Event; } } // cv diff --git a/modules/core/include/opencv2/core/gpu.hpp b/modules/core/include/opencv2/core/gpu.hpp index 88e3530826..bbeda31ef0 100644 --- a/modules/core/include/opencv2/core/gpu.hpp +++ b/modules/core/include/opencv2/core/gpu.hpp @@ -359,6 +359,9 @@ public: //! waits for stream tasks to complete void waitForCompletion(); + //! makes a compute stream wait on an event + void waitEvent(const Event& event); + //! adds a callback to be called on the host after all currently enqueued items in the stream have completed void enqueueHostCallback(StreamCallback callback, void* userData); @@ -390,6 +393,39 @@ private: friend struct StreamAccessor; }; +class CV_EXPORTS Event +{ +public: + enum CreateFlags + { + DEFAULT = 0x00, /**< Default event flag */ + BLOCKING_SYNC = 0x01, /**< Event uses blocking synchronization */ + DISABLE_TIMING = 0x02, /**< Event will not record timing data */ + INTERPROCESS = 0x04 /**< Event is suitable for interprocess use. DisableTiming must be set */ + }; + + explicit Event(CreateFlags flags = DEFAULT); + + //! records an event + void record(Stream& stream = Stream::Null()); + + //! queries an event's status + bool queryIfComplete() const; + + //! waits for an event to complete + void waitForCompletion(); + + //! computes the elapsed time between events + static float elapsedTime(const Event& start, const Event& end); + + class Impl; + +private: + Ptr impl_; + + friend struct EventAccessor; +}; + //////////////////////////////// Initialization & Info //////////////////////// //! this is the only function that do not throw exceptions if the library is compiled without CUDA @@ -642,6 +678,7 @@ CV_EXPORTS void printShortCudaDeviceInfo(int device); namespace cv { template <> CV_EXPORTS void Ptr::delete_obj(); +template <> CV_EXPORTS void Ptr::delete_obj(); } diff --git a/modules/core/include/opencv2/core/gpu_stream_accessor.hpp b/modules/core/include/opencv2/core/gpu_stream_accessor.hpp index 364ab279ab..cf7d3c4316 100644 --- a/modules/core/include/opencv2/core/gpu_stream_accessor.hpp +++ b/modules/core/include/opencv2/core/gpu_stream_accessor.hpp @@ -60,11 +60,17 @@ namespace cv namespace gpu { class Stream; + class Event; struct StreamAccessor { CV_EXPORTS static cudaStream_t getStream(const Stream& stream); }; + + struct EventAccessor + { + CV_EXPORTS static cudaEvent_t getEvent(const Event& event); + }; } } diff --git a/modules/core/src/gpu_stream.cpp b/modules/core/src/gpu_stream.cpp index cf90501593..879775355c 100644 --- a/modules/core/src/gpu_stream.cpp +++ b/modules/core/src/gpu_stream.cpp @@ -45,6 +45,9 @@ using namespace cv; using namespace cv::gpu; +//////////////////////////////////////////////////////////////// +// Stream + #ifndef HAVE_CUDA class cv::gpu::Stream::Impl @@ -126,6 +129,16 @@ void cv::gpu::Stream::waitForCompletion() #endif } +void cv::gpu::Stream::waitEvent(const Event& event) +{ +#ifndef HAVE_CUDA + (void) event; + throw_no_cuda(); +#else + cudaSafeCall( cudaStreamWaitEvent(impl_->stream, EventAccessor::getEvent(event), 0) ); +#endif +} + #if defined(HAVE_CUDA) && (CUDART_VERSION >= 5000) namespace @@ -186,3 +199,110 @@ template <> void cv::Ptr::delete_obj() { if (obj) delete obj; } + +//////////////////////////////////////////////////////////////// +// Stream + +#ifndef HAVE_CUDA + +class cv::gpu::Event::Impl +{ +public: + Impl(unsigned int) + { + throw_no_cuda(); + } +}; + +#else + +class cv::gpu::Event::Impl +{ +public: + cudaEvent_t event; + + Impl(unsigned int flags); + ~Impl(); +}; + +cv::gpu::Event::Impl::Impl(unsigned int flags) : event(0) +{ + cudaSafeCall( cudaEventCreateWithFlags(&event, flags) ); +} + +cv::gpu::Event::Impl::~Impl() +{ + if (event) + cudaEventDestroy(event); +} + +cudaEvent_t cv::gpu::EventAccessor::getEvent(const Event& event) +{ + return event.impl_->event; +} + +#endif + +cv::gpu::Event::Event(CreateFlags flags) +{ +#ifndef HAVE_CUDA + (void) flags; + throw_no_cuda(); +#else + impl_ = new Impl(flags); +#endif +} + +void cv::gpu::Event::record(Stream& stream) +{ +#ifndef HAVE_CUDA + (void) stream; + throw_no_cuda(); +#else + cudaSafeCall( cudaEventRecord(impl_->event, StreamAccessor::getStream(stream)) ); +#endif +} + +bool cv::gpu::Event::queryIfComplete() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return false; +#else + cudaError_t err = cudaEventQuery(impl_->event); + + if (err == cudaErrorNotReady || err == cudaSuccess) + return err == cudaSuccess; + + cudaSafeCall(err); + return false; +#endif +} + +void cv::gpu::Event::waitForCompletion() +{ +#ifndef HAVE_CUDA + throw_no_cuda(); +#else + cudaSafeCall( cudaEventSynchronize(impl_->event) ); +#endif +} + +float cv::gpu::Event::elapsedTime(const Event& start, const Event& end) +{ +#ifndef HAVE_CUDA + (void) start; + (void) end; + throw_no_cuda(); + return 0.0f; +#else + float ms; + cudaSafeCall( cudaEventElapsedTime(&ms, start.impl_->event, end.impl_->event) ); + return ms; +#endif +} + +template <> void cv::Ptr::delete_obj() +{ + if (obj) delete obj; +}