added Event class (wrapper for cudaEvent)

12 years ago · f17b836d24
parent 2dab93c2e8
commit f17b836d24
4 changed files with 164 additions and 0 deletions
--- a/modules/core/include/opencv2/core/base.hpp
+++ b/modules/core/include/opencv2/core/base.hpp
@ -495,6 +495,7 @@ namespace gpu
    class CV_EXPORTS GpuMat;
    class CV_EXPORTS CudaMem;
    class CV_EXPORTS Stream;
+    class CV_EXPORTS Event;
 }

 } // cv
--- a/modules/core/include/opencv2/core/gpu.hpp
+++ b/modules/core/include/opencv2/core/gpu.hpp
@ -359,6 +359,9 @@ public:
    //! waits for stream tasks to complete
    void waitForCompletion();

+    //! makes a compute stream wait on an event
+    void waitEvent(const Event& event);
+
    //! adds a callback to be called on the host after all currently enqueued items in the stream have completed
    void enqueueHostCallback(StreamCallback callback, void* userData);

@ -390,6 +393,39 @@ private:
    friend struct StreamAccessor;
 };

+class CV_EXPORTS Event
+{
+public:
+    enum CreateFlags
+    {
+        DEFAULT        = 0x00,  /**< Default event flag */
+        BLOCKING_SYNC  = 0x01,  /**< Event uses blocking synchronization */
+        DISABLE_TIMING = 0x02,  /**< Event will not record timing data */
+        INTERPROCESS   = 0x04   /**< Event is suitable for interprocess use. DisableTiming must be set */
+    };
+
+    explicit Event(CreateFlags flags = DEFAULT);
+
+    //! records an event
+    void record(Stream& stream = Stream::Null());
+
+    //! queries an event's status
+    bool queryIfComplete() const;
+
+    //! waits for an event to complete
+    void waitForCompletion();
+
+    //! computes the elapsed time between events
+    static float elapsedTime(const Event& start, const Event& end);
+
+    class Impl;
+
+private:
+    Ptr<Impl> impl_;
+
+    friend struct EventAccessor;
+};
+
 //////////////////////////////// Initialization & Info ////////////////////////

 //! this is the only function that do not throw exceptions if the library is compiled without CUDA
@ -642,6 +678,7 @@ CV_EXPORTS void printShortCudaDeviceInfo(int device);
 namespace cv {

 template <> CV_EXPORTS void Ptr<cv::gpu::Stream::Impl>::delete_obj();
+template <> CV_EXPORTS void Ptr<cv::gpu::Event::Impl>::delete_obj();

 }

--- a/modules/core/include/opencv2/core/gpu_stream_accessor.hpp
+++ b/modules/core/include/opencv2/core/gpu_stream_accessor.hpp
@ -60,11 +60,17 @@ namespace cv
    namespace gpu
    {
        class Stream;
+        class Event;

        struct StreamAccessor
        {
            CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
        };
+
+        struct EventAccessor
+        {
+            CV_EXPORTS static cudaEvent_t getEvent(const Event& event);
+        };
    }
 }

--- a/modules/core/src/gpu_stream.cpp
+++ b/modules/core/src/gpu_stream.cpp
@ -45,6 +45,9 @@
 using namespace cv;
 using namespace cv::gpu;

+////////////////////////////////////////////////////////////////
+// Stream
+
 #ifndef HAVE_CUDA

 class cv::gpu::Stream::Impl
@ -126,6 +129,16 @@ void cv::gpu::Stream::waitForCompletion()
 #endif
 }

+void cv::gpu::Stream::waitEvent(const Event& event)
+{
+#ifndef HAVE_CUDA
+    (void) event;
+    throw_no_cuda();
+#else
+    cudaSafeCall( cudaStreamWaitEvent(impl_->stream, EventAccessor::getEvent(event), 0) );
+#endif
+}
+
 #if defined(HAVE_CUDA) && (CUDART_VERSION >= 5000)

 namespace
@ -186,3 +199,110 @@ template <> void cv::Ptr<Stream::Impl>::delete_obj()
 {
    if (obj) delete obj;
 }
+
+////////////////////////////////////////////////////////////////
+// Stream
+
+#ifndef HAVE_CUDA
+
+class cv::gpu::Event::Impl
+{
+public:
+    Impl(unsigned int)
+    {
+        throw_no_cuda();
+    }
+};
+
+#else
+
+class cv::gpu::Event::Impl
+{
+public:
+    cudaEvent_t event;
+
+    Impl(unsigned int flags);
+    ~Impl();
+};
+
+cv::gpu::Event::Impl::Impl(unsigned int flags) : event(0)
+{
+    cudaSafeCall( cudaEventCreateWithFlags(&event, flags) );
+}
+
+cv::gpu::Event::Impl::~Impl()
+{
+    if (event)
+        cudaEventDestroy(event);
+}
+
+cudaEvent_t cv::gpu::EventAccessor::getEvent(const Event& event)
+{
+    return event.impl_->event;
+}
+
+#endif
+
+cv::gpu::Event::Event(CreateFlags flags)
+{
+#ifndef HAVE_CUDA
+    (void) flags;
+    throw_no_cuda();
+#else
+    impl_ = new Impl(flags);
+#endif
+}
+
+void cv::gpu::Event::record(Stream& stream)
+{
+#ifndef HAVE_CUDA
+    (void) stream;
+    throw_no_cuda();
+#else
+    cudaSafeCall( cudaEventRecord(impl_->event, StreamAccessor::getStream(stream)) );
+#endif
+}
+
+bool cv::gpu::Event::queryIfComplete() const
+{
+#ifndef HAVE_CUDA
+    throw_no_cuda();
+    return false;
+#else
+    cudaError_t err = cudaEventQuery(impl_->event);
+
+    if (err == cudaErrorNotReady || err == cudaSuccess)
+        return err == cudaSuccess;
+
+    cudaSafeCall(err);
+    return false;
+#endif
+}
+
+void cv::gpu::Event::waitForCompletion()
+{
+#ifndef HAVE_CUDA
+    throw_no_cuda();
+#else
+    cudaSafeCall( cudaEventSynchronize(impl_->event) );
+#endif
+}
+
+float cv::gpu::Event::elapsedTime(const Event& start, const Event& end)
+{
+#ifndef HAVE_CUDA
+    (void) start;
+    (void) end;
+    throw_no_cuda();
+    return 0.0f;
+#else
+    float ms;
+    cudaSafeCall( cudaEventElapsedTime(&ms, start.impl_->event, end.impl_->event) );
+    return ms;
+#endif
+}
+
+template <> void cv::Ptr<Event::Impl>::delete_obj()
+{
+    if (obj) delete obj;
+}