From f17b836d246c0754149d2e9b55ffca655738db11 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Tue, 23 Apr 2013 17:11:45 +0400
Subject: [PATCH] added Event class (wrapper for cudaEvent)

---
 modules/core/include/opencv2/core/base.hpp    |   1 +
 modules/core/include/opencv2/core/gpu.hpp     |  37 ++++++
 .../opencv2/core/gpu_stream_accessor.hpp      |   6 +
 modules/core/src/gpu_stream.cpp               | 120 ++++++++++++++++++
 4 files changed, 164 insertions(+)
diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp
index 1bcaf4ef4e..637ecdf513 100644
--- a/modules/core/include/opencv2/core/base.hpp
+++ b/modules/core/include/opencv2/core/base.hpp
@@ -495,6 +495,7 @@ namespace gpu
     class CV_EXPORTS GpuMat;
     class CV_EXPORTS CudaMem;
     class CV_EXPORTS Stream;
+    class CV_EXPORTS Event;
 }
 
 } // cv
diff --git a/modules/core/include/opencv2/core/gpu.hpp b/modules/core/include/opencv2/core/gpu.hpp
index 88e3530826..bbeda31ef0 100644
--- a/modules/core/include/opencv2/core/gpu.hpp
+++ b/modules/core/include/opencv2/core/gpu.hpp
@@ -359,6 +359,9 @@ public:
     //! waits for stream tasks to complete
     void waitForCompletion();
 
+    //! makes a compute stream wait on an event
+    void waitEvent(const Event& event);
+
     //! adds a callback to be called on the host after all currently enqueued items in the stream have completed
     void enqueueHostCallback(StreamCallback callback, void* userData);
 
@@ -390,6 +393,39 @@ private:
     friend struct StreamAccessor;
 };
 
+class CV_EXPORTS Event
+{
+public:
+    enum CreateFlags
+    {
+        DEFAULT        = 0x00,  /**< Default event flag */
+        BLOCKING_SYNC  = 0x01,  /**< Event uses blocking synchronization */
+        DISABLE_TIMING = 0x02,  /**< Event will not record timing data */
+        INTERPROCESS   = 0x04   /**< Event is suitable for interprocess use. DisableTiming must be set */
+    };
+
+    explicit Event(CreateFlags flags = DEFAULT);
+
+    //! records an event
+    void record(Stream& stream = Stream::Null());
+
+    //! queries an event's status
+    bool queryIfComplete() const;
+
+    //! waits for an event to complete
+    void waitForCompletion();
+
+    //! computes the elapsed time between events
+    static float elapsedTime(const Event& start, const Event& end);
+
+    class Impl;
+
+private:
+    Ptr<Impl> impl_;
+
+    friend struct EventAccessor;
+};
+
 //////////////////////////////// Initialization & Info ////////////////////////
 
 //! this is the only function that do not throw exceptions if the library is compiled without CUDA
@@ -642,6 +678,7 @@ CV_EXPORTS void printShortCudaDeviceInfo(int device);
 namespace cv {
 
 template <> CV_EXPORTS void Ptr<cv::gpu::Stream::Impl>::delete_obj();
+template <> CV_EXPORTS void Ptr<cv::gpu::Event::Impl>::delete_obj();
 
 }
 
diff --git a/modules/core/include/opencv2/core/gpu_stream_accessor.hpp b/modules/core/include/opencv2/core/gpu_stream_accessor.hpp
index 364ab279ab..cf7d3c4316 100644
--- a/modules/core/include/opencv2/core/gpu_stream_accessor.hpp
+++ b/modules/core/include/opencv2/core/gpu_stream_accessor.hpp
@@ -60,11 +60,17 @@ namespace cv
     namespace gpu
     {
         class Stream;
+        class Event;
 
         struct StreamAccessor
         {
             CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
         };
+
+        struct EventAccessor
+        {
+            CV_EXPORTS static cudaEvent_t getEvent(const Event& event);
+        };
     }
 }
 
diff --git a/modules/core/src/gpu_stream.cpp b/modules/core/src/gpu_stream.cpp
index cf90501593..879775355c 100644
--- a/modules/core/src/gpu_stream.cpp
+++ b/modules/core/src/gpu_stream.cpp
@@ -45,6 +45,9 @@
 using namespace cv;
 using namespace cv::gpu;
 
+////////////////////////////////////////////////////////////////
+// Stream
+
 #ifndef HAVE_CUDA
 
 class cv::gpu::Stream::Impl
@@ -126,6 +129,16 @@ void cv::gpu::Stream::waitForCompletion()
 #endif
 }
 
+void cv::gpu::Stream::waitEvent(const Event& event)
+{
+#ifndef HAVE_CUDA
+    (void) event;
+    throw_no_cuda();
+#else
+    cudaSafeCall( cudaStreamWaitEvent(impl_->stream, EventAccessor::getEvent(event), 0) );
+#endif
+}
+
 #if defined(HAVE_CUDA) && (CUDART_VERSION >= 5000)
 
 namespace
@@ -186,3 +199,110 @@ template <> void cv::Ptr<Stream::Impl>::delete_obj()
 {
     if (obj) delete obj;
 }
+
+////////////////////////////////////////////////////////////////
+// Stream
+
+#ifndef HAVE_CUDA
+
+class cv::gpu::Event::Impl
+{
+public:
+    Impl(unsigned int)
+    {
+        throw_no_cuda();
+    }
+};
+
+#else
+
+class cv::gpu::Event::Impl
+{
+public:
+    cudaEvent_t event;
+
+    Impl(unsigned int flags);
+    ~Impl();
+};
+
+cv::gpu::Event::Impl::Impl(unsigned int flags) : event(0)
+{
+    cudaSafeCall( cudaEventCreateWithFlags(&event, flags) );
+}
+
+cv::gpu::Event::Impl::~Impl()
+{
+    if (event)
+        cudaEventDestroy(event);
+}
+
+cudaEvent_t cv::gpu::EventAccessor::getEvent(const Event& event)
+{
+    return event.impl_->event;
+}
+
+#endif
+
+cv::gpu::Event::Event(CreateFlags flags)
+{
+#ifndef HAVE_CUDA
+    (void) flags;
+    throw_no_cuda();
+#else
+    impl_ = new Impl(flags);
+#endif
+}
+
+void cv::gpu::Event::record(Stream& stream)
+{
+#ifndef HAVE_CUDA
+    (void) stream;
+    throw_no_cuda();
+#else
+    cudaSafeCall( cudaEventRecord(impl_->event, StreamAccessor::getStream(stream)) );
+#endif
+}
+
+bool cv::gpu::Event::queryIfComplete() const
+{
+#ifndef HAVE_CUDA
+    throw_no_cuda();
+    return false;
+#else
+    cudaError_t err = cudaEventQuery(impl_->event);
+
+    if (err == cudaErrorNotReady || err == cudaSuccess)
+        return err == cudaSuccess;
+
+    cudaSafeCall(err);
+    return false;
+#endif
+}
+
+void cv::gpu::Event::waitForCompletion()
+{
+#ifndef HAVE_CUDA
+    throw_no_cuda();
+#else
+    cudaSafeCall( cudaEventSynchronize(impl_->event) );
+#endif
+}
+
+float cv::gpu::Event::elapsedTime(const Event& start, const Event& end)
+{
+#ifndef HAVE_CUDA
+    (void) start;
+    (void) end;
+    throw_no_cuda();
+    return 0.0f;
+#else
+    float ms;
+    cudaSafeCall( cudaEventElapsedTime(&ms, start.impl_->event, end.impl_->event) );
+    return ms;
+#endif
+}
+
+template <> void cv::Ptr<Event::Impl>::delete_obj()
+{
+    if (obj) delete obj;
+}