From 3d93199fd30adc11e4b26d2e0bbeadc81468d8b2 Mon Sep 17 00:00:00 2001
From: kallaballa <amir@viel-zu.com>
Date: Fri, 29 Sep 2023 06:39:51 +0200
Subject: [PATCH] emscripten adaptions

---
 .../opencv2/v4d/detail/framebuffercontext.hpp |  33 +++-
 modules/v4d/include/opencv2/v4d/v4d.hpp       |  40 ++++-
 modules/v4d/samples/beauty-demo.cpp           |   5 +-
 modules/v4d/samples/optflow-demo.cpp          | 115 +++++++-------
 modules/v4d/src/detail/clvacontext.cpp        |  23 ++-
 modules/v4d/src/detail/framebuffercontext.cpp |   9 +-
 modules/v4d/src/util.cpp                      |  17 +-
 modules/v4d/src/v4d.cpp                       | 145 ++++++++++++------
 8 files changed, 251 insertions(+), 136 deletions(-)
diff --git a/modules/v4d/include/opencv2/v4d/detail/framebuffercontext.hpp b/modules/v4d/include/opencv2/v4d/detail/framebuffercontext.hpp
index 4bf88e44f..a27da5509 100644
--- a/modules/v4d/include/opencv2/v4d/detail/framebuffercontext.hpp
+++ b/modules/v4d/include/opencv2/v4d/detail/framebuffercontext.hpp
@@ -120,7 +120,9 @@ public:
     class CV_EXPORTS FrameBufferScope {
         FrameBufferContext& ctx_;
         cv::UMat& m_;
+#ifndef __EMSCRIPTEN__
         std::shared_ptr<ocl::OpenCLExecutionContext> pExecCtx;
+#endif
     public:
         /*!
          * Aquires the framebuffer via cl-gl sharing.
@@ -128,16 +130,37 @@ public:
          * @param m The UMat to bind the OpenGL framebuffer to.
          */
         CV_EXPORTS FrameBufferScope(FrameBufferContext& ctx, cv::UMat& m) :
-                ctx_(ctx), m_(m), pExecCtx(std::static_pointer_cast<ocl::OpenCLExecutionContext>(m.u->allocatorContext)) {
-            ocl::OpenCLExecutionContextScope execScope(*pExecCtx.get());
-            ctx_.acquireFromGL(m_);
+                ctx_(ctx), m_(m)
+#ifndef __EMSCRIPTEN__
+        , pExecCtx(m.u ? std::static_pointer_cast<ocl::OpenCLExecutionContext>(m.u->allocatorContext) : std::shared_ptr<ocl::OpenCLExecutionContext>()) //m.u is null for an empty UMat: do not dereference it here, let the assert below report the problem
+#endif
+        {
+            CV_Assert(!m.empty());
+#ifndef __EMSCRIPTEN__
+            if(pExecCtx && !pExecCtx->empty()) { //only bind a usable OpenCL context, same check as in CLVAContext
+                CLExecScope_t execScope(*pExecCtx.get());
+                ctx_.acquireFromGL(m_);
+            } else
+#endif
+            {
+                ctx_.acquireFromGL(m_);
+            }
         }
         /*!
          * Releases the framebuffer via cl-gl sharing.
          */
         CV_EXPORTS ~FrameBufferScope() {
-            ocl::OpenCLExecutionContextScope execScope(*pExecCtx.get());
-            ctx_.releaseToGL(m_);
+#ifndef __EMSCRIPTEN__
+            //Mirrors the constructor's check so that acquire and release take the same path
+            if (pExecCtx && !pExecCtx->empty()) {
+                CLExecScope_t execScope(*pExecCtx.get());
+                ctx_.releaseToGL(m_);
+            }
+            else
+#endif
+            {
+                ctx_.releaseToGL(m_);
+            }
         }
     };
 
diff --git a/modules/v4d/include/opencv2/v4d/v4d.hpp b/modules/v4d/include/opencv2/v4d/v4d.hpp
index c46153b7e..7f05067e5 100644
--- a/modules/v4d/include/opencv2/v4d/v4d.hpp
+++ b/modules/v4d/include/opencv2/v4d/v4d.hpp
@@ -25,6 +25,7 @@
 #include <set>
 #include <map>
 #include <string>
+#include <memory>
 
 #include <opencv2/core.hpp>
 #include <opencv2/imgproc.hpp>
@@ -80,6 +81,11 @@ using namespace cv::v4d::detail;
 class CV_EXPORTS V4D {
     friend class detail::FrameBufferContext;
     friend class HTML5Capture;
+    static const std::thread::id default_thread_id_;
+    static std::thread::id main_thread_id_;
+    static concurrent::threadpool thread_pool_;
+    std::map<std::string, cv::Ptr<cv::UMat>> umat_pool_;
+    std::map<std::string, std::shared_ptr<void>> data_pool_;
     cv::Ptr<V4D> self_;
     cv::Size initialSize_;
     bool debug_;
@@ -95,7 +101,6 @@ class CV_EXPORTS V4D {
     bool closed_ = false;
     cv::Ptr<Source> source_;
     cv::Ptr<Sink> sink_;
-    concurrent::threadpool pool_;
     cv::UMat currentReaderFrame_;
     cv::UMat nextReaderFrame_;
     cv::UMat currentWriterFrame_;
@@ -128,6 +133,35 @@ public:
      * Default destructor
      */
     CV_EXPORTS virtual ~V4D();
+
+    //Returns the pooled object registered as name, default-constructing a T on first use.
+    //NOTE: the pool stores shared_ptr<void>, so a name must always be requested with the same T.
+    template<typename T>
+    T& get(const string& name) {
+        auto it = data_pool_.find(name);
+        if(it == data_pool_.end()) {
+            //allocate only on a miss instead of constructing a throw-away T on every lookup
+            it = data_pool_.emplace(name, std::make_shared<T>()).first;
+        }
+        return *static_cast<T*>(it->second.get());
+    }
+
+    //Returns the pooled object registered as name; creatorFunc is only invoked when name is not pooled yet.
+    template<typename T>
+    T& init(const string& name, std::function<std::shared_ptr<T>()> creatorFunc) {
+        auto found = data_pool_.find(name);
+        if(found != data_pool_.end())
+            return *static_cast<T*>(found->second.get());
+
+        std::shared_ptr<void> created = std::static_pointer_cast<void, T>(creatorFunc());
+        data_pool_.insert({name, created});
+        return *static_cast<T*>(created.get());
+    }
+
+    CV_EXPORTS cv::Ptr<cv::UMat> get(const string& name);
+    CV_EXPORTS cv::Ptr<cv::UMat> get(const string& name, cv::Size sz, int type);
+
+    CV_EXPORTS bool isMain() const;
     /*!
      * The internal framebuffer exposed as OpenGL Texture2D.
      * @return The texture object.
@@ -177,7 +211,11 @@ public:
      * This function main purpose is to abstract the run loop for portability reasons.
      * @param fn A functor that will be called repeatetly until the application terminates or the functor returns false
      */
+#ifndef __EMSCRIPTEN__
     CV_EXPORTS void run(std::function<bool(cv::Ptr<V4D>)> fn, size_t workers = 0);
+#else
+    CV_EXPORTS void run(std::function<bool(cv::Ptr<V4D>)> fn, size_t workers = 0);
+#endif
     /*!
      * Called to feed an image directly to the framebuffer
      */
diff --git a/modules/v4d/samples/beauty-demo.cpp b/modules/v4d/samples/beauty-demo.cpp
index f5a6b8eb8..1b7a73268 100644
--- a/modules/v4d/samples/beauty-demo.cpp
+++ b/modules/v4d/samples/beauty-demo.cpp
@@ -239,8 +239,7 @@ static bool iteration(cv::Ptr<V4D> window) {
         shapes.clear();
         cv::Mat faces;
         //Detect faces in the down-scaled image
-        cv::Mat m = down.getMat(cv::ACCESS_RW);
-        detector->detect(m, faces);
+        detector->detect(down, faces);
         //Only add the first face
 		cv::Rect faceRect;
 		if(!faces.empty())
@@ -386,7 +385,7 @@ int main() {
     window->setSource(src);
 #endif
 
-    window->run(iteration);
+    window->run(iteration,3);
 
     return 0;
 }
diff --git a/modules/v4d/samples/optflow-demo.cpp b/modules/v4d/samples/optflow-demo.cpp
index 35d75f839..fd1dfb64a 100644
--- a/modules/v4d/samples/optflow-demo.cpp
+++ b/modules/v4d/samples/optflow-demo.cpp
@@ -38,7 +38,6 @@ constexpr const char* OUTPUT_FILENAME = "../optflow-demo.mkv";
 constexpr bool OFFSCREEN = false;
 
 #ifndef __EMSCRIPTEN__
-static std::thread::id main_thread_id;
 //the second window
 static cv::Ptr<cv::v4d::V4D> miniWindow;
 #endif
@@ -57,66 +56,67 @@ enum BackgroundModes {
 enum PostProcModes {
     GLOW,
     BLOOM,
-    NONE
+    DISABLED //no effect is applied: composite_layers() just copies the foreground for this mode
 };
 
 // Generate the foreground at this scale.
-float fg_scale = 0.5f;
+static float fg_scale = 0.5f;
 // On every frame the foreground loses on brightness. Specifies the loss in percent.
 #ifndef __EMSCRIPTEN__
-float fg_loss = 2.5;
+static float fg_loss = 2.5;
 #else
-float fg_loss = 10.0;
+static float fg_loss = 10.0;
 #endif
 //Convert the background to greyscale
-BackgroundModes background_mode = GREY;
+static BackgroundModes background_mode = GREY;
 // Peak thresholds for the scene change detection. Lowering them makes the detection more sensitive but
 // the default should be fine.
-float scene_change_thresh = 0.29f;
-float scene_change_thresh_diff = 0.1f;
+static float scene_change_thresh = 0.29f;
+static float scene_change_thresh_diff = 0.1f;
 // The theoretical maximum number of points to track which is scaled by the density of detected points
 // and therefor is usually much smaller.
 #ifndef __EMSCRIPTEN__
-int max_points = 250000;
+static int max_points = 250000;
 #else
-int max_points = 100000;
+static int max_points = 100000;
 #endif
 // How many of the tracked points to lose intentionally, in percent.
 #ifndef __EMSCRIPTEN__
-float point_loss = 25;
+static float point_loss = 25;
 #else
-float point_loss = 10;
+static float point_loss = 10;
 #endif
 // The theoretical maximum size of the drawing stroke which is scaled by the area of the convex hull
 // of tracked points and therefor is usually much smaller.
-int max_stroke = 10;
+static int max_stroke = 10;
 
 // Red, green, blue and alpha. All from 0.0f to 1.0f
-float effect_color[4] = {1.0f, 0.75f, 0.4f, 1.0f};
+static float effect_color[4] = {1.0f, 0.75f, 0.4f, 1.0f};
 //display on-screen FPS
-bool show_fps = true;
+static bool show_fps = true;
 //Stretch frame buffer to window size
-bool stretch = false;
-//Use OpenCL or not
-bool use_acceleration = true;
+static bool stretch = false;
 //The post processing mode
 #ifndef __EMSCRIPTEN__
-PostProcModes post_proc_mode = GLOW;
+static PostProcModes post_proc_mode = GLOW;
 #else
-PostProcModes post_proc_mode = NONE;
+static PostProcModes post_proc_mode = DISABLED;
 #endif
 // Intensity of glow or bloom defined by kernel size. The default scales with the image diagonal.
-int glow_kernel_size = std::max(int(DIAG / 100 % 2 == 0 ? DIAG / 100 + 1 : DIAG / 100), 1);
+static int glow_kernel_size = std::max(int(DIAG / 100 % 2 == 0 ? DIAG / 100 + 1 : DIAG / 100), 1);
 //The lightness selection threshold
-int bloom_thresh = 210;
+static int bloom_thresh = 210;
 //The intensity of the bloom filter
-float bloom_gain = 3;
+static float bloom_gain = 3;
+
+using namespace cv::v4d;
+
 
 //Uses background subtraction to generate a "motion mask"
 static void prepare_motion_mask(const cv::UMat& srcGrey, cv::UMat& motionMaskGrey) {
-    static thread_local cv::Ptr<cv::BackgroundSubtractor> bg_subtrator = cv::createBackgroundSubtractorMOG2(100, 16.0, false);
-    static thread_local int morph_size = 1;
-    static thread_local cv::Mat element = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2 * morph_size + 1, 2 * morph_size + 1), cv::Point(morph_size, morph_size));
+    thread_local cv::Ptr<cv::BackgroundSubtractor> bg_subtrator = cv::createBackgroundSubtractorMOG2(100, 16.0, false);
+    thread_local int morph_size = 1;
+    thread_local cv::Mat element = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2 * morph_size + 1, 2 * morph_size + 1), cv::Point(morph_size, morph_size));
 
     bg_subtrator->apply(srcGrey, motionMaskGrey);
     //Surpress speckles
@@ -125,8 +125,8 @@ static void prepare_motion_mask(const cv::UMat& srcGrey, cv::UMat& motionMaskGre
 
 //Detect points to track
 static void detect_points(const cv::UMat& srcMotionMaskGrey, vector<cv::Point2f>& points) {
-    static thread_local cv::Ptr<cv::FastFeatureDetector> detector = cv::FastFeatureDetector::create(1, false);
-    static thread_local vector<cv::KeyPoint> tmpKeyPoints;
+    thread_local cv::Ptr<cv::FastFeatureDetector> detector = cv::FastFeatureDetector::create(1, false);
+    thread_local vector<cv::KeyPoint> tmpKeyPoints;
 
     tmpKeyPoints.clear();
     detector->detect(srcMotionMaskGrey, tmpKeyPoints);
@@ -139,7 +139,7 @@ static void detect_points(const cv::UMat& srcMotionMaskGrey, vector<cv::Point2f>
 
 //Detect extrem changes in scene content and report it
 static bool detect_scene_change(const cv::UMat& srcMotionMaskGrey, const float thresh, const float theshDiff) {
-    static thread_local float last_movement = 0;
+    thread_local float last_movement = 0;
 
     float movement = cv::countNonZero(srcMotionMaskGrey) / float(srcMotionMaskGrey.cols * srcMotionMaskGrey.rows);
     float relation = movement > 0 && last_movement > 0 ? std::max(movement, last_movement) / std::min(movement, last_movement) : 0;
@@ -154,12 +154,12 @@ static bool detect_scene_change(const cv::UMat& srcMotionMaskGrey, const float t
 
 //Visualize the sparse optical flow
 static void visualize_sparse_optical_flow(const cv::UMat &prevGrey, const cv::UMat &nextGrey, const vector<cv::Point2f> &detectedPoints, const float scaleFactor, const int maxStrokeSize, const cv::Scalar color, const int maxPoints, const float pointLossPercent) {
-    static thread_local vector<cv::Point2f> hull, prevPoints, nextPoints, newPoints;
-    static thread_local vector<cv::Point2f> upPrevPoints, upNextPoints;
-    static thread_local std::vector<uchar> status;
-    static thread_local std::vector<float> err;
-    static thread_local std::random_device rd;
-    static thread_local std::mt19937 g(rd());
+    thread_local vector<cv::Point2f> hull, prevPoints, nextPoints, newPoints;
+    thread_local vector<cv::Point2f> upPrevPoints, upNextPoints;
+    thread_local std::vector<uchar> status;
+    thread_local std::vector<float> err;
+    thread_local std::random_device rd;
+    thread_local std::mt19937 g(rd());
 
     //less then 5 points is a degenerate case (e.g. the corners of a video frame)
     if (detectedPoints.size() > 4) {
@@ -231,12 +231,12 @@ static void visualize_sparse_optical_flow(const cv::UMat &prevGrey, const cv::UM
 
 //Bloom post-processing effect
 static void bloom(const cv::UMat& src, cv::UMat &dst, int ksize = 3, int threshValue = 235, float gain = 4) {
-    static thread_local cv::UMat bgr;
-    static thread_local cv::UMat hls;
-    static thread_local cv::UMat ls16;
-    static thread_local cv::UMat ls;
-    static thread_local cv::UMat blur;
-    static thread_local std::vector<cv::UMat> hlsChannels;
+    thread_local cv::UMat bgr;
+    thread_local cv::UMat hls;
+    thread_local cv::UMat ls16;
+    thread_local cv::UMat ls;
+    thread_local cv::UMat blur;
+    thread_local std::vector<cv::UMat> hlsChannels;
 
     //remove alpha channel
     cv::cvtColor(src, bgr, cv::COLOR_BGRA2RGB);
@@ -285,10 +285,10 @@ static void glow_effect(const cv::UMat &src, cv::UMat &dst, const int ksize) {
 
 //Compose the different layers into the final image
 static void composite_layers(cv::UMat& background, const cv::UMat& foreground, const cv::UMat& frameBuffer, cv::UMat& dst, int kernelSize, float fgLossPercent, BackgroundModes bgMode, PostProcModes ppMode) {
-    static thread_local cv::UMat tmp;
-    static thread_local cv::UMat post;
-    static thread_local cv::UMat backgroundGrey;
-    static thread_local vector<cv::UMat> channels;
+    thread_local cv::UMat tmp;
+    thread_local cv::UMat post;
+    thread_local cv::UMat backgroundGrey;
+    thread_local vector<cv::UMat> channels;
 
     //Lose a bit of foreground brightness based on fgLossPercent
     cv::subtract(foreground, cv::Scalar::all(255.0f * (fgLossPercent / 100.0f)), foreground);
@@ -324,7 +324,7 @@ static void composite_layers(cv::UMat& background, const cv::UMat& foreground, c
     case BLOOM:
         bloom(foreground, post, kernelSize, bloom_thresh, bloom_gain);
         break;
-    case NONE:
+    case DISABLED:
         foreground.copyTo(post);
         break;
     default:
@@ -348,8 +348,8 @@ static void setup_gui(cv::Ptr<V4D> main, cv::Ptr<V4D> mini) {
         SliderFloat("Scale", &fg_scale, 0.1f, 4.0f);
         SliderFloat("Loss", &fg_loss, 0.1f, 99.9f);
         Text("Background");
-        static thread_local const char* bgm_items[4] = {"Grey", "Color", "Value", "Black"};
-        static thread_local int* bgm = (int*)&background_mode;
+        thread_local const char* bgm_items[4] = {"Grey", "Color", "Value", "Black"};
+        thread_local int* bgm = (int*)&background_mode;
         ListBox("Mode", bgm, bgm_items, 4, 4);
         Text("Points");
         SliderInt("Max. Points", &max_points, 10, 1000000);
@@ -360,8 +360,8 @@ static void setup_gui(cv::Ptr<V4D> main, cv::Ptr<V4D> mini) {
         End();
 
         Begin("Post Processing");
-        static thread_local const char* ppm_items[3] = {"Glow", "Bloom", "None"};
-        static thread_local int* ppm = (int*)&post_proc_mode;
+        thread_local const char* ppm_items[3] = {"Glow", "Bloom", "None"};
+        thread_local int* ppm = (int*)&post_proc_mode;
         ListBox("Effect",ppm, ppm_items, 3, 3);
         SliderInt("Kernel Size",&glow_kernel_size, 1, 63);
         SliderFloat("Gain", &bloom_gain, 0.1f, 20.0f);
@@ -407,13 +407,13 @@ static bool iteration(cv::Ptr<V4D> window) {
         return false;
 
     //BGRA
-    static thread_local cv::UMat background, down;
-    static thread_local cv::UMat foreground(window->framebufferSize(), CV_8UC4, cv::Scalar::all(0));
+    thread_local cv::UMat background, down;
+    thread_local cv::UMat foreground(window->framebufferSize(), CV_8UC4, cv::Scalar::all(0));
     //BGR
-    static thread_local cv::UMat miniFrame;
+    thread_local cv::UMat miniFrame;
     //GREY
-    static thread_local cv::UMat downPrevGrey, downNextGrey, downMotionMaskGrey;
-    static thread_local vector<cv::Point2f> detectedPoints;
+    thread_local cv::UMat downPrevGrey, downNextGrey, downMotionMaskGrey;
+    thread_local vector<cv::Point2f> detectedPoints;
 
     window->fb([&](cv::UMat& frameBuffer) {
         //resize to foreground scale
@@ -449,14 +449,14 @@ static bool iteration(cv::Ptr<V4D> window) {
     });
 
 #ifndef __EMSCRIPTEN__
-    if(main_thread_id == std::this_thread::get_id())
+    if(window->isMain())
         miniWindow->feed(miniFrame);
 #endif
     window->write();
 
     //If onscreen rendering is enabled it displays the framebuffer in the native window. Returns false if the window was closed.
 #ifndef __EMSCRIPTEN__
-    if(main_thread_id == std::this_thread::get_id()) {
+    if(window->isMain()) {
         if(window->isFocused()) {
             return window->display() && miniWindow->display();
         }
@@ -472,7 +472,6 @@ static bool iteration(cv::Ptr<V4D> window) {
 }
 
 int main(int argc, char **argv) {
-    main_thread_id = std::this_thread::get_id();
     CV_UNUSED(argc);
     CV_UNUSED(argv);
 
diff --git a/modules/v4d/src/detail/clvacontext.cpp b/modules/v4d/src/detail/clvacontext.cpp
index 1c749dcc1..086d42efb 100644
--- a/modules/v4d/src/detail/clvacontext.cpp
+++ b/modules/v4d/src/detail/clvacontext.cpp
@@ -29,10 +29,14 @@ bool CLVAContext::capture(std::function<void(cv::UMat&)> fn, cv::UMat& output) {
         if (readFrame_.empty())
             return false;
         std::shared_ptr<ocl::OpenCLExecutionContext> pExecCtx = std::static_pointer_cast<ocl::OpenCLExecutionContext>(readFrame_.u->allocatorContext);
-        cv::ocl::OpenCLExecutionContextScope scope(*pExecCtx.get());
-
-        resizePreserveAspectRatio(readFrame_, readRGBBuffer_, mainFbContext_.size());
-        cv::cvtColor(readRGBBuffer_, output, cv::COLOR_RGB2BGRA);
+        if(pExecCtx && !pExecCtx->empty()) {
+            CLExecScope_t scope(*pExecCtx.get());
+            resizePreserveAspectRatio(readFrame_, readRGBBuffer_, mainFbContext_.size());
+            cv::cvtColor(readRGBBuffer_, output, cv::COLOR_RGB2BGRA);
+        } else {
+            resizePreserveAspectRatio(readFrame_, readRGBBuffer_, mainFbContext_.size());
+            cv::cvtColor(readRGBBuffer_, output, cv::COLOR_RGB2BGRA);
+        }
     }
 #endif
 
@@ -49,9 +53,14 @@ void CLVAContext::write(std::function<void(const cv::UMat&)> fn, cv::UMat& input
 #ifndef __EMSCRIPTEN__
     } else {
         std::shared_ptr<ocl::OpenCLExecutionContext> pExecCtx = std::static_pointer_cast<ocl::OpenCLExecutionContext>(input.u->allocatorContext);
-        cv::ocl::OpenCLExecutionContextScope scope(*pExecCtx.get());
-        cv::cvtColor(input, writeRGBBuffer_, cv::COLOR_BGRA2RGB);
-        fn(writeRGBBuffer_);
+        if(pExecCtx && !pExecCtx->empty()) {
+            CLExecScope_t scope(*pExecCtx.get());
+            cv::cvtColor(input, writeRGBBuffer_, cv::COLOR_BGRA2RGB);
+            fn(writeRGBBuffer_);
+        } else {
+            cv::cvtColor(input, writeRGBBuffer_, cv::COLOR_BGRA2RGB);
+            fn(writeRGBBuffer_);
+        }
     }
 #endif
 }
diff --git a/modules/v4d/src/detail/framebuffercontext.cpp b/modules/v4d/src/detail/framebuffercontext.cpp
index ef26b32d5..f8820bb4a 100644
--- a/modules/v4d/src/detail/framebuffercontext.cpp
+++ b/modules/v4d/src/detail/framebuffercontext.cpp
@@ -415,7 +415,6 @@ void FrameBufferContext::setup(const cv::Size& sz) {
     CLExecScope_t clExecScope(getCLExecContext());
 #endif
     framebuffer_.create(sz, CV_8UC4);
-
     if(!isShared_) {
         GL_CHECK(glGenFramebuffers(1, &frameBufferID_));
         GL_CHECK(glBindFramebuffer(GL_FRAMEBUFFER, frameBufferID_));
@@ -637,17 +636,16 @@ void FrameBufferContext::execute(std::function<void(cv::UMat&)> fn) {
 #ifndef __EMSCRIPTEN__
         if(!getCLExecContext().empty()) {
             CLExecScope_t clExecScope(getCLExecContext());
-#endif
             FrameBufferContext::GLScope glScope(*this, GL_FRAMEBUFFER);
             FrameBufferContext::FrameBufferScope fbScope(*this, framebuffer_);
             fn(framebuffer_);
-#ifndef __EMSCRIPTEN__
-        } else {
+        } else
+#endif
+        {
             FrameBufferContext::GLScope glScope(*this, GL_FRAMEBUFFER);
             FrameBufferContext::FrameBufferScope fbScope(*this, framebuffer_);
             fn(framebuffer_);
         }
-#endif
     });
 }
 
@@ -730,6 +728,7 @@ void FrameBufferContext::download(cv::UMat& m) {
     assert(tmp.data != nullptr);
     GL_CHECK(glReadPixels(0, 0, tmp.cols, tmp.rows, GL_RGBA, GL_UNSIGNED_BYTE, tmp.data));
     tmp.release();
+
 }
 
 void FrameBufferContext::upload(const cv::UMat& m) {
diff --git a/modules/v4d/src/util.cpp b/modules/v4d/src/util.cpp
index 299d696e9..931cdfcc8 100644
--- a/modules/v4d/src/util.cpp
+++ b/modules/v4d/src/util.cpp
@@ -238,6 +238,8 @@ bool isIntelVaSupported() {
 bool isClGlSharingSupported() {
 #ifndef __EMSCRIPTEN__
     try {
+        if(!cv::ocl::useOpenCL())
+            return false;
         std::vector<cv::ocl::PlatformInfo> plt_info;
         cv::ocl::getPlatfomsInfo(plt_info);
         cv::ocl::Device current;
@@ -353,17 +355,8 @@ static Source makeAnyHWSource(const string& inputFilename) {
     float fps = capture->get(cv::CAP_PROP_FPS);
 
     return Source([=](cv::UMat& frame) {
-        cv::UMat tmp;
-        (*capture) >> tmp;
-
-        if(frame.empty())
-            frame.create(tmp.size(), tmp.type());
-        if(!tmp.empty()) {
-            tmp.copyTo(frame.getMat(cv::ACCESS_WRITE));
-            return true;
-        } else {
-            return false;
-        }
+        (*capture) >> frame;
+        return !frame.empty();
     }, fps);
 }
 #endif
@@ -528,7 +521,7 @@ Source makeCaptureSource(int width, int height, cv::Ptr<V4D> window) {
             }
         }
         return true;
-    }, 0, false);
+    }, 0);
 }
 
 #endif
diff --git a/modules/v4d/src/v4d.cpp b/modules/v4d/src/v4d.cpp
index 5fcb393af..361ff8451 100644
--- a/modules/v4d/src/v4d.cpp
+++ b/modules/v4d/src/v4d.cpp
@@ -10,10 +10,15 @@
 #include "detail/glcontext.hpp"
 #include "detail/timetracker.hpp"
 #include <sstream>
+#include <algorithm>
 
 namespace cv {
 namespace v4d {
 
+const std::thread::id V4D::default_thread_id_;
+std::thread::id V4D::main_thread_id_;
+concurrent::threadpool V4D::thread_pool_(2);
+
 cv::Ptr<V4D> V4D::make(int w, int h, const string& title, AllocateFlags flags, bool offscreen, bool debug, int samples) {
     V4D* v4d = new V4D(cv::Size(w,h), cv::Size(), title, flags, offscreen, debug, samples);
     v4d->setVisible(!offscreen);
@@ -27,7 +32,7 @@ cv::Ptr<V4D> V4D::make(const cv::Size& size, const cv::Size& fbsize, const strin
 }
 
 V4D::V4D(const cv::Size& size, const cv::Size& fbsize, const string& title, AllocateFlags flags, bool offscreen, bool debug, int samples) :
-        initialSize_(size), debug_(debug), viewport_(0, 0, size.width, size.height), stretching_(true), pool_(2) {
+        initialSize_(size), debug_(debug), viewport_(0, 0, size.width, size.height), stretching_(true) {
 #ifdef __EMSCRIPTEN__
     printf(""); //makes sure we have FS as a dependency
 #endif
@@ -59,6 +64,21 @@ V4D::~V4D() {
     }
 }
 
+cv::Ptr<cv::UMat> V4D::get(const string& name) { //looks up the pooled UMat registered as name
+    return umat_pool_[name]; //NOTE: operator[] inserts (and returns) an empty Ptr when name is not pooled yet
+}
+
+//Returns the pooled UMat registered as name, allocated on first use and passed through create(sz, type).
+cv::Ptr<cv::UMat> V4D::get(const string& name, cv::Size sz, int type) {
+    cv::Ptr<cv::UMat>& u = umat_pool_[name]; //reference, so a newly made UMat is stored in the pool
+    if(!u) u = cv::makePtr<cv::UMat>(); //operator[] default-inserts an empty Ptr for a new name; dereferencing it would crash
+    u->create(sz, type);
+    return u;
+}
+bool V4D::isMain() const { //is the caller the thread recorded as main by the first run() call?
+        return main_thread_id_ == default_thread_id_ || main_thread_id_ == std::this_thread::get_id(); //also true while no main thread has been recorded yet
+}
+
 cv::ogl::Texture2D& V4D::texture() {
     return mainFbContext_->getTexture2D();
 }
@@ -216,59 +236,87 @@ static void do_frame(void* void_fn_ptr) {
      auto* fn_ptr = reinterpret_cast<std::function<bool()>*>(void_fn_ptr);
      if (fn_ptr) {
          auto& fn = *fn_ptr;
-         //FIXME cancel main loop
-         fn();
+             fn();
      }
  }
 #endif
 
+static bool first_run = true;
+
 void V4D::run(std::function<bool(cv::Ptr<V4D>)> fn, size_t workers) {
-#ifndef __EMSCRIPTEN__
     std::vector<std::thread*> threads;
-    for (size_t i = 0; i < workers; ++i) {
-        threads.push_back(
-                new std::thread(
-                        [this, fn, i] {
-                            cv::Ptr<cv::v4d::V4D> worker = V4D::make(
-                                    this->initialSize().width,
-                                    this->initialSize().height,
-                                    this->title() + "-worker-" + std::to_string(i),
-                                    NANOVG,
-                                    !this->debug_,
-                                    this->debug_,
-                                    0);
-                            if (this->hasSource()) {
-                                Source& src = this->getSource();
-                                src.setThreadSafe(true);
-                                worker->setSource(src);
-                            }
-                            if (this->hasSink()) {
-                                Sink& sink = this->getSink();
-                                sink.setThreadSafe(true);
-                                worker->setSink(sink);
-                            }
-                            worker->run(fn, 0);
-                        }
-                )
-        );
-    }
+    { //start-up (main thread detection, worker spawning) is serialized across all run() calls
+        static std::mutex runMtx;
+        //the first thread that ever enters run() is recorded as the main thread
+        std::unique_lock<std::mutex> lock(runMtx);
+        if(first_run) {
+            main_thread_id_ = std::this_thread::get_id();
+            first_run = false;
+            cerr << "Starting with " << workers << " extra workers" << endl;
+        }
+        //with extra workers (or inside a worker) OpenCV's own threading is switched off via setNumThreads(0)
+        if(workers > 0  || !this->isMain()) {
+            cv::setNumThreads(0);
+            cerr << "Setting threads to 0" << endl;
+        }
+        //only the main instance spawns workers; each worker runs fn on its own V4D using the same source and sink
+        if(this->isMain()) {
+            for (size_t i = 0; i < workers; ++i) {
+                threads.push_back(
+                        new std::thread(
+                                [this, fn, i] {
+                                    cv::Ptr<cv::v4d::V4D> worker = V4D::make(
+                                            this->initialSize().width,
+                                            this->initialSize().height,
+                                            this->title() + "-worker-" + std::to_string(i),
+                                            NANOVG,
+                                            !this->debug_,
+                                            this->debug_,
+                                            0);
+                                    if (this->hasSource()) {
+                                        Source& src = this->getSource();
+                                        src.setThreadSafe(true);
+                                        worker->setSource(src);
+                                    }
+                                    if (this->hasSink()) {
+                                        Sink& sink = this->getSink();
+                                        sink.setThreadSafe(true);
+                                        worker->setSink(sink);
+                                    }
+                                    worker->run(fn, 0);
+                                }
+                        )
+                );
+            }
+        }
+    }
     this->makeCurrent();
+#ifndef __EMSCRIPTEN__
     bool success = true;
     while (keepRunning() && success) {
         CLExecScope_t scope(fbCtx().getCLExecContext());
         success = fn(self());
     }
-    pool_.finish();
-
-    for(auto& t : threads)
-        t->join();
 #else
-    std::function<bool()> fnFrame([=,this](){
-        return fn(self());
-    });
-    emscripten_set_main_loop_arg(do_frame, &fnFrame, -1, true);
+    if(this->isMain()) {
+        std::function<bool()> fnFrame([=,this](){
+            return fn(self());
+        });
+        emscripten_set_main_loop_arg(do_frame, &fnFrame, -1, true);
+    } else {
+        while (true) { //NOTE(review): fn's return value is ignored here, so a worker never leaves this loop - confirm this is intended
+            fn(self());
+        }
+    }
 #endif
+
+    if(this->isMain()) {
+        thread_pool_.finish();
+        for(auto* t : threads) {
+            t->join();
+            delete t; //the thread objects were allocated with new above and were leaked before
+        }
+    }
 }
 
 void V4D::setSource(Source& src) {
@@ -285,7 +333,10 @@ bool V4D::hasSource() {
 }
 
 void V4D::feed(cv::InputArray in) {
+#ifndef __EMSCRIPTEN__
     CLExecScope_t scope(fbCtx().getCLExecContext());
+#endif
+
     TimeTracker::getInstance()->execute("feed", [this, &in](){
         cv::UMat frame;
         clvaCtx().capture([&](cv::UMat& videoFrame) {
@@ -309,6 +360,7 @@ cv::_InputArray V4D::fetch() {
 }
 
 bool V4D::capture() {
+#ifndef __EMSCRIPTEN__
     CLExecScope_t scope(fbCtx().getCLExecContext());
     if (source_) {
         return this->capture([this](cv::UMat& videoFrame) {
@@ -320,13 +372,16 @@ bool V4D::capture() {
             }
             return false;
         });
-#ifndef __EMSCRIPTEN__
-        return false;
+    }
+    return false;
 #else
+        if(source_ && source_->isReady()) {
+            auto p = source_->operator()();
+            currentSeqNr_ = p.first;
+        }
+
         return true;
 #endif
-    }
-    return false;
 }
 
 bool V4D::capture(std::function<void(cv::UMat&)> fn) {
@@ -357,7 +412,7 @@ bool V4D::capture(std::function<void(cv::UMat&)> fn) {
             }
         }
         nextReaderFrame_.copyTo(currentReaderFrame_);
-        futureReader_ = pool_.enqueue(
+        futureReader_ = thread_pool_.enqueue(
             [](V4D* v, std::function<void(UMat&)> func, cv::UMat& frame) {
 #ifndef __EMSCRIPTEN__
             return v->clvaCtx().capture(func, frame);
@@ -410,7 +465,7 @@ void V4D::write(std::function<void(const cv::UMat&)> fn) {
             frameBuffer.copyTo(currentWriterFrame_);
         });
 
-        futureWriter_ = pool_.enqueue([](V4D* v, std::function<void(const UMat&)> func, cv::UMat& frame) {
+        futureWriter_ = thread_pool_.enqueue([](V4D* v, std::function<void(const UMat&)> func, cv::UMat& frame) {
             v->clvaCtx().write(func, frame);
         }, this, fn, currentWriterFrame_);
     });