diff --git a/modules/v4d/samples/beauty-demo.cpp b/modules/v4d/samples/beauty-demo.cpp
index 6625a48be..0e8e6bfd4 100644
--- a/modules/v4d/samples/beauty-demo.cpp
+++ b/modules/v4d/samples/beauty-demo.cpp
@@ -33,24 +33,7 @@ constexpr const char *OUTPUT_FILENAME = "beauty-demo.mkv";
 #endif
 
 const unsigned long DIAG = hypot(double(WIDTH), double(HEIGHT));
-/* Visualization parameters */
 constexpr int BLUR_DIV = 500;
-static int blur_skin_kernel_size = std::max(int(DIAG / BLUR_DIV % 2 == 0 ? DIAG / BLUR_DIV + 1 : DIAG / BLUR_DIV), 1);
-//Saturation boost factor for eyes and lips
-static float eyes_and_lips_saturation = 2.0f;
-//Saturation boost factor for skin
-static float skin_saturation = 1.7f;
-//Contrast factor skin
-static float skin_contrast = 0.7f;
-#ifndef __EMSCRIPTEN__
-//Show input and output side by side
-static bool side_by_side = true;
-//Scale the video to the window size
-static bool stretch = true;
-#else
-static bool side_by_side = false;
-static bool stretch = false;
-#endif
 
 /*!
  * Data structure holding the points for all face landmarks
@@ -136,62 +119,33 @@ struct FaceFeatures {
     }
 };
 
-//based on the detected FaceFeatures it guesses a decent face oval and draws a mask for it.
-static void draw_face_oval_mask(const FaceFeatures &ff) {
-    using namespace cv::v4d::nvg;
-    clear();
-
-    vector<vector<cv::Point2f>> features = ff.features();
-    cv::RotatedRect rotRect = cv::fitEllipse(features[0]);
-
-    beginPath();
-    fillColor(cv::Scalar(255, 255, 255, 255));
-    ellipse(rotRect.center.x, rotRect.center.y * 1, rotRect.size.width / 2, rotRect.size.height / 2.5);
-    rotate(rotRect.angle);
-    fill();
-}
-
-//Draws a mask consisting of eyes and lips areas (deduced from FaceFeatures)
-static void draw_face_eyes_and_lips_mask(const FaceFeatures &ff) {
-    using namespace cv::v4d::nvg;
-    clear();
-    vector<vector<cv::Point2f>> features = ff.features();
-    for (size_t j = 5; j < 8; ++j) {
-        beginPath();
-        fillColor(cv::Scalar(255, 255, 255, 255));
-        moveTo(features[j][0].x, features[j][0].y);
-        for (size_t k = 1; k < features[j].size(); ++k) {
-            lineTo(features[j][k].x, features[j][k].y);
-        }
-        closePath();
-        fill();
-    }
-
-    beginPath();
-    fillColor(cv::Scalar(0, 0, 0, 255));
-    moveTo(features[8][0].x, features[8][0].y);
-    for (size_t k = 1; k < features[8].size(); ++k) {
-        lineTo(features[8][k].x, features[8][k].y);
-    }
-    closePath();
-    fill();
-}
-
-//adjusts the saturation of a UMat
-static void adjust_saturation(const cv::UMat &srcBGR, cv::UMat &dstBGR, float factor) {
-    thread_local vector<cv::UMat> channels;
-    thread_local cv::UMat hls;
-
-    cvtColor(srcBGR, hls, cv::COLOR_BGR2HLS);
-    split(hls, channels);
-    cv::multiply(channels[2], factor, channels[2]);
-    merge(channels, hls);
-    cvtColor(hls, dstBGR, cv::COLOR_HLS2BGR);
-}
-
 using namespace cv::v4d;
 
 class BeautyDemoPlan : public Plan {
+    struct Params {
+        int blurSkinKernelSize_ = std::max(int(DIAG / BLUR_DIV % 2 == 0 ? DIAG / BLUR_DIV + 1 : DIAG / BLUR_DIV), 1);
+        //Saturation boost factor for eyes and lips
+        float eyesAndLipsSaturation_ = 2.0f;
+        //Saturation boost factor for skin
+        float skinSaturation_ = 1.7f;
+        //Contrast factor skin
+        float skinContrast_ = 0.7f;
+#ifndef __EMSCRIPTEN__
+        //Show input and output side by side
+        bool sideBySide_ = true;
+        //Scale the video to the window size
+        bool stretch_ = true;
+#else
+        bool sideBySide_ = false;
+        bool stretch_ = false;
+#endif
+    } params_;
+
+    struct Cache {
+        vector<cv::UMat> channels_;
+        cv::UMat hls_;
+    } cache_;
+
     cv::Ptr<cv::face::Facemark> facemark_ = cv::face::createFacemarkLBF();
     //Blender (used to put the different face parts back together)
     cv::Ptr<cv::detail::MultiBandBlender> blender_ = new cv::detail::MultiBandBlender(false, 5);
@@ -215,7 +169,89 @@ class BeautyDemoPlan : public Plan {
     bool faceFound_ = false;
     FaceFeatures features_;
 public:
+    //based on the detected FaceFeatures it guesses a decent face oval and draws a mask for it.
+    static void draw_face_oval_mask(const FaceFeatures &ff) {
+        using namespace cv::v4d::nvg;
+        clear();
+
+        vector<vector<cv::Point2f>> features = ff.features();
+        cv::RotatedRect rotRect = cv::fitEllipse(features[0]);
+
+        beginPath();
+        fillColor(cv::Scalar(255, 255, 255, 255));
+        ellipse(rotRect.center.x, rotRect.center.y * 1, rotRect.size.width / 2, rotRect.size.height / 2.5);
+        rotate(rotRect.angle);
+        fill();
+    }
+
+    //Draws a mask consisting of eyes and lips areas (deduced from FaceFeatures)
+    static void draw_face_eyes_and_lips_mask(const FaceFeatures &ff) {
+        using namespace cv::v4d::nvg;
+        clear();
+        vector<vector<cv::Point2f>> features = ff.features();
+        for (size_t j = 5; j < 8; ++j) {
+            beginPath();
+            fillColor(cv::Scalar(255, 255, 255, 255));
+            moveTo(features[j][0].x, features[j][0].y);
+            for (size_t k = 1; k < features[j].size(); ++k) {
+                lineTo(features[j][k].x, features[j][k].y);
+            }
+            closePath();
+            fill();
+        }
+
+        beginPath();
+        fillColor(cv::Scalar(0, 0, 0, 255));
+        moveTo(features[8][0].x, features[8][0].y);
+        for (size_t k = 1; k < features[8].size(); ++k) {
+            lineTo(features[8][k].x, features[8][k].y);
+        }
+        closePath();
+        fill();
+    }
+
+    //adjusts the saturation of a UMat
+    static void adjust_saturation(const cv::UMat &srcBGR, cv::UMat &dstBGR, float factor, Cache& cache) {
+        cvtColor(srcBGR, cache.hls_, cv::COLOR_BGR2HLS);
+        split(cache.hls_, cache.channels_);
+        cv::multiply(cache.channels_[2], factor, cache.channels_[2]);
+        merge(cache.channels_, cache.hls_);
+        cvtColor(cache.hls_, dstBGR, cv::COLOR_HLS2BGR);
+    }
+
+    void gui(cv::Ptr<V4D> window) override {
+        window->imgui([this](cv::Ptr<V4D> window, ImGuiContext* ctx){
+            using namespace ImGui;
+            SetCurrentContext(ctx);
+            Begin("Effect");
+            Text("Display");
+            Checkbox("Side by side", &params_.sideBySide_);
+            if(Checkbox("Stretch", &params_.stretch_)) {
+                //apply the new stretching state only when the checkbox changed
+                window->setStretching(params_.stretch_);
+            }
+
+#ifndef __EMSCRIPTEN__
+            if(Button("Fullscreen")) {
+                window->setFullscreen(!window->isFullscreen());
+            };
+#endif
+
+            if(Button("Offscreen")) {
+                window->setVisible(!window->isVisible());
+            };
+
+            Text("Face Skin");
+            SliderInt("Blur", &params_.blurSkinKernelSize_, 0, 128);
+            SliderFloat("Saturation", &params_.skinSaturation_, 0.0f, 100.0f);
+            SliderFloat("Contrast", &params_.skinContrast_, 0.0f, 1.0f);
+            Text("Eyes and Lips");
+            SliderFloat("Saturation ", &params_.eyesAndLipsSaturation_, 0.0f, 100.0f);
+            End();
+        });
+    }
     void setup(cv::Ptr<V4D> window) override {
+        window->setStretching(params_.stretch_);
         window->parallel([](cv::Ptr<cv::face::Facemark>& facemark){
 #ifndef __EMSCRIPTEN__
             facemark->loadModel("modules/v4d/assets/models/lbfmodel.yaml");
@@ -289,19 +325,18 @@ public:
             cv::bitwise_not(ealmg, bmg);
         }, faceOval_, eyesAndLipsMaskGrey_, faceSkinMaskGrey_, backgroundMaskGrey_);
 
-        window->parallel([](const cv::UMat& in, cv::UMat& eal, float& eals, cv::UMat& c, cv::UMat& s) {
+        window->parallel([](const cv::UMat& in, cv::UMat& eal, cv::UMat& c, cv::UMat& s, Params& params, Cache& cache) {
             //boost saturation of eyes and lips
-            adjust_saturation(in, eal, eals);
+            adjust_saturation(in, eal, params.eyesAndLipsSaturation_, cache);
             //reduce skin contrast
-            multiply(in, cv::Scalar::all(skin_contrast), c);
+            multiply(in, cv::Scalar::all(params.skinContrast_), c);
             //fix skin brightness
-            add(c, cv::Scalar::all((1.0 - skin_contrast) / 2.0) * 255.0, c);
+            add(c, cv::Scalar::all((1.0 - params.skinContrast_) / 2.0) * 255.0, c);
             //blur the skin_
-            cv::boxFilter(c, c, -1, cv::Size(blur_skin_kernel_size, blur_skin_kernel_size), cv::Point(-1, -1), true, cv::BORDER_REPLICATE);
+            cv::boxFilter(c, c, -1, cv::Size(params.blurSkinKernelSize_, params.blurSkinKernelSize_), cv::Point(-1, -1), true, cv::BORDER_REPLICATE);
             //boost skin saturation
-            adjust_saturation(c, s, skin_saturation);
-        }, input_, eyesAndLips_, eyes_and_lips_saturation, contrast_, skin_);
-
+            adjust_saturation(c, s, params.skinSaturation_, cache);
+        }, input_, eyesAndLips_, contrast_, skin_, params_, cache_);
 
         window->parallel([](cv::Ptr<cv::detail::MultiBandBlender>& bl,
                 const cv::UMat& s, const cv::UMat& fsmg,
@@ -325,8 +360,8 @@ public:
             foFloat.convertTo(fout, CV_8U, 1.0);
         }, blender_, skin_, faceSkinMaskGrey_, input_, backgroundMaskGrey_, eyesAndLips_, eyesAndLipsMaskGrey_, frameOut_);
 
-        window->parallel([](cv::UMat& fout, const cv::UMat& in, cv::UMat& lh, cv::UMat& rh) {
-            if (side_by_side) {
+        window->parallel([](cv::UMat& fout, const cv::UMat& in, cv::UMat& lh, cv::UMat& rh, const Params& params) {
+            if (params.sideBySide_) {
                 //create side-by-side view with a result
                 cv::resize(in, lh, cv::Size(0, 0), 0.5, 0.5);
                 cv::resize(fout, rh, cv::Size(0, 0), 0.5, 0.5);
@@ -335,14 +370,14 @@ public:
                 lh.copyTo(fout(cv::Rect(0, 0, lh.size().width, lh.size().height)));
                 rh.copyTo(fout(cv::Rect(rh.size().width, 0, rh.size().width, rh.size().height)));
             }
-        }, frameOut_, input_, lhalf_, rhalf_);
+        }, frameOut_, input_, lhalf_, rhalf_, params_);
     }
     window->endbranch(isTrue, faceFound_);
 
     window->branch(isFalse, faceFound_);
     {
-        window->parallel([](cv::UMat& fout, const cv::UMat& in, cv::UMat& lh) {
-            if (side_by_side) {
+        window->parallel([](cv::UMat& fout, const cv::UMat& in, cv::UMat& lh, const Params& params) {
+            if (params.sideBySide_) {
                 //create side-by-side view without a result (using the input image for both sides)
                 fout = cv::Scalar::all(0);
                 cv::resize(in, lh, cv::Size(0, 0), 0.5, 0.5);
@@ -351,7 +386,7 @@ public:
             } else {
                 in.copyTo(fout);
             }
-        }, frameOut_, input_, lhalf_);
+        }, frameOut_, input_, lhalf_, params_);
     }
     window->endbranch(isFalse, faceFound_);
 
@@ -381,45 +416,11 @@ int main() {
 #endif
     using namespace cv::v4d;
     cv::Ptr<V4D> window = V4D::make(WIDTH, HEIGHT, "Beautification Demo", ALL, OFFSCREEN);
-//    window->printSystemInfo();
-    window->setStretching(stretch);
-
-//    if (!OFFSCREEN) {
-//        window->imgui([window](ImGuiContext* ctx){
-//            using namespace ImGui;
-//            SetCurrentContext(ctx);
-//            Begin("Effect");
-//            Text("Display");
-//            Checkbox("Side by side", &side_by_side);
-//            if(Checkbox("Stetch", &stretch)) {
-//                window->setStretching(true);
-//            } else
-//                window->setStretching(false);
-//
-//    #ifndef __EMSCRIPTEN__
-//            if(Button("Fullscreen")) {
-//                window->setFullscreen(!window->isFullscreen());
-//            };
-//    #endif
-//
-//            if(Button("Offscreen")) {
-//                window->setVisible(!window->isVisible());
-//            };
-//
-//            Text("Face Skin");
-//            SliderInt("Blur", &blur_skin_kernel_size, 0, 128);
-//            SliderFloat("Saturation", &skin_saturation, 0.0f, 100.0f);
-//            SliderFloat("Contrast", &skin_contrast, 0.0f, 1.0f);
-//            Text("Eyes and Lips");
-//            SliderFloat("Saturation ", &eyes_and_lips_saturation, 0.0f, 100.0f);
-//            End();
-//        });
-//    }
 
 #ifndef __EMSCRIPTEN__
     auto src = makeCaptureSource(window, argv[1]);
     window->setSource(src);
-//    Sink sink = makeWriterSink(window, OUTPUT_FILENAME, src.fps(), cv::Size(WIDTH, HEIGHT));
-//    window->setSink(sink);
+    auto sink = makeWriterSink(window, OUTPUT_FILENAME, src->fps(), cv::Size(WIDTH, HEIGHT));
+    window->setSink(sink);
 #else
     auto src = makeCaptureSource(window);
     window->setSource(src);
diff --git a/modules/v4d/samples/optflow-demo.cpp b/modules/v4d/samples/optflow-demo.cpp
index 2395ea04c..10099c138 100644
--- a/modules/v4d/samples/optflow-demo.cpp
+++ b/modules/v4d/samples/optflow-demo.cpp
@@ -20,7 +20,6 @@
 #include
 #include
-
 using std::cerr;
 using std::endl;
 using std::vector;
 
@@ -56,254 +55,32 @@ enum PostProcModes {
     DISABLED
 };
 
-//Uses background subtraction to generate a "motion mask"
-static void prepare_motion_mask(const cv::UMat& srcGrey, cv::UMat& motionMaskGrey) {
-    thread_local cv::Ptr<cv::BackgroundSubtractorMOG2> bg_subtrator = cv::createBackgroundSubtractorMOG2(100, 16.0, false);
-    thread_local int morph_size = 1;
-    thread_local cv::Mat element = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2 * morph_size + 1, 2 * morph_size + 1), cv::Point(morph_size, morph_size));
-
-    bg_subtrator->apply(srcGrey, motionMaskGrey);
-    //Surpress speckles
-    cv::morphologyEx(motionMaskGrey, motionMaskGrey, cv::MORPH_OPEN, element, cv::Point(element.cols >> 1, element.rows >> 1), 2, cv::BORDER_CONSTANT, cv::morphologyDefaultBorderValue());
-}
-
-//Detect points to track
-static void detect_points(const cv::UMat& srcMotionMaskGrey, vector<cv::Point2f>& points) {
-    thread_local cv::Ptr<cv::FastFeatureDetector> detector = cv::FastFeatureDetector::create(1, false);
-    thread_local vector<cv::KeyPoint> tmpKeyPoints;
-
-    detector->detect(srcMotionMaskGrey, tmpKeyPoints);
-
-    points.clear();
-    for (const auto &kp : tmpKeyPoints) {
-        points.push_back(kp.pt);
-    }
-}
-
-//Detect extrem changes in scene content and report it
-static bool detect_scene_change(const cv::UMat& srcMotionMaskGrey, const float thresh, const float theshDiff) {
-    thread_local float last_movement = 0;
-
-    float movement = cv::countNonZero(srcMotionMaskGrey) / float(srcMotionMaskGrey.cols * srcMotionMaskGrey.rows);
-    float relation = movement > 0 && last_movement > 0 ? std::max(movement, last_movement) / std::min(movement, last_movement) : 0;
-    float relM = relation * log10(1.0f + (movement * 9.0));
-    float relLM = relation * log10(1.0f + (last_movement * 9.0));
-
-    bool result = !((movement > 0 && last_movement > 0 && relation > 0)
-            && (relM < thresh && relLM < thresh && fabs(relM - relLM) < theshDiff));
-    last_movement = (last_movement + movement) / 2.0f;
-    return result;
-}
-
-//Visualize the sparse optical flow
-static void visualize_sparse_optical_flow(const cv::UMat &prevGrey, const cv::UMat &nextGrey, const vector<cv::Point2f> &detectedPoints, const float scaleFactor, const int maxStrokeSize, const cv::Scalar color, const int maxPoints, const float pointLossPercent) {
-    thread_local vector<cv::Point2f> hull, prevPoints, nextPoints, newPoints;
-    thread_local vector<cv::Point2f> upPrevPoints, upNextPoints;
-    thread_local std::vector<uchar> status;
-    thread_local std::vector<float> err;
-    thread_local std::random_device rd;
-    thread_local std::mt19937 g(rd());
-
-    //less then 5 points is a degenerate case (e.g. the corners of a video frame)
-    if (detectedPoints.size() > 4) {
-        cv::convexHull(detectedPoints, hull);
-        float area = cv::contourArea(hull);
-        //make sure the area of the point cloud is positive
-        if (area > 0) {
-            float density = (detectedPoints.size() / area);
-            //stroke size is biased by the area of the point cloud
-            float strokeSize = maxStrokeSize * pow(area / (nextGrey.cols * nextGrey.rows), 0.33f);
-            //max points is biased by the densitiy of the point cloud
-            size_t currentMaxPoints = ceil(density * maxPoints);
-
-            //lose a number of random points specified by pointLossPercent
-            std::shuffle(prevPoints.begin(), prevPoints.end(), g);
-            prevPoints.resize(ceil(prevPoints.size() * (1.0f - (pointLossPercent / 100.0f))));
-
-            //calculate how many newly detected points to add
-            size_t copyn = std::min(detectedPoints.size(), (size_t(std::ceil(currentMaxPoints)) - prevPoints.size()));
-            if (prevPoints.size() < currentMaxPoints) {
-                std::copy(detectedPoints.begin(), detectedPoints.begin() + copyn, std::back_inserter(prevPoints));
-            }
-
-            //calculate the sparse optical flow
-            cv::calcOpticalFlowPyrLK(prevGrey, nextGrey, prevPoints, nextPoints, status, err);
-            newPoints.clear();
-            if (prevPoints.size() > 1 && nextPoints.size() > 1) {
-                //scale the points to original size
-                upNextPoints.clear();
-                upPrevPoints.clear();
-                for (cv::Point2f pt : prevPoints) {
-                    upPrevPoints.push_back(pt /= scaleFactor);
-                }
-
-                for (cv::Point2f pt : nextPoints) {
-                    upNextPoints.push_back(pt /= scaleFactor);
-                }
-
-                using namespace cv::v4d::nvg;
-                //start drawing
-                beginPath();
-                strokeWidth(strokeSize);
-                strokeColor(color);
-
-                for (size_t i = 0; i < prevPoints.size(); i++) {
-                    if (status[i] == 1 //point was found in prev and new set
-                            && err[i] < (1.0 / density) //with a higher density be more sensitive to the feature error
-                            && upNextPoints[i].y >= 0 && upNextPoints[i].x >= 0 //check bounds
-                            && upNextPoints[i].y < nextGrey.rows / scaleFactor && upNextPoints[i].x < nextGrey.cols / scaleFactor //check bounds
-                            ) {
-                        float len = hypot(fabs(upPrevPoints[i].x - upNextPoints[i].x), fabs(upPrevPoints[i].y - upNextPoints[i].y));
-                        //upper and lower bound of the flow vector lengthss
-                        if (len > 0 && len < sqrt(area)) {
-                            //collect new points
-                            newPoints.push_back(nextPoints[i]);
-                            //the actual drawing operations
-                            moveTo(upNextPoints[i].x, upNextPoints[i].y);
-                            lineTo(upPrevPoints[i].x, upPrevPoints[i].y);
-                        }
-                    }
-                }
-                //end drawing
-                stroke();
-            }
-            prevPoints = newPoints;
-        }
-    }
-}
-
-//Bloom post-processing effect
-static void bloom(const cv::UMat& src, cv::UMat &dst, int ksize = 3, int threshValue = 235, float gain = 4) {
-    thread_local cv::UMat bgr;
-    thread_local cv::UMat hls;
-    thread_local cv::UMat ls16;
-    thread_local cv::UMat ls;
-    thread_local cv::UMat blur;
-    thread_local std::vector<cv::UMat> hlsChannels;
-
-    //remove alpha channel
-    cv::cvtColor(src, bgr, cv::COLOR_BGRA2RGB);
-    //convert to hls
-    cv::cvtColor(bgr, hls, cv::COLOR_BGR2HLS);
-    //split channels
-    cv::split(hls, hlsChannels);
-    //invert lightness
-    cv::bitwise_not(hlsChannels[2], hlsChannels[2]);
-    //multiply lightness and saturation
-    cv::multiply(hlsChannels[1], hlsChannels[2], ls16, 1, CV_16U);
-    //normalize
-    cv::divide(ls16, cv::Scalar(255.0), ls, 1, CV_8U);
-    //binary threhold according to threshValue
-    cv::threshold(ls, blur, threshValue, 255, cv::THRESH_BINARY);
-    //blur
-    cv::boxFilter(blur, blur, -1, cv::Size(ksize, ksize), cv::Point(-1,-1), true, cv::BORDER_REPLICATE);
-    //convert to BGRA
-    cv::cvtColor(blur, blur, cv::COLOR_GRAY2BGRA);
-    //add src and the blurred L-S-product according to gain
-    addWeighted(src, 1.0, blur, gain, 0, dst);
-}
-
-//Glow post-processing effect
-static void glow_effect(const cv::UMat &src, cv::UMat &dst, const int ksize) {
-    thread_local cv::UMat resize;
-    thread_local cv::UMat blur;
-    thread_local cv::UMat dst16;
-
-    cv::bitwise_not(src, dst);
-
-    //Resize for some extra performance
-    cv::resize(dst, resize, cv::Size(), 0.5, 0.5);
-    //Cheap blur
-    cv::boxFilter(resize, resize, -1, cv::Size(ksize, ksize), cv::Point(-1,-1), true, cv::BORDER_REPLICATE);
-    //Back to original size
-    cv::resize(resize, blur, src.size());
-
-    //Multiply the src image with a blurred version of itself
-    cv::multiply(dst, blur, dst16, 1, CV_16U);
-    //Normalize and convert back to CV_8U
-    cv::divide(dst16, cv::Scalar::all(255.0), dst, 1, CV_8U);
-
-    cv::bitwise_not(dst, dst);
-}
-
-//Compose the different layers into the final image
-static void composite_layers(cv::UMat& background, cv::UMat& foreground, const cv::UMat& frameBuffer, cv::UMat& dst, int kernelSize, float fgLossPercent, BackgroundModes bgMode, PostProcModes ppMode, int bloomThresh, float bloomGain) {
-    thread_local cv::UMat tmp;
-    thread_local cv::UMat post;
-    thread_local cv::UMat backgroundGrey;
-    thread_local vector<cv::UMat> channels;
-
-    //Lose a bit of foreground brightness based on fgLossPercent
-    cv::subtract(foreground, cv::Scalar::all(255.0f * (fgLossPercent / 100.0f)), foreground);
-    //Add foreground an the current framebuffer into foregound
-    cv::add(foreground, frameBuffer, foreground);
-
-    //Dependin on bgMode prepare the background in different ways
-    switch (bgMode) {
-    case GREY:
-        cv::cvtColor(background, backgroundGrey, cv::COLOR_BGRA2GRAY);
-        cv::cvtColor(backgroundGrey, background, cv::COLOR_GRAY2BGRA);
-        break;
-    case VALUE:
-        cv::cvtColor(background, tmp, cv::COLOR_BGRA2BGR);
-        cv::cvtColor(tmp, tmp, cv::COLOR_BGR2HSV);
-        split(tmp, channels);
-        cv::cvtColor(channels[2], background, cv::COLOR_GRAY2BGRA);
-        break;
-    case COLOR:
-        break;
-    case BLACK:
-        background = cv::Scalar::all(0);
-        break;
-    default:
-        break;
-    }
-
-    //Depending on ppMode perform post-processing
-    switch (ppMode) {
-    case GLOW:
-        glow_effect(foreground, post, kernelSize);
-        break;
-    case BLOOM:
-        bloom(foreground, post, kernelSize, bloomThresh, bloomGain);
-        break;
-    case DISABLED:
-        foreground.copyTo(post);
-        break;
-    default:
-        break;
-    }
-
-    //Add background and post-processed foreground into dst
-    cv::add(background, post, dst);
-}
-
 using namespace cv::v4d;
 
 class OptflowPlan : public Plan {
     struct Params {
         // Generate the foreground at this scale.
-        float fgScale = 0.5f;
+        float fgScale_ = 0.5f;
         // On every frame the foreground loses on brightness. Specifies the loss in percent.
         float fgLoss_ = 1;
         //Convert the background to greyscale
         BackgroundModes backgroundMode_ = GREY;
         // Peak thresholds for the scene change detection. Lowering them makes the detection more sensitive but
         // the default should be fine.
-        float sceneChangeThresh = 0.29f;
-        float sceneChangeThreshDiff = 0.1f;
+        float sceneChangeThresh_ = 0.29f;
+        float sceneChangeThreshDiff_ = 0.1f;
         // The theoretical maximum number of points to track which is scaled by the density of detected points
         // and therefor is usually much smaller.
-        int maxPoints = 300000;
+        int maxPoints_ = 300000;
         // How many of the tracked points to lose intentionally, in percent.
-        float pointLoss = 20;
+        float pointLoss_ = 20;
         // The theoretical maximum size of the drawing stroke which is scaled by the area of the convex hull
         // of tracked points and therefor is usually much smaller.
-        int maxStroke = 6;
-        // Red, green, blue and alpha. All from 0.0f to 1.0f
-        float effectColor[4] = {1.0f, 0.75f, 0.4f, 0.15f};
+        int maxStroke_ = 6;
+        // Blue, green, red and alpha. All from 0.0f to 1.0f
+        cv::Scalar_<float> effectColor_ = {0.4f, 0.75f, 1.0f, 0.15f};
         //display on-screen FPS
-        bool showFps = true;
+        bool showFps_ = true;
         //Stretch frame buffer to window size
         bool stretch_ = false;
         //The post processing mode
@@ -320,6 +97,38 @@ class OptflowPlan : public Plan {
         float bloomGain_ = 3;
     } params_;
 
+    struct Cache {
+        cv::Mat element_ = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3), cv::Point(1, 1));
+
+        vector<cv::KeyPoint> tmpKeyPoints_;
+
+        float last_movement_ = 0;
+
+        vector<cv::Point2f> hull_, prevPoints_, nextPoints_, newPoints_;
+        vector<cv::Point2f> upPrevPoints_, upNextPoints_;
+        std::vector<uchar> status_;
+        std::vector<float> err_;
+        std::random_device rd_;
+        std::mt19937 rng_;
+
+        cv::UMat bgr_;
+        cv::UMat hls_;
+        cv::UMat ls16_;
+        cv::UMat ls_;
+        cv::UMat bblur_;
+        std::vector<cv::UMat> hlsChannels_;
+
+        cv::UMat high_;
+        cv::UMat low_;
+        cv::UMat gblur_;
+        cv::UMat dst16_;
+
+        cv::UMat tmp_;
+        cv::UMat post_;
+        cv::UMat backgroundGrey_;
+        vector<cv::UMat> channels_;
+    } cache_;
+
     //BGRA
     cv::UMat background_, down_;
     //BGR
@@ -328,49 +137,242 @@ class OptflowPlan : public Plan {
     //GREY
     cv::UMat downPrevGrey_, downNextGrey_, downMotionMaskGrey_;
     vector<cv::Point2f> detectedPoints_;
+
+    cv::Ptr<cv::BackgroundSubtractorMOG2> bg_subtractor_ = cv::createBackgroundSubtractorMOG2(100, 16.0, false);
+    cv::Ptr<cv::FastFeatureDetector> detector_ = cv::FastFeatureDetector::create(1, false);
 public:
     virtual ~OptflowPlan() override {};
+    //Uses background subtraction to generate a "motion mask"
+    static void prepare_motion_mask(const cv::UMat& srcGrey, cv::UMat& motionMaskGrey, cv::Ptr<cv::BackgroundSubtractorMOG2> bg_subtractor, Cache& cache) {
+        bg_subtractor->apply(srcGrey, motionMaskGrey);
+        //Suppress speckles
+        cv::morphologyEx(motionMaskGrey, motionMaskGrey, cv::MORPH_OPEN, cache.element_, cv::Point(cache.element_.cols >> 1, cache.element_.rows >> 1), 2, cv::BORDER_CONSTANT, cv::morphologyDefaultBorderValue());
+    }
+
+    //Detect points to track
+    static void detect_points(const cv::UMat& srcMotionMaskGrey, vector<cv::Point2f>& points, cv::Ptr<cv::FastFeatureDetector> detector, Cache& cache) {
+        detector->detect(srcMotionMaskGrey, cache.tmpKeyPoints_);
+
+        points.clear();
+        for (const auto &kp : cache.tmpKeyPoints_) {
+            points.push_back(kp.pt);
+        }
+    }
+
+    //Detect extreme changes in scene content and report them
+    static bool detect_scene_change(const cv::UMat& srcMotionMaskGrey, const Params& params, Cache& cache) {
+        float movement = cv::countNonZero(srcMotionMaskGrey) / float(srcMotionMaskGrey.cols * srcMotionMaskGrey.rows);
+        float relation = movement > 0 && cache.last_movement_ > 0 ? std::max(movement, cache.last_movement_) / std::min(movement, cache.last_movement_) : 0;
+        float relM = relation * log10(1.0f + (movement * 9.0));
+        float relLM = relation * log10(1.0f + (cache.last_movement_ * 9.0));
+
+        bool result = !((movement > 0 && cache.last_movement_ > 0 && relation > 0)
+                && (relM < params.sceneChangeThresh_ && relLM < params.sceneChangeThresh_ && fabs(relM - relLM) < params.sceneChangeThreshDiff_));
+        cache.last_movement_ = (cache.last_movement_ + movement) / 2.0f;
+        return result;
+    }
+
+    //Visualize the sparse optical flow
+    static void visualize_sparse_optical_flow(const cv::UMat &prevGrey, const cv::UMat &nextGrey, const vector<cv::Point2f> &detectedPoints, const Params& params, Cache& cache) {
+        //fewer than 5 points is a degenerate case (e.g. the corners of a video frame)
+        if (detectedPoints.size() > 4) {
+            cv::convexHull(detectedPoints, cache.hull_);
+            float area = cv::contourArea(cache.hull_);
+            //make sure the area of the point cloud is positive
+            if (area > 0) {
+                float density = (detectedPoints.size() / area);
+                //stroke size is biased by the area of the point cloud
+                float strokeSize = params.maxStroke_ * pow(area / (nextGrey.cols * nextGrey.rows), 0.33f);
+                //max points is biased by the density of the point cloud
+                size_t currentMaxPoints = ceil(density * params.maxPoints_);
+
+                //lose a number of random points specified by pointLossPercent
+                std::shuffle(cache.prevPoints_.begin(), cache.prevPoints_.end(), cache.rng_);
+                cache.prevPoints_.resize(ceil(cache.prevPoints_.size() * (1.0f - (params.pointLoss_ / 100.0f))));
+
+                //calculate how many newly detected points to add
+                size_t copyn = std::min(detectedPoints.size(), (size_t(std::ceil(currentMaxPoints)) - cache.prevPoints_.size()));
+                if (cache.prevPoints_.size() < currentMaxPoints) {
+                    std::copy(detectedPoints.begin(), detectedPoints.begin() + copyn, std::back_inserter(cache.prevPoints_));
+                }
+
+                //calculate the sparse optical flow
+                cv::calcOpticalFlowPyrLK(prevGrey, nextGrey, cache.prevPoints_, cache.nextPoints_, cache.status_, cache.err_);
+                cache.newPoints_.clear();
+                if (cache.prevPoints_.size() > 1 && cache.nextPoints_.size() > 1) {
+                    //scale the points to original size
+                    cache.upNextPoints_.clear();
+                    cache.upPrevPoints_.clear();
+                    for (cv::Point2f pt : cache.prevPoints_) {
+                        cache.upPrevPoints_.push_back(pt /= params.fgScale_);
+                    }
+
+                    for (cv::Point2f pt : cache.nextPoints_) {
+                        cache.upNextPoints_.push_back(pt /= params.fgScale_);
+                    }
+
+                    using namespace cv::v4d::nvg;
+                    //start drawing
+                    beginPath();
+                    strokeWidth(strokeSize);
+                    strokeColor(params.effectColor_ * 255.0);
+
+                    for (size_t i = 0; i < cache.prevPoints_.size(); i++) {
+                        if (cache.status_[i] == 1 //point was found in prev and new set
+                                && cache.err_[i] < (1.0 / density) //with a higher density be more sensitive to the feature error
+                                && cache.upNextPoints_[i].y >= 0 && cache.upNextPoints_[i].x >= 0 //check bounds
+                                && cache.upNextPoints_[i].y < nextGrey.rows / params.fgScale_ && cache.upNextPoints_[i].x < nextGrey.cols / params.fgScale_ //check bounds
+                                ) {
+                            float len = hypot(fabs(cache.upPrevPoints_[i].x - cache.upNextPoints_[i].x), fabs(cache.upPrevPoints_[i].y - cache.upNextPoints_[i].y));
+                            //upper and lower bound of the flow vector lengths
+                            if (len > 0 && len < sqrt(area)) {
+                                //collect new points
+                                cache.newPoints_.push_back(cache.nextPoints_[i]);
+                                //the actual drawing operations
+                                moveTo(cache.upNextPoints_[i].x, cache.upNextPoints_[i].y);
+                                lineTo(cache.upPrevPoints_[i].x, cache.upPrevPoints_[i].y);
+                            }
+                        }
+                    }
+                    //end drawing
+                    stroke();
+                }
+                cache.prevPoints_ = cache.newPoints_;
+            }
+        }
+    }
+
+    //Bloom post-processing effect
+    static void bloom(const cv::UMat& src, cv::UMat &dst, Cache& cache, int ksize = 3, int threshValue = 235, float gain = 4) {
+        //remove alpha channel
+        cv::cvtColor(src, cache.bgr_, cv::COLOR_BGRA2RGB);
+        //convert to hls
+        cv::cvtColor(cache.bgr_, cache.hls_, cv::COLOR_BGR2HLS);
+        //split channels
+        cv::split(cache.hls_, cache.hlsChannels_);
+        //invert lightness
+        cv::bitwise_not(cache.hlsChannels_[2], cache.hlsChannels_[2]);
+        //multiply lightness and saturation
+        cv::multiply(cache.hlsChannels_[1], cache.hlsChannels_[2], cache.ls16_, 1, CV_16U);
+        //normalize
+        cv::divide(cache.ls16_, cv::Scalar(255.0), cache.ls_, 1, CV_8U);
+        //binary threshold according to threshValue
+        cv::threshold(cache.ls_, cache.bblur_, threshValue, 255, cv::THRESH_BINARY);
+        //blur
+        cv::boxFilter(cache.bblur_, cache.bblur_, -1, cv::Size(ksize, ksize), cv::Point(-1,-1), true, cv::BORDER_REPLICATE);
+        //convert to BGRA
+        cv::cvtColor(cache.bblur_, cache.bblur_, cv::COLOR_GRAY2BGRA);
+        //add src and the blurred L-S-product according to gain
+        addWeighted(src, 1.0, cache.bblur_, gain, 0, dst);
+    }
+
+    //Glow post-processing effect
+    static void glow_effect(const cv::UMat &src, cv::UMat &dst, const int ksize, Cache& cache) {
+        cv::bitwise_not(src, dst);
+
+        //Resize for some extra performance
+        cv::resize(dst, cache.low_, cv::Size(), 0.5, 0.5);
+        //Cheap blur
+        cv::boxFilter(cache.low_, cache.gblur_, -1, cv::Size(ksize, ksize), cv::Point(-1,-1), true, cv::BORDER_REPLICATE);
+        //Back to original size
+        cv::resize(cache.gblur_, cache.high_, src.size());
+
+        //Multiply the src image with a blurred version of itself
+        cv::multiply(dst, cache.high_, cache.dst16_, 1, CV_16U);
+        //Normalize and convert back to CV_8U
+        cv::divide(cache.dst16_, cv::Scalar::all(255.0), dst, 1, CV_8U);
+
+        cv::bitwise_not(dst, dst);
+    }
+
+    //Compose the different layers into the final image
+    static void composite_layers(cv::UMat& background, cv::UMat& foreground, const cv::UMat& frameBuffer, cv::UMat& dst, const Params& params, Cache& cache) {
+        //Lose a bit of foreground brightness based on fgLossPercent
+        cv::subtract(foreground, cv::Scalar::all(255.0f * (params.fgLoss_ / 100.0f)), foreground);
+        //Add the foreground and the current framebuffer into the foreground
+        cv::add(foreground, frameBuffer, foreground);
+
+        //Depending on bgMode, prepare the background in different ways
+        switch (params.backgroundMode_) {
+        case GREY:
+            cv::cvtColor(background, cache.backgroundGrey_, cv::COLOR_BGRA2GRAY);
+            cv::cvtColor(cache.backgroundGrey_, background, cv::COLOR_GRAY2BGRA);
+            break;
+        case VALUE:
+            cv::cvtColor(background, cache.tmp_, cv::COLOR_BGRA2BGR);
+            cv::cvtColor(cache.tmp_, cache.tmp_, cv::COLOR_BGR2HSV);
+            split(cache.tmp_, cache.channels_);
+            cv::cvtColor(cache.channels_[2], background, cv::COLOR_GRAY2BGRA);
+            break;
+        case COLOR:
+            break;
+        case BLACK:
+            background = cv::Scalar::all(0);
+            break;
+        default:
+            break;
+        }
+
+        //Depending on ppMode perform post-processing
+        switch (params.postProcMode_) {
+        case GLOW:
+            glow_effect(foreground, cache.post_, params.glowKernelSize_, cache);
+            break;
+        case BLOOM:
+            bloom(foreground, cache.post_, cache, params.glowKernelSize_, params.bloomThresh_, params.bloomGain_);
+            break;
+        case DISABLED:
+            foreground.copyTo(cache.post_);
+            break;
+        default:
+            break;
+        }
+
+        //Add background and post-processed foreground into dst
+        cv::add(background, cache.post_, dst);
+    }
+
     virtual void gui(cv::Ptr<V4D> window) override {
-        window->imgui([this](cv::Ptr<V4D> win, ImGuiContext* ctx){
+        window->imgui([](cv::Ptr<V4D> win, ImGuiContext* ctx, Params& params){
             using namespace ImGui;
             SetCurrentContext(ctx);
             Begin("Effects");
             Text("Foreground");
-            SliderFloat("Scale", &params_.fgScale, 0.1f, 4.0f);
-            SliderFloat("Loss", &params_.fgLoss_, 0.1f, 99.9f);
+            SliderFloat("Scale", &params.fgScale_, 0.1f, 4.0f);
+            SliderFloat("Loss", &params.fgLoss_, 0.1f, 99.9f);
             Text("Background");
             thread_local const char* bgm_items[4] = {"Grey", "Color", "Value", "Black"};
-            thread_local int* bgm = (int*)&params_.backgroundMode_;
+            thread_local int* bgm = (int*)&params.backgroundMode_;
             ListBox("Mode", bgm, bgm_items, 4, 4);
             Text("Points");
-            SliderInt("Max. Points", &params_.maxPoints, 10, 1000000);
-            SliderFloat("Point Loss", &params_.pointLoss, 0.0f, 100.0f);
+            SliderInt("Max. Points", &params.maxPoints_, 10, 1000000);
+            SliderFloat("Point Loss", &params.pointLoss_, 0.0f, 100.0f);
             Text("Optical flow");
-            SliderInt("Max. Stroke Size", &params_.maxStroke, 1, 100);
-            ColorPicker4("Color", params_.effectColor);
+            SliderInt("Max. Stroke Size", &params.maxStroke_, 1, 100);
+            ColorPicker4("Color", params.effectColor_.val);
             End();
             Begin("Post Processing");
             thread_local const char* ppm_items[3] = {"Glow", "Bloom", "None"};
-            thread_local int* ppm = (int*)&params_.postProcMode_;
+            thread_local int* ppm = (int*)&params.postProcMode_;
             ListBox("Effect",ppm, ppm_items, 3, 3);
-            SliderInt("Kernel Size",&params_.glowKernelSize_, 1, 63);
-            SliderFloat("Gain", &params_.bloomGain_, 0.1f, 20.0f);
+            SliderInt("Kernel Size",&params.glowKernelSize_, 1, 63);
+            SliderFloat("Gain", &params.bloomGain_, 0.1f, 20.0f);
             End();
             Begin("Settings");
             Text("Scene Change Detection");
-            SliderFloat("Threshold", &params_.sceneChangeThresh, 0.1f, 1.0f);
-            SliderFloat("Threshold Diff", &params_.sceneChangeThreshDiff, 0.1f, 1.0f);
+            SliderFloat("Threshold", &params.sceneChangeThresh_, 0.1f, 1.0f);
+            SliderFloat("Threshold Diff", &params.sceneChangeThreshDiff_, 0.1f, 1.0f);
             End();
             Begin("Window");
-            if(Checkbox("Show FPS", &params_.showFps)) {
-                win->setShowFPS(params_.showFps);
+            if(Checkbox("Show FPS", &params.showFps_)) {
+                win->setShowFPS(params.showFps_);
             }
-            if(Checkbox("Stretch", &params_.stretch_)) {
-                win->setStretching(params_.stretch_);
+            if(Checkbox("Stretch", &params.stretch_)) {
+                win->setStretching(params.stretch_);
             }
 #ifndef __EMSCRIPTEN__
             if(Button("Fullscreen")) {
@@ -382,12 +384,13 @@ public:
             };
 #endif
             End();
-        });
+        }, params_);
     }
 
     virtual void setup(cv::Ptr<V4D> window) override {
+        cache_.rng_ = std::mt19937(cache_.rd_());
         window->setStretching(params_.stretch_);
-        params_.effectColor[3] /= pow(window->workers() + 1.0, 0.33);
+        params_.effectColor_[3] /= pow(window->workers() + 1.0, 0.33);
     }
 
     virtual void infer(cv::Ptr<V4D> window) override {
@@ -395,39 +398,38 @@ public:
         window->fb([](const cv::UMat& framebuffer, cv::UMat& d, cv::UMat& b, const Params& params) {
             //resize to foreground scale
-            cv::resize(framebuffer, d, cv::Size(framebuffer.size().width * params.fgScale, framebuffer.size().height * params.fgScale));
+            cv::resize(framebuffer, d, cv::Size(framebuffer.size().width * params.fgScale_, framebuffer.size().height * params.fgScale_));
             //save video background
             framebuffer.copyTo(b);
         }, down_, background_, params_);
 
-        window->parallel([](const cv::UMat& d, cv::UMat& dng, cv::UMat& dmmg, std::vector<cv::Point2f>& dp){
+        window->parallel([](const cv::UMat& d, cv::UMat& dng, cv::UMat& dmmg, std::vector<cv::Point2f>& dp, cv::Ptr<cv::BackgroundSubtractorMOG2>& bg_subtractor, cv::Ptr<cv::FastFeatureDetector>& detector, Cache& cache){
             cv::cvtColor(d, dng, cv::COLOR_RGBA2GRAY);
             //Subtract the background to create a motion mask
-            prepare_motion_mask(dng, dmmg);
+            prepare_motion_mask(dng, dmmg, bg_subtractor, cache);
             //Detect trackable points in the motion mask
-            detect_points(dmmg, dp);
-        }, down_, downNextGrey_, downMotionMaskGrey_, detectedPoints_);
+            detect_points(dmmg, dp, detector, cache);
+        }, down_, downNextGrey_, downMotionMaskGrey_, detectedPoints_, bg_subtractor_, detector_, cache_);
 
-        window->nvg([](const cv::UMat& dmmg, const cv::UMat& dpg, const cv::UMat& dng, const std::vector<cv::Point2f>& dp, const Params& params) {
+        window->nvg([](const cv::UMat& dmmg, const cv::UMat& dpg, const cv::UMat& dng, const std::vector<cv::Point2f>& dp, const Params& params, Cache& cache) {
             cv::v4d::nvg::clear();
             if (!dpg.empty()) {
                 //We don't want the algorithm to get out of hand when there is a scene change, so we suppress it when we detect one.
-                if (!detect_scene_change(dmmg, params.sceneChangeThresh, params.sceneChangeThreshDiff)) {
+                if (!detect_scene_change(dmmg, params, cache)) {
                     //Visualize the sparse optical flow using nanovg
-                    cv::Scalar color = cv::Scalar(params.effectColor[2] * 255, params.effectColor[1] * 255, params.effectColor[0] * 255, params.effectColor[3] * 255);
-                    visualize_sparse_optical_flow(dpg, dng, dp, params.fgScale, params.maxStroke, color, params.maxPoints, params.pointLoss);
+                    visualize_sparse_optical_flow(dpg, dng, dp, params, cache);
                 }
             }
-        }, downMotionMaskGrey_, downPrevGrey_, downNextGrey_, detectedPoints_, params_);
+        }, downMotionMaskGrey_, downPrevGrey_, downNextGrey_, detectedPoints_, params_, cache_);
 
         window->parallel([](cv::UMat& dpg, const cv::UMat& dng) {
             dpg = dng.clone();
         }, downPrevGrey_, downNextGrey_);
 
-        window->fb([](cv::UMat& framebuffer, cv::UMat& b, cv::UMat& f, const Params& params) {
+        window->fb([](cv::UMat& framebuffer, cv::UMat& b, cv::UMat& f, const Params& params, Cache& cache) {
             //Put it all together (OpenCL)
-            composite_layers(b, f, framebuffer, framebuffer, params.glowKernelSize_, params.fgLoss_, params.backgroundMode_, params.postProcMode_, params.bloomThresh_, params.bloomGain_);
-        }, background_, foreground_, params_);
+            composite_layers(b, f, framebuffer, framebuffer, params, cache);
+        }, background_, foreground_, params_, cache_);
 
         window->write();
     }
@@ -456,7 +458,7 @@ int main(int argc, char **argv) {
         window->setSource(src);
 #endif
 
-        window->run(6);
+        window->run(0);
     } catch (std::exception& ex) {
        cerr << ex.what() << endl;
    }
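
---

Reviewer notes (not part of the patch):

Both demos follow the same refactoring pattern: mutable state moves out of file-scope statics and function-local thread_locals into two per-plan structs -- Params (user-tunable values, edited through the ImGui callbacks) and Cache (scratch buffers) -- which are then passed explicitly as arguments into the window->parallel()/nvg()/fb() lambdas. Since every worker owns its own plan instance, workers no longer share mutable globals. The sketch below shows the pattern standalone with plain OpenCV; SaturationCache is an illustrative stand-in for the demos' Cache structs, not a type from the patch.

    #include <opencv2/imgproc.hpp>
    #include <vector>

    struct SaturationCache {
        cv::UMat hls_;                   // scratch image, reused across frames
        std::vector<cv::UMat> channels_; // scratch channel storage
    };

    // Scales the HLS saturation channel by 'factor'. All intermediates live
    // in the caller-owned cache, so two workers with two caches cannot race
    // -- the property the patch gains by dropping thread_local state.
    static void adjustSaturation(const cv::UMat& srcBGR, cv::UMat& dstBGR,
                                 float factor, SaturationCache& cache) {
        cv::cvtColor(srcBGR, cache.hls_, cv::COLOR_BGR2HLS);
        cv::split(cache.hls_, cache.channels_);
        cv::multiply(cache.channels_[2], factor, cache.channels_[2]); // index 2 = S
        cv::merge(cache.channels_, cache.hls_);
        cv::cvtColor(cache.hls_, dstBGR, cv::COLOR_HLS2BGR);
    }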
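A precedence note on the default in BeautyDemoPlan::Params: DIAG / BLUR_DIV % 2 parses as (DIAG / BLUR_DIV) % 2, since / and % share precedence and associate left-to-right, so the ternary bumps an even quotient by one and the box-filter kernel stays odd (and at least 1 via std::max). Worked example, assuming a 1920x1080 frame: DIAG = hypot(1920, 1080) is about 2202, and 2202 / 500 = 4 in integer arithmetic; 4 is even, so blurSkinKernelSize_ defaults to 5.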
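How detect_scene_change() behaves, worked through with the default thresholds (sceneChangeThresh_ = 0.29, sceneChangeThreshDiff_ = 0.1) and made-up motion ratios: with movement = 0.02 and last_movement_ = 0.01, relation = 0.02 / 0.01 = 2.0, relM = 2.0 * log10(1.18) which is about 0.144, and relLM = 2.0 * log10(1.09) which is about 0.075; both stay under 0.29 and their difference (about 0.069) stays under 0.1, so no scene change is reported. If the next frame jumps to movement = 0.4 against the smoothed last_movement_ of 0.015, relation is about 26.7 and relM is about 26.7 * log10(4.6), roughly 17.7 -- far above the threshold -- so the function returns true and the flow visualization is suppressed for that frame.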
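The glow post-process is also easy to reuse outside V4D. Below is a standalone approximation of glow_effect() with local scratch UMats instead of the patch's Cache; it assumes a CV_8UC4 BGRA input, as in the demo's framebuffer.

    #include <opencv2/imgproc.hpp>

    static void glow(const cv::UMat& src, cv::UMat& dst, int ksize) {
        cv::UMat low, blurred, dst16;
        cv::bitwise_not(src, dst);                  // operate on the negative
        cv::resize(dst, low, cv::Size(), 0.5, 0.5); // half size makes the blur cheap
        cv::boxFilter(low, low, -1, cv::Size(ksize, ksize),
                      cv::Point(-1, -1), true, cv::BORDER_REPLICATE);
        cv::resize(low, blurred, src.size());       // back to full resolution
        // multiply the negative with its blurred self, then renormalize
        cv::multiply(dst, blurred, dst16, 1, CV_16U);
        cv::divide(dst16, cv::Scalar::all(255.0), dst, 1, CV_8U);
        cv::bitwise_not(dst, dst);                  // back to the positive image
    }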