diff --git a/modules/gpu/doc/video.rst b/modules/gpu/doc/video.rst
index bb312e0cda..378cca71ab 100644
--- a/modules/gpu/doc/video.rst
+++ b/modules/gpu/doc/video.rst
@@ -649,6 +649,114 @@ Releases all inner buffer's memory.

gpu::GMG_GPU
------------
.. ocv:class:: gpu::GMG_GPU

Class used for background/foreground segmentation. ::

    class GMG_GPU
    {
    public:
        GMG_GPU();

        void initialize(Size frameSize, float min = 0.0f, float max = 255.0f);

        void operator ()(const GpuMat& frame, GpuMat& fgmask, float learningRate = -1.0f, Stream& stream = Stream::Null());

        void release();

        int maxFeatures;
        float learningRate;
        int numInitializationFrames;
        int quantizationLevels;
        float backgroundPrior;
        float decisionThreshold;
        int smoothingRadius;

        ...
    };

The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel that does not fit this model is deemed to be foreground. The class implements the algorithm described in [GMG2012]_.

Here are important members of the class that control the algorithm, which you can set after constructing the class instance:

    .. ocv:member:: int maxFeatures

        Total number of distinct colors to maintain in the histogram.

    .. ocv:member:: float learningRate

        Set between 0.0 and 1.0; determines how quickly features are "forgotten" from histograms.

    .. ocv:member:: int numInitializationFrames

        Number of frames of video to use to initialize the histograms.

    .. ocv:member:: int quantizationLevels

        Number of discrete levels in each channel to be used in the histograms.

    .. ocv:member:: float backgroundPrior

        Prior probability that any given pixel is a background pixel. A sensitivity parameter.

    .. ocv:member:: float decisionThreshold

        Value above which a pixel is determined to be foreground.

    .. ocv:member:: int smoothingRadius

        Smoothing radius, in pixels, for cleaning up the foreground mask.



gpu::GMG_GPU::GMG_GPU
---------------------
The default constructor.

.. ocv:function:: gpu::GMG_GPU::GMG_GPU()

Default constructor sets all parameters to default values.



gpu::GMG_GPU::initialize
------------------------
Initializes the background model and allocates all inner buffers.

.. ocv:function:: void gpu::GMG_GPU::initialize(Size frameSize, float min = 0.0f, float max = 255.0f)

    :param frameSize: Input frame size.

    :param min: Minimum value taken on by pixels in the image sequence. Usually 0.

    :param max: Maximum value taken on by pixels in the image sequence, e.g. 1.0 or 255.



gpu::GMG_GPU::operator()
------------------------
Updates the background model and returns the foreground mask.

.. ocv:function:: void gpu::GMG_GPU::operator()(const GpuMat& frame, GpuMat& fgmask, float learningRate = -1.0f, Stream& stream = Stream::Null())

    :param frame: Next video frame.

    :param fgmask: The output foreground mask as an 8-bit binary image.

    :param learningRate: New learning rate in the range [0, 1]. A negative value keeps the current :ocv:member:`learningRate`.

    :param stream: Stream for the asynchronous version.



gpu::GMG_GPU::release
---------------------
Releases all inner buffers' memory.

.. ocv:function:: void gpu::GMG_GPU::release()



gpu::VideoWriter_GPU
---------------------
Video writer class.
@@ -1093,3 +1201,4 @@ Parse next video frame. Implementation must call this method after new frame was
.. [MOG2004] Z. Zivkovic. *Improved adaptive Gausian mixture model for background subtraction*. International Conference Pattern Recognition, UK, August, 2004
.. [ShadowDetect2003] Prati, Mikic, Trivedi and Cucchiarra. *Detecting Moving Shadows...*.
IEEE PAMI, 2003 .. [VIBE2011] O. Barnich and M. Van D Roogenbroeck. *ViBe: A universal background subtraction algorithm for video sequences*. IEEE Transactions on Image Processing, 20(6) :1709-1724, June 2011 +.. [GMG2012] A. Godbehere, A. Matsukawa and K. Goldberg. *Visual Tracking of Human Visitors under Variable-Lighting Conditions for a Responsive Audio Art Installation*. American Control Conference, Montreal, June 2012 diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 87dfcc73c8..ca9ad89889 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -2127,6 +2127,77 @@ private: GpuMat samples_; }; +/** + * Background Subtractor module. Takes a series of images and returns a sequence of mask (8UC1) + * images of the same size, where 255 indicates Foreground and 0 represents Background. + * This class implements an algorithm described in "Visual Tracking of Human Visitors under + * Variable-Lighting Conditions for a Responsive Audio Art Installation," A. Godbehere, + * A. Matsukawa, K. Goldberg, American Control Conference, Montreal, June 2012. + */ +class CV_EXPORTS GMG_GPU +{ +public: + GMG_GPU(); + + /** + * Validate parameters and set up data structures for appropriate frame size. + * @param frameSize Input frame size + * @param min Minimum value taken on by pixels in image sequence. Usually 0 + * @param max Maximum value taken on by pixels in image sequence. e.g. 1.0 or 255 + */ + void initialize(Size frameSize, float min = 0.0f, float max = 255.0f); + + /** + * Performs single-frame background subtraction and builds up a statistical background image + * model. + * @param frame Input frame + * @param fgmask Output mask image representing foreground and background pixels + * @param stream Stream for the asynchronous version + */ + void operator ()(const GpuMat& frame, GpuMat& fgmask, float learningRate = -1.0f, Stream& stream = Stream::Null()); + + //! Releases all inner buffers + void release(); + + //! Total number of distinct colors to maintain in histogram. + int maxFeatures; + + //! Set between 0.0 and 1.0, determines how quickly features are "forgotten" from histograms. + float learningRate; + + //! Number of frames of video to use to initialize histograms. + int numInitializationFrames; + + //! Number of discrete levels in each channel to be used in histograms. + int quantizationLevels; + + //! Prior probability that any given pixel is a background pixel. A sensitivity parameter. + float backgroundPrior; + + //! Value above which pixel is determined to be FG. + float decisionThreshold; + + //! Smoothing radius, in pixels, for cleaning up FG image. + int smoothingRadius; + + //! Perform background model update. 
+ bool updateBackgroundModel; + +private: + float maxVal_, minVal_; + + Size frameSize_; + + int frameNum_; + + GpuMat nfeatures_; + GpuMat colors_; + GpuMat weights_; + + Ptr boxFilter_; + GpuMat buf_; +}; + ////////////////////////////////// Video Encoding ////////////////////////////////// // Works only under Windows diff --git a/modules/gpu/perf/perf_video.cpp b/modules/gpu/perf/perf_video.cpp index 4ae18bd09c..6e577a4a40 100644 --- a/modules/gpu/perf/perf_video.cpp +++ b/modules/gpu/perf/perf_video.cpp @@ -578,6 +578,77 @@ INSTANTIATE_TEST_CASE_P(Video, VIBE, testing::Combine( testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")), testing::Values(Channels(1), Channels(3), Channels(4)))); +////////////////////////////////////////////////////// +// GMG + +IMPLEMENT_PARAM_CLASS(MaxFeatures, int) + +GPU_PERF_TEST(GMG, cv::gpu::DeviceInfo, std::string, Channels, MaxFeatures) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1)); + int cn = GET_PARAM(2); + int maxFeatures = GET_PARAM(3); + + cv::VideoCapture cap(inputFile); + ASSERT_TRUE(cap.isOpened()); + + cv::Mat frame; + cap >> frame; + ASSERT_FALSE(frame.empty()); + + if (cn != 3) + { + cv::Mat temp; + if (cn == 1) + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + else + cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); + cv::swap(temp, frame); + } + + cv::gpu::GpuMat d_frame(frame); + cv::gpu::GpuMat d_fgmask; + + cv::gpu::GMG_GPU gmg; + gmg.maxFeatures = maxFeatures; + + gmg(d_frame, d_fgmask); + + for (int i = 0; i < 150; ++i) + { + cap >> frame; + if (frame.empty()) + { + cap.open(inputFile); + cap >> frame; + } + + if (cn != 3) + { + cv::Mat temp; + if (cn == 1) + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + else + cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); + cv::swap(temp, frame); + } + + d_frame.upload(frame); + + startTimer(); next(); + gmg(d_frame, d_fgmask); + stopTimer(); + } +} + +INSTANTIATE_TEST_CASE_P(Video, GMG, testing::Combine( + ALL_DEVICES, + testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")), + testing::Values(Channels(1), Channels(3), Channels(4)), + testing::Values(MaxFeatures(20), MaxFeatures(40), MaxFeatures(60)))); + ////////////////////////////////////////////////////// // VideoWriter diff --git a/modules/gpu/perf_cpu/perf_video.cpp b/modules/gpu/perf_cpu/perf_video.cpp index f635f42b0b..bada376c04 100644 --- a/modules/gpu/perf_cpu/perf_video.cpp +++ b/modules/gpu/perf_cpu/perf_video.cpp @@ -328,6 +328,76 @@ INSTANTIATE_TEST_CASE_P(Video, MOG2_getBackgroundImage, testing::Combine( testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")), testing::Values(/*Channels(1),*/ Channels(3)/*, Channels(4)*/))); +////////////////////////////////////////////////////// +// GMG + +IMPLEMENT_PARAM_CLASS(MaxFeatures, int) + +GPU_PERF_TEST(GMG, cv::gpu::DeviceInfo, std::string, Channels, MaxFeatures) +{ + std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1)); + int cn = GET_PARAM(2); + int maxFeatures = GET_PARAM(3); + + cv::VideoCapture cap(inputFile); + ASSERT_TRUE(cap.isOpened()); + + cv::Mat frame; + cap >> frame; + ASSERT_FALSE(frame.empty()); + + if (cn != 3) + { + cv::Mat temp; + if (cn == 1) + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + else + cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); + cv::swap(temp, frame); + } + + cv::Mat fgmask; + cv::Mat zeros(frame.size(), CV_8UC1, 
cv::Scalar::all(0)); + + cv::BackgroundSubtractorGMG gmg; + gmg.set("maxFeatures", maxFeatures); + gmg.initializeType(frame, 0.0, 255.0); + + gmg(frame, fgmask); + gmg.updateBackgroundModel(zeros); + + for (int i = 0; i < 150; ++i) + { + cap >> frame; + if (frame.empty()) + { + cap.open(inputFile); + cap >> frame; + } + + if (cn != 3) + { + cv::Mat temp; + if (cn == 1) + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + else + cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); + cv::swap(temp, frame); + } + + startTimer(); next(); + gmg(frame, fgmask); + gmg.updateBackgroundModel(zeros); + stopTimer(); + } +} + +INSTANTIATE_TEST_CASE_P(Video, GMG, testing::Combine( + ALL_DEVICES, + testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")), + testing::Values(Channels(1), Channels(3), Channels(4)), + testing::Values(MaxFeatures(20), MaxFeatures(40), MaxFeatures(60)))); + ////////////////////////////////////////////////////// // VideoWriter diff --git a/modules/gpu/src/bgfg_gmg.cpp b/modules/gpu/src/bgfg_gmg.cpp new file mode 100644 index 0000000000..6e0ed9e631 --- /dev/null +++ b/modules/gpu/src/bgfg_gmg.cpp @@ -0,0 +1,168 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" + +#ifndef HAVE_CUDA + +cv::gpu::GMG_GPU::GMG_GPU() { throw_nogpu(); } +void cv::gpu::GMG_GPU::initialize(cv::Size, float, float) { throw_nogpu(); } +void cv::gpu::GMG_GPU::operator ()(const cv::gpu::GpuMat&, cv::gpu::GpuMat&, float, cv::gpu::Stream&) { throw_nogpu(); } +void cv::gpu::GMG_GPU::release() {} + +#else + +namespace cv { namespace gpu { namespace device { + namespace bgfg_gmg + { + void loadConstants(int width, int height, float minVal, float maxVal, int quantizationLevels, float backgroundPrior, + float decisionThreshold, int maxFeatures, int numInitializationFrames); + + template + void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, + int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + } +}}} + +cv::gpu::GMG_GPU::GMG_GPU() +{ + maxFeatures = 64; + learningRate = 0.025f; + numInitializationFrames = 120; + quantizationLevels = 16; + backgroundPrior = 0.8f; + decisionThreshold = 0.8f; + smoothingRadius = 7; + updateBackgroundModel = true; +} + +void cv::gpu::GMG_GPU::initialize(cv::Size frameSize, float min, float max) +{ + using namespace cv::gpu::device::bgfg_gmg; + + CV_Assert(min < max); + CV_Assert(maxFeatures > 0); + CV_Assert(learningRate >= 0.0f && learningRate <= 1.0f); + CV_Assert(numInitializationFrames >= 1); + CV_Assert(quantizationLevels >= 1 && quantizationLevels <= 255); + CV_Assert(backgroundPrior >= 0.0f && backgroundPrior <= 1.0f); + + minVal_ = min; + maxVal_ = max; + + frameSize_ = frameSize; + + frameNum_ = 0; + + nfeatures_.create(frameSize_, CV_32SC1); + colors_.create(maxFeatures * frameSize_.height, frameSize_.width, CV_32SC1); + weights_.create(maxFeatures * frameSize_.height, frameSize_.width, CV_32FC1); + + nfeatures_.setTo(cv::Scalar::all(0)); + + if (smoothingRadius > 0) + boxFilter_ = cv::gpu::createBoxFilter_GPU(CV_8UC1, CV_8UC1, cv::Size(smoothingRadius, smoothingRadius)); + + loadConstants(frameSize_.width, frameSize_.height, minVal_, maxVal_, quantizationLevels, backgroundPrior, decisionThreshold, maxFeatures, numInitializationFrames); +} + +void cv::gpu::GMG_GPU::operator ()(const cv::gpu::GpuMat& frame, cv::gpu::GpuMat& fgmask, float newLearningRate, cv::gpu::Stream& stream) +{ + using namespace cv::gpu::device::bgfg_gmg; + + typedef void (*func_t)(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, + int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + static const func_t funcs[6][4] = + { + {update_gpu, 0, update_gpu, update_gpu}, + {0,0,0,0}, + {update_gpu, 0, update_gpu, update_gpu}, + {0,0,0,0}, + {0,0,0,0}, + {update_gpu, 0, update_gpu, update_gpu} + }; + + CV_Assert(frame.depth() == CV_8U || frame.depth() == CV_16U || frame.depth() == CV_32F); + CV_Assert(frame.channels() == 1 || frame.channels() == 3 || frame.channels() == 4); + + if (newLearningRate != -1.0f) + { + CV_Assert(newLearningRate >= 0.0f && newLearningRate <= 1.0f); + learningRate = newLearningRate; + } + + if (frame.size() != frameSize_) + initialize(frame.size(), 0.0f, frame.depth() == CV_8U ? 255.0f : frame.depth() == CV_16U ? 
std::numeric_limits::max() : 1.0f); + + fgmask.create(frameSize_, CV_8UC1); + if (stream) + stream.enqueueMemSet(fgmask, cv::Scalar::all(0)); + else + fgmask.setTo(cv::Scalar::all(0)); + + funcs[frame.depth()][frame.channels() - 1](frame, fgmask, colors_, weights_, nfeatures_, frameNum_, learningRate, updateBackgroundModel, cv::gpu::StreamAccessor::getStream(stream)); + + // medianBlur + if (smoothingRadius > 0) + { + boxFilter_->apply(fgmask, buf_, cv::Rect(0,0,-1,-1), stream); + int minCount = (smoothingRadius * smoothingRadius + 1) / 2; + double thresh = 255.0 * minCount / (smoothingRadius * smoothingRadius); + cv::gpu::threshold(buf_, fgmask, thresh, 255.0, cv::THRESH_BINARY, stream); + } + + // keep track of how many frames we have processed + ++frameNum_; +} + +void cv::gpu::GMG_GPU::release() +{ + frameSize_ = Size(); + + nfeatures_.release(); + colors_.release(); + weights_.release(); + boxFilter_.release(); + buf_.release(); +} + +#endif diff --git a/modules/gpu/src/cuda/bgfg_gmg.cu b/modules/gpu/src/cuda/bgfg_gmg.cu new file mode 100644 index 0000000000..76ebb2da09 --- /dev/null +++ b/modules/gpu/src/cuda/bgfg_gmg.cu @@ -0,0 +1,253 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or bpied warranties, including, but not limited to, the bpied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "opencv2/gpu/device/common.hpp" +#include "opencv2/gpu/device/vec_traits.hpp" +#include "opencv2/gpu/device/limits.hpp" + +namespace cv { namespace gpu { namespace device { + namespace bgfg_gmg + { + __constant__ int c_width; + __constant__ int c_height; + __constant__ float c_minVal; + __constant__ float c_maxVal; + __constant__ int c_quantizationLevels; + __constant__ float c_backgroundPrior; + __constant__ float c_decisionThreshold; + __constant__ int c_maxFeatures; + __constant__ int c_numInitializationFrames; + + void loadConstants(int width, int height, float minVal, float maxVal, int quantizationLevels, float backgroundPrior, + float decisionThreshold, int maxFeatures, int numInitializationFrames) + { + cudaSafeCall( cudaMemcpyToSymbol(c_width, &width, sizeof(width)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_height, &height, sizeof(height)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_minVal, &minVal, sizeof(minVal)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_maxVal, &maxVal, sizeof(maxVal)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_quantizationLevels, &quantizationLevels, sizeof(quantizationLevels)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_backgroundPrior, &backgroundPrior, sizeof(backgroundPrior)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_decisionThreshold, &decisionThreshold, sizeof(decisionThreshold)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_maxFeatures, &maxFeatures, sizeof(maxFeatures)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_numInitializationFrames, &numInitializationFrames, sizeof(numInitializationFrames)) ); + } + + __device__ float findFeature(const int color, const PtrStepi& colors, const PtrStepf& weights, const int x, const int y, const int nfeatures) + { + for (int i = 0, fy = y; i < nfeatures; ++i, fy += c_height) + { + if (color == colors(fy, x)) + return weights(fy, x); + } + + // not in histogram, so return 0. 
+ return 0.0f; + } + + __device__ void normalizeHistogram(PtrStepf weights, const int x, const int y, const int nfeatures) + { + float total = 0.0f; + for (int i = 0, fy = y; i < nfeatures; ++i, fy += c_height) + total += weights(fy, x); + + if (total != 0.0f) + { + for (int i = 0, fy = y; i < nfeatures; ++i, fy += c_height) + weights(fy, x) /= total; + } + } + + __device__ bool insertFeature(const int color, const float weight, PtrStepi colors, PtrStepf weights, const int x, const int y, int& nfeatures) + { + for (int i = 0, fy = y; i < nfeatures; ++i, fy += c_height) + { + if (color == colors(fy, x)) + { + // feature in histogram + + weights(fy, x) += weight; + + return false; + } + } + + if (nfeatures == c_maxFeatures) + { + // discard oldest feature + + int idx = -1; + float minVal = numeric_limits::max(); + for (int i = 0, fy = y; i < nfeatures; ++i, fy += c_height) + { + const float w = weights(fy, x); + if (w < minVal) + { + minVal = w; + idx = fy; + } + } + + colors(idx, x) = color; + weights(idx, x) = weight; + + return false; + } + + colors(nfeatures * c_height + y, x) = color; + weights(nfeatures * c_height + y, x) = weight; + + ++nfeatures; + + return true; + } + + namespace detail + { + template struct Quantization + { + template + __device__ static int apply(const T& val) + { + int res = 0; + res |= static_cast((val.x - c_minVal) * c_quantizationLevels / (c_maxVal - c_minVal)); + res |= static_cast((val.y - c_minVal) * c_quantizationLevels / (c_maxVal - c_minVal)) << 8; + res |= static_cast((val.z - c_minVal) * c_quantizationLevels / (c_maxVal - c_minVal)) << 16; + return res; + } + }; + + template <> struct Quantization<1> + { + template + __device__ static int apply(T val) + { + return static_cast((val - c_minVal) * c_quantizationLevels / (c_maxVal - c_minVal)); + } + }; + } + + template struct Quantization : detail::Quantization::cn> {}; + + template + __global__ void update(const PtrStep_ frame, PtrStepb fgmask, PtrStepi colors_, PtrStepf weights_, PtrStepi nfeatures_, + const int frameNum, const float learningRate, const bool updateBackgroundModel) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= c_width || y >= c_height) + return; + + const SrcT pix = frame(y, x); + const int newFeatureColor = Quantization::apply(pix); + + int nfeatures = nfeatures_(y, x); + + if (frameNum >= c_numInitializationFrames) + { + // typical operation + + const float weight = findFeature(newFeatureColor, colors_, weights_, x, y, nfeatures); + + // see Godbehere, Matsukawa, Goldberg (2012) for reasoning behind this implementation of Bayes rule + const float posterior = (weight * c_backgroundPrior) / (weight * c_backgroundPrior + (1.0f - weight) * (1.0f - c_backgroundPrior)); + + const bool isForeground = ((1.0f - posterior) > c_decisionThreshold); + fgmask(y, x) = (uchar)(-isForeground); + + // update histogram. 
+ + if (updateBackgroundModel) + { + for (int i = 0, fy = y; i < nfeatures; ++i, fy += c_height) + weights_(fy, x) *= 1.0f - learningRate; + + bool inserted = insertFeature(newFeatureColor, learningRate, colors_, weights_, x, y, nfeatures); + + if (inserted) + { + normalizeHistogram(weights_, x, y, nfeatures); + nfeatures_(y, x) = nfeatures; + } + } + } + else if (updateBackgroundModel) + { + // training-mode update + + insertFeature(newFeatureColor, 1.0f, colors_, weights_, x, y, nfeatures); + + if (frameNum == c_numInitializationFrames - 1) + normalizeHistogram(weights_, x, y, nfeatures); + } + } + + template + void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, + int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream) + { + const dim3 block(32, 8); + const dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y)); + + cudaSafeCall( cudaFuncSetCacheConfig(update, cudaFuncCachePreferL1) ); + + update<<>>((DevMem2D_) frame, fgmask, colors, weights, nfeatures, frameNum, learningRate, updateBackgroundModel); + + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + + template void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + template void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + template void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + + template void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + template void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + template void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + + template void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + template void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + template void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + } +}}} diff --git a/modules/gpu/test/test_video.cpp b/modules/gpu/test/test_video.cpp index db0d6f2ec8..0ee66ba522 100644 --- a/modules/gpu/test/test_video.cpp +++ b/modules/gpu/test/test_video.cpp @@ -624,6 +624,9 @@ TEST_P(MOG2, Update) TEST_P(MOG2, getBackgroundImage) { + if (useGray) + return; + cv::VideoCapture cap(inputFile); ASSERT_TRUE(cap.isOpened()); @@ -640,13 +643,6 @@ TEST_P(MOG2, getBackgroundImage) cap >> frame; ASSERT_FALSE(frame.empty()); -// if (useGray) -// { -// cv::Mat temp; -// 
cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); -// cv::swap(temp, frame); -// } - mog2(loadMat(frame, useRoi), foreground); mog2_gold(frame, foreground_gold); @@ -667,6 +663,101 @@ INSTANTIATE_TEST_CASE_P(GPU_Video, MOG2, testing::Combine( testing::Values(UseGray(true), UseGray(false)), WHOLE_SUBMAT)); +////////////////////////////////////////////////////// +// VIBE + +PARAM_TEST_CASE(VIBE, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) +{ +}; + +TEST_P(VIBE, Accuracy) +{ + const cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + const cv::Size size = GET_PARAM(1); + const int type = GET_PARAM(2); + const bool useRoi = GET_PARAM(3); + + const cv::Mat fullfg(size, CV_8UC1, cv::Scalar::all(255)); + + cv::Mat frame = randomMat(size, type, 0.0, 100); + cv::gpu::GpuMat d_frame = loadMat(frame, useRoi); + + cv::gpu::VIBE_GPU vibe; + cv::gpu::GpuMat d_fgmask = createMat(size, CV_8UC1, useRoi); + vibe.initialize(d_frame); + + for (int i = 0; i < 20; ++i) + vibe(d_frame, d_fgmask); + + frame = randomMat(size, type, 160, 255); + d_frame = loadMat(frame, useRoi); + vibe(d_frame, d_fgmask); + + // now fgmask should be entirely foreground + ASSERT_MAT_NEAR(fullfg, d_fgmask, 0); +} + +INSTANTIATE_TEST_CASE_P(GPU_Video, VIBE, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4)), + WHOLE_SUBMAT)); + +////////////////////////////////////////////////////// +// GMG + +PARAM_TEST_CASE(GMG, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi) +{ +}; + +TEST_P(GMG, Accuracy) +{ + const cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + const cv::Size size = GET_PARAM(1); + const int depth = GET_PARAM(2); + const int channels = GET_PARAM(3); + const bool useRoi = GET_PARAM(4); + + const int type = CV_MAKE_TYPE(depth, channels); + + const cv::Mat zeros(size, CV_8UC1, cv::Scalar::all(0)); + const cv::Mat fullfg(size, CV_8UC1, cv::Scalar::all(255)); + + cv::Mat frame = randomMat(size, type, 0, 100); + cv::gpu::GpuMat d_frame = loadMat(frame, useRoi); + + cv::gpu::GMG_GPU gmg; + gmg.numInitializationFrames = 5; + gmg.smoothingRadius = 0; + gmg.initialize(d_frame.size(), 0, 255); + + cv::gpu::GpuMat d_fgmask = createMat(size, CV_8UC1, useRoi); + + for (int i = 0; i < gmg.numInitializationFrames; ++i) + { + gmg(d_frame, d_fgmask); + + // fgmask should be entirely background during training + ASSERT_MAT_NEAR(zeros, d_fgmask, 0); + } + + frame = randomMat(size, type, 160, 255); + d_frame = loadMat(frame, useRoi); + gmg(d_frame, d_fgmask); + + // now fgmask should be entirely foreground + ASSERT_MAT_NEAR(fullfg, d_fgmask, 0); +} + +INSTANTIATE_TEST_CASE_P(GPU_Video, GMG, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8U), MatType(CV_16U), MatType(CV_32F)), + testing::Values(Channels(1), Channels(3), Channels(4)), + WHOLE_SUBMAT)); + ////////////////////////////////////////////////////// // VideoWriter diff --git a/samples/gpu/bgfg_segm.cpp b/samples/gpu/bgfg_segm.cpp index 839b0a982c..7c5e148f77 100644 --- a/samples/gpu/bgfg_segm.cpp +++ b/samples/gpu/bgfg_segm.cpp @@ -14,7 +14,8 @@ enum Method FGD_STAT, MOG, MOG2, - VIBE + VIBE, + GMG }; int main(int argc, const char** argv) @@ -22,7 +23,7 @@ int main(int argc, const char** argv) cv::CommandLineParser cmd(argc, argv, "{ c | camera | false | use camera }" "{ f | file | 768x576.avi | input video file }" - "{ m | method | mog | method (fgd_stat, mog, mog2, vibe) }" + "{ m | method | mog | 
method (fgd, mog, mog2, vibe, gmg) }"
        "{ h | help | false | print help message }");

    if (cmd.get<bool>("help"))
@@ -37,13 +38,13 @@
     string file = cmd.get<string>("file");
     string method = cmd.get<string>("method");

-    if (method != "fgd_stat" && method != "mog" && method != "mog2" && method != "vibe")
+    if (method != "fgd" && method != "mog" && method != "mog2" && method != "vibe" && method != "gmg")
     {
         cerr << "Incorrect method" << endl;
         return -1;
     }

-    Method m = method == "fgd_stat" ? FGD_STAT : method == "mog" ? MOG : method == "mog2" ? MOG2 : VIBE;
+    Method m = method == "fgd" ? FGD_STAT : method == "mog" ? MOG : method == "mog2" ? MOG2 : method == "vibe" ? VIBE : GMG;

     VideoCapture cap;
@@ -67,6 +68,8 @@
     MOG_GPU mog;
     MOG2_GPU mog2;
     VIBE_GPU vibe;
+    GMG_GPU gmg;
+    gmg.numInitializationFrames = 40;

     GpuMat d_fgmask;
     GpuMat d_fgimg;
@@ -93,12 +96,16 @@
     case VIBE:
         vibe.initialize(d_frame);
         break;
+
+    case GMG:
+        gmg.initialize(d_frame.size());
+        break;
     }

     namedWindow("image", WINDOW_NORMAL);
     namedWindow("foreground mask", WINDOW_NORMAL);
     namedWindow("foreground image", WINDOW_NORMAL);
-    if (m != VIBE)
+    if (m != VIBE && m != GMG)
         namedWindow("mean background image", WINDOW_NORMAL);

     for(;;)
@@ -108,6 +115,8 @@
             break;

         d_frame.upload(frame);

+        int64 start = cv::getTickCount();
+
         //update the model
         switch (m)
         {
@@ -130,8 +139,15 @@
         case VIBE:
             vibe(d_frame, d_fgmask);
             break;
+
+        case GMG:
+            gmg(d_frame, d_fgmask);
+            break;
         }

+        double fps = cv::getTickFrequency() / (cv::getTickCount() - start);
+        std::cout << "FPS : " << fps << std::endl;
+
         d_fgimg.setTo(Scalar::all(0));
         d_frame.copyTo(d_fgimg, d_fgmask);
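
For quick reference, below is a minimal, self-contained sketch of how the new ``gpu::GMG_GPU`` class can be driven, assembled only from the API introduced in this patch (``initialize``, ``operator()``, ``release`` and the ``numInitializationFrames`` field). The input file name is a placeholder and the 40-frame initialization count simply mirrors the sample above; this is illustrative, not part of the patch. ::

    #include <iostream>
    #include "opencv2/opencv.hpp"
    #include "opencv2/gpu/gpu.hpp"

    int main()
    {
        cv::VideoCapture cap("768x576.avi");   // placeholder input video
        if (!cap.isOpened()) return -1;

        cv::Mat frame;
        cap >> frame;
        if (frame.empty()) return -1;

        cv::gpu::GMG_GPU gmg;
        gmg.numInitializationFrames = 40;      // frames used to build the initial histograms
        gmg.initialize(frame.size());          // allocate buffers; default pixel range [0, 255]

        cv::gpu::GpuMat d_frame, d_fgmask;
        cv::Mat fgmask;

        for (;;)
        {
            cap >> frame;
            if (frame.empty()) break;

            d_frame.upload(frame);
            gmg(d_frame, d_fgmask);            // update model; 8UC1 mask, 255 = foreground

            d_fgmask.download(fgmask);         // copy result back for display
            cv::imshow("foreground mask", fgmask);
            if (cv::waitKey(30) >= 0) break;
        }

        gmg.release();                         // free inner buffers
        return 0;
    }

During the first ``numInitializationFrames`` frames the mask stays entirely background while the per-pixel histograms are trained, as exercised by the accuracy test added in ``test_video.cpp``.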