Merge remote-tracking branch 'origin/2.4' into merge-2.4

Conflicts: .gitignore doc/tutorials/objdetect/cascade_classifier/cascade_classifier.rst modules/gpu/src/match_template.cpp modules/imgproc/include/opencv2/imgproc/imgproc.hpp modules/ocl/include/opencv2/ocl/ocl.hpp modules/ocl/perf/perf_precomp.hpp
12 years ago · 95c2e8b51f
parent 77a2529eb7 531471b0aa
commit 95c2e8b51f
41 changed files with 3519 additions and 148 deletions
--- a/.gitignore
+++ b/.gitignore
@ -9,3 +9,4 @@ tegra/
 tags
 build/
 Thumbs.db
+*.autosave
--- a/cmake/OpenCVModule.cmake
+++ b/cmake/OpenCVModule.cmake
@ -33,6 +33,7 @@
 #   <add extra installation rules>
 #   ocv_add_accuracy_tests(<extra dependencies>)
 #   ocv_add_perf_tests(<extra dependencies>)
+#   ocv_add_samples(<extra dependencies>)
 #
 #
 # If module have no "extra" then you can define it in one line:
@ -581,6 +582,7 @@ macro(ocv_define_module module_name)

  ocv_add_accuracy_tests()
  ocv_add_perf_tests()
+  ocv_add_samples()
 endmacro()

 # ensures that all passed modules are available
@ -725,6 +727,48 @@ function(ocv_add_accuracy_tests)
  endif()
 endfunction()

+function(ocv_add_samples)
+  set(samples_path "${CMAKE_CURRENT_SOURCE_DIR}/samples")
+  string(REGEX REPLACE "^opencv_" "" module_id ${the_module})
+
+  if(BUILD_EXAMPLES AND EXISTS "${samples_path}")
+    set(samples_deps ${the_module} ${OPENCV_MODULE_${the_module}_DEPS} opencv_highgui ${ARGN})
+    ocv_check_dependencies(${samples_deps})
+
+    if(OCV_DEPENDENCIES_FOUND)
+      file(GLOB sample_sources "${samples_path}/*.cpp")
+      ocv_include_modules(${OPENCV_MODULE_${the_module}_DEPS})
+
+      foreach(source ${sample_sources})
+        get_filename_component(name "${source}" NAME_WE)
+        set(the_target "example_${module_id}_${name}")
+
+        add_executable(${the_target} "${source}")
+        target_link_libraries(${the_target} ${samples_deps})
+
+        set_target_properties(${the_target} PROPERTIES PROJECT_LABEL "(sample) ${name}")
+
+        if(ENABLE_SOLUTION_FOLDERS)
+          set_target_properties(${the_target} PROPERTIES
+            OUTPUT_NAME "${module_id}-example-${name}"
+            FOLDER "samples/${module_id}")
+        endif()
+
+        if(WIN32)
+          install(TARGETS ${the_target} RUNTIME DESTINATION "samples/${module_id}" COMPONENT main)
+        endif()
+      endforeach()
+    endif()
+  endif()
+
+  if(INSTALL_C_EXAMPLES AND NOT WIN32 AND EXISTS "${samples_path}")
+    file(GLOB sample_files "${samples_path}/*")
+    install(FILES ${sample_files}
+            DESTINATION share/OpenCV/samples/${module_id}
+            PERMISSIONS OWNER_READ GROUP_READ WORLD_READ)
+  endif()
+endfunction()
+
 # internal macro; finds all link dependencies of the module
 # should be used at the end of CMake processing
 macro(__ocv_track_module_link_dependencies the_module optkind)
--- a/doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.rst
+++ b/doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.rst
@ -40,7 +40,7 @@ Code
   * Display the detected circle in a window.

   .. |TutorialHoughCirclesSimpleDownload| replace:: here
-   .. _TutorialHoughCirclesSimpleDownload: http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/cpp/houghlines.cpp
+   .. _TutorialHoughCirclesSimpleDownload: http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/cpp/houghcircles.cpp
   .. |TutorialHoughCirclesFancyDownload| replace:: here
   .. _TutorialHoughCirclesFancyDownload: http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/cpp/tutorial_code/ImgTrans/HoughCircle_Demo.cpp

--- a/modules/contrib/doc/facerec/facerec_api.rst
+++ b/modules/contrib/doc/facerec/facerec_api.rst
@ -70,6 +70,8 @@ Moreover every :ocv:class:`FaceRecognizer` supports the:

 * **Loading/Saving** the model state from/to a given XML or YAML.

+.. note:: When using the FaceRecognizer interface in combination with Python, please stick to Python 2. Some underlying scripts like create_csv will not work in other versions, like Python 3.
+
 Setting the Thresholds
 +++++++++++++++++++++++

--- a/modules/flann/include/opencv2/flann/any.h
+++ b/modules/flann/include/opencv2/flann/any.h
@ -257,8 +257,7 @@ public:
    const T& cast() const
    {
        if (policy->type() != typeid(T)) throw anyimpl::bad_any_cast();
-        void* obj = const_cast<void*>(object);
-        T* r = reinterpret_cast<T*>(policy->get_value(&obj));
+        T* r = reinterpret_cast<T*>(policy->get_value(const_cast<void **>(&object)));
        return *r;
    }

--- a/modules/gpuwarping/src/cuda/resize.cu
+++ b/modules/gpuwarping/src/cuda/resize.cu
@ -194,7 +194,7 @@ namespace cv { namespace gpu { namespace cudev
    }

    template <typename T>
-    void call_resize_nearest_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
+    void call_resize_nearest_tex(const PtrStepSz<T>& /*src*/, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
    {
        const dim3 block(32, 8);
        const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
@ -301,7 +301,7 @@ namespace cv { namespace gpu { namespace cudev

    template <typename T> struct ResizeNearestDispatcher
    {
-        static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
+        static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& /*srcWhole*/, int /*yoff*/, int /*xoff*/, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
        {
            call_resize_nearest_glob(src, dst, fy, fx, stream);
        }
--- a/modules/imgproc/doc/filtering.rst
+++ b/modules/imgproc/doc/filtering.rst
@ -412,6 +412,28 @@ http://www.dai.ed.ac.uk/CVonline/LOCAL\_COPIES/MANDUCHI1/Bilateral\_Filtering.ht
 This filter does not work inplace.


+adaptiveBilateralFilter
+-----------------------
+Applies the adaptive bilateral filter to an image.
+
+.. ocv:function:: void adaptiveBilateralFilter( InputArray src, OutputArray dst, Size ksize, double sigmaSpace, Point anchor=Point(-1, -1), int borderType=BORDER_DEFAULT )
+
+.. ocv:pyfunction:: cv2.adaptiveBilateralFilter(src, ksize, sigmaSpace[, dst[, anchor[, borderType]]]) -> dst
+
+    :param src: Source 8-bit, 1-channel or 3-channel image.
+
+    :param dst: Destination image of the same size and type as  ``src`` .
+
+    :param ksize: filter kernel size.
+
+    :param sigmaSpace: Filter sigma in the coordinate space. It has similar meaning with ``sigmaSpace`` in ``bilateralFilter``.
+
+    :param anchor: anchor point; default value ``Point(-1,-1)`` means that the anchor is at the kernel center. Only default value is supported now.
+
+    :param borderType: border mode used to extrapolate pixels outside of the image.
+
+The function applies adaptive bilateral filtering to the input image. This filter is similar to ``bilateralFilter``, in that dissimilarity from and distance to the center pixel is punished. Instead of using ``sigmaColor``, we employ the variance of pixel values in the neighbourhood.
+


 blur
--- a/modules/imgproc/include/opencv2/imgproc.hpp
+++ b/modules/imgproc/include/opencv2/imgproc.hpp
@ -1061,6 +1061,11 @@ CV_EXPORTS_W void bilateralFilter( InputArray src, OutputArray dst, int d,
                                   double sigmaColor, double sigmaSpace,
                                   int borderType = BORDER_DEFAULT );

+//! smooths the image using adaptive bilateral filter
+CV_EXPORTS_W void adaptiveBilateralFilter( InputArray src, OutputArray dst, Size ksize,
+                                           double sigmaSpace, Point anchor=Point(-1, -1),
+                                           int borderType=BORDER_DEFAULT );
+
 //! smooths the image using the box filter. Each pixel is processed in O(1) time
 CV_EXPORTS_W void boxFilter( InputArray src, OutputArray dst, int ddepth,
                             Size ksize, Point anchor = Point(-1,-1),
--- a/modules/imgproc/src/color.cpp
+++ b/modules/imgproc/src/color.cpp
@ -254,19 +254,19 @@ bool CvtColorIPPLoopCopy(Mat& src, Mat& dst, const Cvt& cvt)
    return ok;
 }

-IppStatus __stdcall ippiSwapChannels_8u_C3C4Rf(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep,
+static IppStatus CV_STDCALL ippiSwapChannels_8u_C3C4Rf(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep,
         IppiSize roiSize, const int *dstOrder)
 {
    return ippiSwapChannels_8u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP8u);
 }

-IppStatus __stdcall ippiSwapChannels_16u_C3C4Rf(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep,
+static IppStatus CV_STDCALL ippiSwapChannels_16u_C3C4Rf(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep,
         IppiSize roiSize, const int *dstOrder)
 {
    return ippiSwapChannels_16u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP16u);
 }

-IppStatus __stdcall ippiSwapChannels_32f_C3C4Rf(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep,
+static IppStatus CV_STDCALL ippiSwapChannels_32f_C3C4Rf(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep,
         IppiSize roiSize, const int *dstOrder)
 {
    return ippiSwapChannels_32f_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP32f);
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@ -50,9 +50,73 @@
 #include <iostream>
 #include <vector>

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+static IppStatus sts = ippInit();
+#endif
+
 namespace cv
 {

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    typedef IppStatus (CV_STDCALL* ippiSetFunc)(const void*, void *, int, IppiSize);
+    typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int);
+    typedef IppStatus (CV_STDCALL* ippiWarpAffineBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [2][3], int);
+    typedef IppStatus (CV_STDCALL* ippiResizeSqrPixelFunc)(const void*, IppiSize, int, IppiRect, void*, int, IppiRect, double, double, double, double, int, Ipp8u *);
+
+    template <int channels, typename Type>
+    bool IPPSetSimple(cv::Scalar value, void *dataPointer, int step, IppiSize &size, ippiSetFunc func)
+    {
+        Type values[channels];
+        for( int i = 0; i < channels; i++ )
+            values[i] = (Type)value[i];
+        return func(values, dataPointer, step, size) >= 0;
+    }
+
+    bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth)
+    {
+        if( channels == 1 )
+        {
+            switch( depth )
+            {
+            case CV_8U:
+                return ippiSet_8u_C1R((Ipp8u)value[0], (Ipp8u *)dataPointer, step, size) >= 0;
+            case CV_16U:
+                return ippiSet_16u_C1R((Ipp16u)value[0], (Ipp16u *)dataPointer, step, size) >= 0;
+            case CV_32F:
+                return ippiSet_32f_C1R((Ipp32f)value[0], (Ipp32f *)dataPointer, step, size) >= 0;
+            }
+        }
+        else
+        {
+            if( channels == 3 )
+            {
+                switch( depth )
+                {
+                case CV_8U:
+                    return IPPSetSimple<3, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C3R);
+                case CV_16U:
+                    return IPPSetSimple<3, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C3R);
+                case CV_32F:
+                    return IPPSetSimple<3, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C3R);
+                }
+            }
+            else if( channels == 4 )
+            {
+                switch( depth )
+                {
+                case CV_8U:
+                    return IPPSetSimple<4, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C4R);
+                case CV_16U:
+                    return IPPSetSimple<4, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C4R);
+                case CV_32F:
+                    return IPPSetSimple<4, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C4R);
+                }
+            }
+        }
+        return false;
+    }
+#endif
+
 /************** interpolation formulas and tables ***************/

 const int INTER_RESIZE_COEF_BITS=11;
@ -1795,6 +1859,45 @@ static int computeResizeAreaTab( int ssize, int dsize, int cn, double scale, Dec
    return k;
 }

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+class IPPresizeInvoker :
+    public ParallelLoopBody
+{
+public:
+    IPPresizeInvoker(Mat &_src, Mat &_dst, double &_inv_scale_x, double &_inv_scale_y, int _mode, ippiResizeSqrPixelFunc _func, bool *_ok) :
+      ParallelLoopBody(), src(_src), dst(_dst), inv_scale_x(_inv_scale_x), inv_scale_y(_inv_scale_y), mode(_mode), func(_func), ok(_ok)
+      {
+          *ok = true;
+      }
+
+      virtual void operator() (const Range& range) const
+      {
+          int cn = src.channels();
+          IppiRect srcroi = { 0, range.start, src.cols, range.end - range.start };
+          int dsty = CV_IMIN(cvRound(range.start * inv_scale_y), dst.rows);
+          int dstwidth = CV_IMIN(cvRound(src.cols * inv_scale_x), dst.cols);
+          int dstheight = CV_IMIN(cvRound(range.end * inv_scale_y), dst.rows);
+          IppiRect dstroi = { 0, dsty, dstwidth, dstheight - dsty };
+          int bufsize;
+          ippiResizeGetBufSize( srcroi, dstroi, cn, mode, &bufsize );
+          Ipp8u *buf;
+          buf = ippsMalloc_8u( bufsize );
+          IppStatus sts;
+          if( func( src.data, ippiSize(src.cols, src.rows), (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, inv_scale_x, inv_scale_y, 0, 0, mode, buf ) < 0 )
+              *ok = false;
+          ippsFree(buf);
+      }
+private:
+    Mat &src;
+    Mat &dst;
+    double inv_scale_x;
+    double inv_scale_y;
+    int mode;
+    ippiResizeSqrPixelFunc func;
+    bool *ok;
+    const IPPresizeInvoker& operator= (const IPPresizeInvoker&);
+};
+#endif

 }

@ -1937,6 +2040,34 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
    double scale_x = 1./inv_scale_x, scale_y = 1./inv_scale_y;
    int k, sx, sy, dx, dy;

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    int mode = interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR : 0;
+    int type = src.type();
+    ippiResizeSqrPixelFunc ippFunc =
+        type == CV_8UC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C1R :
+        type == CV_8UC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C3R :
+        type == CV_8UC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C4R :
+        type == CV_16UC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C1R :
+        type == CV_16UC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C3R :
+        type == CV_16UC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C4R :
+        type == CV_16SC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C1R :
+        type == CV_16SC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C3R :
+        type == CV_16SC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C4R :
+        type == CV_32FC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C1R :
+        type == CV_32FC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C3R :
+        type == CV_32FC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C4R :
+        0;
+    if( ippFunc && mode != 0 )
+    {
+        bool ok;
+        Range range(0, src.rows);
+        IPPresizeInvoker invoker(src, dst, inv_scale_x, inv_scale_y, mode, ippFunc, &ok);
+        parallel_for_(range, invoker, dst.total()/(double)(1<<16));
+        if( ok )
+            return;
+    }
+#endif
+
    if( interpolation == INTER_NEAREST )
    {
        resizeNN( src, dst, inv_scale_x, inv_scale_y );
@ -3446,6 +3577,49 @@ private:
    double *M;
 };

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+class IPPwarpAffineInvoker :
+    public ParallelLoopBody
+{
+public:
+    IPPwarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) :
+      ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
+      {
+          *ok = true;
+      }
+
+      virtual void operator() (const Range& range) const
+      {
+          IppiSize srcsize = { src.cols, src.rows };
+          IppiRect srcroi = { 0, 0, src.cols, src.rows };
+          IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start };
+          int cnn = src.channels();
+          if( borderType == BORDER_CONSTANT )
+          {
+              IppiSize setSize = { dst.cols, range.end - range.start };
+              void *dataPointer = dst.data + dst.step[0] * range.start;
+              if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
+              {
+                  *ok = false;
+                  return;
+              }
+          }
+          if( func( src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode ) < 0) ////Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr
+              *ok = false;
+      }
+private:
+    Mat &src;
+    Mat &dst;
+    double (&coeffs)[2][3];
+    int mode;
+    int borderType;
+    Scalar borderValue;
+    ippiWarpAffineBackFunc func;
+    bool *ok;
+    const IPPwarpAffineInvoker& operator= (const IPPwarpAffineInvoker&);
+};
+#endif
+
 }


@ -3492,6 +3666,50 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
    const int AB_BITS = MAX(10, (int)INTER_BITS);
    const int AB_SCALE = 1 << AB_BITS;

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    int depth = src.depth();
+    int channels = src.channels();
+    if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
+        ( channels == 1 || channels == 3 || channels == 4 ) &&
+        ( borderType == cv::BORDER_TRANSPARENT || ( borderType == cv::BORDER_CONSTANT ) ) )
+    {
+        int type = src.type();
+        ippiWarpAffineBackFunc ippFunc =
+            type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C1R :
+            type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C3R :
+            type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C4R :
+            type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C1R :
+            type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C3R :
+            type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C4R :
+            type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C1R :
+            type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C3R :
+            type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C4R :
+            0;
+        int mode =
+            flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
+            flags == INTER_NEAREST ? IPPI_INTER_NN :
+            flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
+            0;
+        if( mode && ippFunc )
+        {
+            double coeffs[2][3];
+            for( int i = 0; i < 2; i++ )
+            {
+                for( int j = 0; j < 3; j++ )
+                {
+                    coeffs[i][j] = matM.at<double>(i, j);
+                }
+            }
+            bool ok;
+            Range range(0, dst.rows);
+            IPPwarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
+            parallel_for_(range, invoker, dst.total()/(double)(1<<16));
+            if( ok )
+                return;
+        }
+    }
+#endif
+
    for( x = 0; x < dst.cols; x++ )
    {
        adelta[x] = saturate_cast<int>(M[0]*x*AB_SCALE);
@ -3599,6 +3817,50 @@ private:
    Scalar borderValue;
 };

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+class IPPwarpPerspectiveInvoker :
+    public ParallelLoopBody
+{
+public:
+    IPPwarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveBackFunc _func, bool *_ok) :
+      ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
+      {
+          *ok = true;
+      }
+
+      virtual void operator() (const Range& range) const
+      {
+          IppiSize srcsize = {src.cols, src.rows};
+          IppiRect srcroi = {0, 0, src.cols, src.rows};
+          IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start};
+          int cnn = src.channels();
+
+          if( borderType == BORDER_CONSTANT )
+          {
+              IppiSize setSize = {dst.cols, range.end - range.start};
+              void *dataPointer = dst.data + dst.step[0] * range.start;
+              if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
+              {
+                  *ok = false;
+                  return;
+              }
+          }
+          if( func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode) < 0)
+              *ok = false;
+      }
+private:
+    Mat &src;
+    Mat &dst;
+    double (&coeffs)[3][3];
+    int mode;
+    int borderType;
+    const Scalar borderValue;
+    ippiWarpPerspectiveBackFunc func;
+    bool *ok;
+    const IPPwarpPerspectiveInvoker& operator= (const IPPwarpPerspectiveInvoker&);
+};
+#endif
+
 }

 void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
@ -3629,6 +3891,50 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
    if( !(flags & WARP_INVERSE_MAP) )
         invert(matM, matM);

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    int depth = src.depth();
+    int channels = src.channels();
+    if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
+        ( channels == 1 || channels == 3 || channels == 4 ) &&
+        ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) )
+    {
+        int type = src.type();
+        ippiWarpPerspectiveBackFunc ippFunc =
+            type == CV_8UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C1R :
+            type == CV_8UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C3R :
+            type == CV_8UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C4R :
+            type == CV_16UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C1R :
+            type == CV_16UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C3R :
+            type == CV_16UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C4R :
+            type == CV_32FC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C1R :
+            type == CV_32FC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C3R :
+            type == CV_32FC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C4R :
+            0;
+        int mode =
+            flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
+            flags == INTER_NEAREST ? IPPI_INTER_NN :
+            flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
+            0;
+        if( mode && ippFunc )
+        {
+            double coeffs[3][3];
+            for( int i = 0; i < 3; i++ )
+            {
+                for( int j = 0; j < 3; j++ )
+                {
+                    coeffs[i][j] = matM.at<double>(i, j);
+                }
+            }
+            bool ok;
+            Range range(0, dst.rows);
+            IPPwarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
+            parallel_for_(range, invoker, dst.total()/(double)(1<<16));
+            if( ok )
+                return;
+        }
+    }
+#endif
+
    Range range(0, dst.rows);
    warpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue);
    parallel_for_(range, invoker, dst.total()/(double)(1<<16));
--- a/modules/imgproc/src/morph.cpp
+++ b/modules/imgproc/src/morph.cpp
@ -1213,11 +1213,10 @@ static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kerne
 }

 static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
-    InputArray _kernel,
-    const Point &anchor, int iterations,
+    const Mat& _kernel, Point anchor, int iterations,
    int borderType, const Scalar &borderValue)
 {
-    Mat src = _src.getMat(), kernel = _kernel.getMat();
+    Mat src = _src.getMat(), kernel = _kernel;
    if( !( src.depth() == CV_8U || src.depth() == CV_32F ) || ( iterations > 1 ) ||
        !( borderType == cv::BORDER_REPLICATE || (borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue()) )
        || !( op == MORPH_DILATE || op == MORPH_ERODE) )
@ -1248,9 +1247,6 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,

    }
    Size ksize = kernel.data ? kernel.size() : Size(3,3);
-    Point normanchor = normalizeAnchor(anchor, ksize);
-
-    CV_Assert( normanchor.inside(Rect(0, 0, ksize.width, ksize.height)) );

    _dst.create( src.size(), src.type() );
    Mat dst = _dst.getMat();
@ -1265,7 +1261,7 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
    if( !kernel.data )
    {
        ksize = Size(1+iterations*2,1+iterations*2);
-        normanchor = Point(iterations, iterations);
+        anchor = Point(iterations, iterations);
        rectKernel = true;
        iterations = 1;
    }
@ -1273,7 +1269,7 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
    {
        ksize = Size(ksize.width + (iterations-1)*(ksize.width-1),
             ksize.height + (iterations-1)*(ksize.height-1)),
-        normanchor = Point(normanchor.x*iterations, normanchor.y*iterations);
+        anchor = Point(anchor.x*iterations, anchor.y*iterations);
        kernel = Mat();
        rectKernel = true;
        iterations = 1;
@ -1283,7 +1279,7 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
    if( iterations > 1 )
        return false;

-    return IPPMorphReplicate( op, src, dst, kernel, ksize, normanchor, rectKernel );
+    return IPPMorphReplicate( op, src, dst, kernel, ksize, anchor, rectKernel );
 }
 #endif

@ -1292,17 +1288,18 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
                     Point anchor, int iterations,
                     int borderType, const Scalar& borderValue )
 {
+    Mat kernel = _kernel.getMat();
+    Size ksize = kernel.data ? kernel.size() : Size(3,3);
+    anchor = normalizeAnchor(anchor, ksize);
+
+    CV_Assert( anchor.inside(Rect(0, 0, ksize.width, ksize.height)) );

 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-    if( IPPMorphOp(op, _src, _dst, _kernel, anchor, iterations, borderType, borderValue) )
+    if( IPPMorphOp(op, _src, _dst, kernel, anchor, iterations, borderType, borderValue) )
        return;
 #endif

-    Mat src = _src.getMat(), kernel = _kernel.getMat();
-    Size ksize = kernel.data ? kernel.size() : Size(3,3);
-    anchor = normalizeAnchor(anchor, ksize);
-
-    CV_Assert( anchor.inside(Rect(0, 0, ksize.width, ksize.height)) );
+    Mat src = _src.getMat();

    _dst.create( src.size(), src.type() );
    Mat dst = _dst.getMat();
--- a/modules/imgproc/src/smooth.cpp
+++ b/modules/imgproc/src/smooth.cpp
@ -1879,6 +1879,41 @@ private:
    float *space_weight, *color_weight;
 };

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+class IPPBilateralFilter_8u_Invoker :
+    public ParallelLoopBody
+{
+public:
+    IPPBilateralFilter_8u_Invoker(Mat &_src, Mat &_dst, double _sigma_color, double _sigma_space, int _radius, bool *_ok) :
+      ParallelLoopBody(), src(_src), dst(_dst), sigma_color(_sigma_color), sigma_space(_sigma_space), radius(_radius), ok(_ok)
+      {
+          *ok = true;
+      }
+
+      virtual void operator() (const Range& range) const
+      {
+          int d = radius * 2 + 1;
+          IppiSize kernel = {d, d};
+          IppiSize roi={dst.cols, range.end - range.start};
+          int bufsize=0;
+          ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize);
+          AutoBuffer<uchar> buf(bufsize);
+          IppiFilterBilateralSpec *pSpec = (IppiFilterBilateralSpec *)alignPtr(&buf[0], 32);
+          ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, (Ipp32f)sigma_color, (Ipp32f)sigma_space, 1, pSpec );
+          if( ippiFilterBilateral_8u_C1R( src.ptr<uchar>(range.start) + radius * ((int)src.step[0] + 1), (int)src.step[0], dst.ptr<uchar>(range.start), (int)dst.step[0], roi, kernel, pSpec ) < 0)
+              *ok = false;
+      }
+private:
+    Mat &src;
+    Mat &dst;
+    double sigma_color;
+    double sigma_space;
+    int radius;
+    bool *ok;
+    const IPPBilateralFilter_8u_Invoker& operator= (const IPPBilateralFilter_8u_Invoker&);
+};
+#endif
+
 static void
 bilateralFilter_8u( const Mat& src, Mat& dst, int d,
    double sigma_color, double sigma_space,
@ -1908,31 +1943,18 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
    radius = MAX(radius, 1);
    d = radius*2 + 1;

-#if 0 && defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7)
-    if(cn == 1)
+    Mat temp;
+    copyMakeBorder( src, temp, radius, radius, radius, radius, borderType );
+
+#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7)
+    if( cn == 1 )
    {
-        IppiSize kernel = {d, d};
-        IppiSize roi={src.cols, src.rows};
-        int bufsize=0;
-        ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize);
-        AutoBuffer<uchar> buf(bufsize+128);
-        IppiFilterBilateralSpec *pSpec = (IppiFilterBilateralSpec *)alignPtr(&buf[0], 32);
-        ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, sigma_color*sigma_color, sigma_space*sigma_space, 1, pSpec );
-        Mat tsrc;
-        const Mat* psrc = &src;
-        if( src.data == dst.data )
-        {
-            src.copyTo(tsrc);
-            psrc = &tsrc;
-        }
-        if( ippiFilterBilateral_8u_C1R(psrc->data, (int)psrc->step[0],
-                                       dst.data, (int)dst.step[0],
-                                       roi, kernel, pSpec) >= 0 )
-            return;
+        bool ok;
+        IPPBilateralFilter_8u_Invoker body(temp, dst, sigma_color * sigma_color, sigma_space * sigma_space, radius, &ok );
+        parallel_for_(Range(0, dst.rows), body, dst.total()/(double)(1<<16));
+        if( ok ) return;
    }
 #endif
-    Mat temp;
-    copyMakeBorder( src, temp, radius, radius, radius, radius, borderType );

    std::vector<float> _color_weight(cn*256);
    std::vector<float> _space_weight(d*d);
@ -2258,6 +2280,236 @@ void cv::bilateralFilter( InputArray _src, OutputArray _dst, int d,
        "Bilateral filtering is only implemented for 8u and 32f images" );
 }

+
+/****************************************************************************************\
+                                  Adaptive Bilateral Filtering
+\****************************************************************************************/
+
+namespace cv
+{
+#define CALCVAR 1
+#define FIXED_WEIGHT 0
+
+class adaptiveBilateralFilter_8u_Invoker :
+    public ParallelLoopBody
+{
+public:
+    adaptiveBilateralFilter_8u_Invoker(Mat& _dest, const Mat& _temp, Size _ksize, double _sigma_space, Point _anchor) :
+        temp(&_temp), dest(&_dest), ksize(_ksize), sigma_space(_sigma_space), anchor(_anchor)
+    {
+        if( sigma_space <= 0 )
+            sigma_space = 1;
+        CV_Assert((ksize.width & 1) && (ksize.height & 1));
+        space_weight.resize(ksize.width * ksize.height);
+        double sigma2 = sigma_space * sigma_space;
+        int idx = 0;
+        int w = ksize.width / 2;
+        int h = ksize.height / 2;
+        for(int y=-h; y<=h; y++)
+            for(int x=-w; x<=w; x++)
+        {
+            space_weight[idx++] = (float)(sigma2 / (sigma2 + x * x + y * y));
+        }
+    }
+    virtual void operator()(const Range& range) const
+    {
+        int cn = dest->channels();
+        int anX = anchor.x;
+
+        const uchar *tptr;
+
+        for(int i = range.start;i < range.end; i++)
+        {
+            int startY = i;
+            if(cn == 1)
+            {
+                float var;
+                int currVal;
+                int sumVal = 0;
+                int sumValSqr = 0;
+                int currValCenter;
+                int currWRTCenter;
+                float weight;
+                float totalWeight = 0.;
+                float tmpSum = 0.;
+
+                for(int j = 0;j < dest->cols *cn; j+=cn)
+                {
+                    sumVal = 0;
+                    sumValSqr= 0;
+                    totalWeight = 0.;
+                    tmpSum = 0.;
+
+                    // Top row: don't sum the very last element
+                    int startLMJ = 0;
+                    int endLMJ  = ksize.width  - 1;
+                    int howManyAll = (anX *2 +1)*(ksize.width );
+#if CALCVAR
+                    for(int x = startLMJ; x< endLMJ; x++)
+                    {
+                        tptr = temp->ptr(startY + x) +j;
+                        for(int y=-anX; y<=anX; y++)
+                        {
+                            currVal = tptr[cn*(y+anX)];
+                            sumVal += currVal;
+                            sumValSqr += (currVal *currVal);
+                        }
+                    }
+                    var = ( (sumValSqr * howManyAll)- sumVal * sumVal )  /  ( (float)(howManyAll*howManyAll));
+#else
+                    var = 900.0;
+#endif
+                    startLMJ = 0;
+                    endLMJ = ksize.width;
+                    tptr = temp->ptr(startY + (startLMJ+ endLMJ)/2);
+                    currValCenter =tptr[j+cn*anX];
+                    for(int x = startLMJ; x< endLMJ; x++)
+                    {
+                        tptr = temp->ptr(startY + x) +j;
+                        for(int y=-anX; y<=anX; y++)
+                        {
+#if FIXED_WEIGHT
+                            weight = 1.0;
+#else
+                            currVal = tptr[cn*(y+anX)];
+                            currWRTCenter = currVal - currValCenter;
+
+                            weight = var / ( var + (currWRTCenter * currWRTCenter) ) * space_weight[x*ksize.width+y+anX];;
+#endif
+                            tmpSum += ((float)tptr[cn*(y+anX)] * weight);
+                            totalWeight += weight;
+                        }
+                    }
+                    tmpSum /= totalWeight;
+
+                   dest->at<uchar>(startY ,j)= static_cast<uchar>(tmpSum);
+                }
+            }
+            else
+            {
+                assert(cn == 3);
+                float var_b, var_g, var_r;
+                int currVal_b, currVal_g, currVal_r;
+                int sumVal_b= 0, sumVal_g= 0, sumVal_r= 0;
+                int sumValSqr_b= 0, sumValSqr_g= 0, sumValSqr_r= 0;
+                int currValCenter_b= 0, currValCenter_g= 0, currValCenter_r= 0;
+                int currWRTCenter_b, currWRTCenter_g, currWRTCenter_r;
+                float weight_b, weight_g, weight_r;
+                float totalWeight_b= 0., totalWeight_g= 0., totalWeight_r= 0.;
+                float tmpSum_b = 0., tmpSum_g= 0., tmpSum_r = 0.;
+
+                for(int j = 0;j < dest->cols *cn; j+=cn)
+                {
+                    sumVal_b= 0, sumVal_g= 0, sumVal_r= 0;
+                    sumValSqr_b= 0, sumValSqr_g= 0, sumValSqr_r= 0;
+                    totalWeight_b= 0., totalWeight_g= 0., totalWeight_r= 0.;
+                    tmpSum_b = 0., tmpSum_g= 0., tmpSum_r = 0.;
+
+                    // Top row: don't sum the very last element
+                    int startLMJ = 0;
+                    int endLMJ  = ksize.width - 1;
+                    int howManyAll = (anX *2 +1)*(ksize.width);
+#if CALCVAR
+                    for(int x = startLMJ; x< endLMJ; x++)
+                    {
+                        tptr = temp->ptr(startY + x) +j;
+                        for(int y=-anX; y<=anX; y++)
+                        {
+                            currVal_b = tptr[cn*(y+anX)], currVal_g = tptr[cn*(y+anX)+1], currVal_r =tptr[cn*(y+anX)+2];
+                            sumVal_b += currVal_b;
+                            sumVal_g += currVal_g;
+                            sumVal_r += currVal_r;
+                            sumValSqr_b += (currVal_b *currVal_b);
+                            sumValSqr_g += (currVal_g *currVal_g);
+                            sumValSqr_r += (currVal_r *currVal_r);
+                        }
+                    }
+                    var_b = ( (sumValSqr_b * howManyAll)- sumVal_b * sumVal_b )  /  ( (float)(howManyAll*howManyAll));
+                    var_g = ( (sumValSqr_g * howManyAll)- sumVal_g * sumVal_g )  /  ( (float)(howManyAll*howManyAll));
+                    var_r = ( (sumValSqr_r * howManyAll)- sumVal_r * sumVal_r )  /  ( (float)(howManyAll*howManyAll));
+#else
+                    var_b = 900.0; var_g = 900.0;var_r = 900.0;
+#endif
+                    startLMJ = 0;
+                    endLMJ = ksize.width;
+                    tptr = temp->ptr(startY + (startLMJ+ endLMJ)/2) + j;
+                    currValCenter_b =tptr[cn*anX], currValCenter_g =tptr[cn*anX+1], currValCenter_r =tptr[cn*anX+2];
+                    for(int x = startLMJ; x< endLMJ; x++)
+                    {
+                        tptr = temp->ptr(startY + x) +j;
+                        for(int y=-anX; y<=anX; y++)
+                        {
+#if FIXED_WEIGHT
+                            weight_b = 1.0;
+                            weight_g = 1.0;
+                            weight_r = 1.0;
+#else
+                            currVal_b = tptr[cn*(y+anX)];currVal_g=tptr[cn*(y+anX)+1];currVal_r=tptr[cn*(y+anX)+2];
+                            currWRTCenter_b = currVal_b - currValCenter_b;
+                            currWRTCenter_g = currVal_g - currValCenter_g;
+                            currWRTCenter_r = currVal_r - currValCenter_r;
+
+                            float cur_spw = space_weight[x*ksize.width+y+anX];
+                            weight_b = var_b / ( var_b + (currWRTCenter_b * currWRTCenter_b) ) * cur_spw;
+                            weight_g = var_g / ( var_g + (currWRTCenter_g * currWRTCenter_g) ) * cur_spw;
+                            weight_r = var_r / ( var_r + (currWRTCenter_r * currWRTCenter_r) ) * cur_spw;
+#endif
+                            tmpSum_b += ((float)tptr[cn*(y+anX)]   * weight_b);
+                            tmpSum_g += ((float)tptr[cn*(y+anX)+1] * weight_g);
+                            tmpSum_r += ((float)tptr[cn*(y+anX)+2] * weight_r);
+                            totalWeight_b += weight_b, totalWeight_g += weight_g, totalWeight_r += weight_r;
+                        }
+                    }
+                    tmpSum_b /= totalWeight_b;
+                    tmpSum_g /= totalWeight_g;
+                    tmpSum_r /= totalWeight_r;
+
+                    dest->at<uchar>(startY,j  )= static_cast<uchar>(tmpSum_b);
+                    dest->at<uchar>(startY,j+1)= static_cast<uchar>(tmpSum_g);
+                    dest->at<uchar>(startY,j+2)= static_cast<uchar>(tmpSum_r);
+                }
+            }
+        }
+    }
+private:
+    const Mat *temp;
+    Mat *dest;
+    Size ksize;
+    double sigma_space;
+    Point anchor;
+    std::vector<float> space_weight;
+};
+static void adaptiveBilateralFilter_8u( const Mat& src, Mat& dst, Size ksize, double sigmaSpace, Point anchor, int borderType )
+{
+    Size size = src.size();
+
+    CV_Assert( (src.type() == CV_8UC1 || src.type() == CV_8UC3) &&
+              src.type() == dst.type() && src.size() == dst.size() &&
+              src.data != dst.data );
+    Mat temp;
+    copyMakeBorder(src, temp, anchor.x, anchor.y, anchor.x, anchor.y, borderType);
+
+    adaptiveBilateralFilter_8u_Invoker body(dst, temp, ksize, sigmaSpace, anchor);
+    parallel_for_(Range(0, size.height), body, dst.total()/(double)(1<<16));
+}
+}
+void cv::adaptiveBilateralFilter( InputArray _src, OutputArray _dst, Size ksize,
+                                  double sigmaSpace, Point anchor, int borderType )
+{
+    Mat src = _src.getMat();
+    _dst.create(src.size(), src.type());
+    Mat dst = _dst.getMat();
+
+    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);
+
+    anchor = normalizeAnchor(anchor,ksize);
+    if( src.depth() == CV_8U )
+        adaptiveBilateralFilter_8u( src, dst, ksize, sigmaSpace, anchor, borderType );
+    else
+        CV_Error( CV_StsUnsupportedFormat,
+        "Adaptive Bilateral filtering is only implemented for 8u images" );
+}
+
 //////////////////////////////////////////////////////////////////////////////////////////

 CV_IMPL void
--- a/modules/imgproc/test/test_bilateral_filter.cpp
+++ b/modules/imgproc/test/test_bilateral_filter.cpp
@ -251,7 +251,7 @@ namespace cvtest

    int CV_BilateralFilterTest::validate_test_results(int test_case_index)
    {
-        static const double eps = 1;
+        static const double eps = 4;

        Mat reference_dst, reference_src;
        if (_src.depth() == CV_32F)
--- a/modules/imgproc/test/test_imgwarp.cpp
+++ b/modules/imgproc/test/test_imgwarp.cpp
@ -1424,7 +1424,7 @@ TEST(Imgproc_fitLine_vector_2d, regression)

 TEST(Imgproc_fitLine_Mat_2dC2, regression)
 {
-    cv::Mat mat1(3, 1, CV_32SC2);
+    cv::Mat mat1 = Mat::zeros(3, 1, CV_32SC2);
    std::vector<float> line1;

    cv::fitLine(mat1, line1, CV_DIST_L2, 0 ,0 ,0);
@ -1444,7 +1444,7 @@ TEST(Imgproc_fitLine_Mat_2dC1, regression)

 TEST(Imgproc_fitLine_Mat_3dC3, regression)
 {
-    cv::Mat mat1(2, 1, CV_32SC3);
+    cv::Mat mat1 = Mat::zeros(2, 1, CV_32SC3);
    std::vector<float> line1;

    cv::fitLine(mat1, line1, CV_DIST_L2, 0 ,0 ,0);
@ -1454,7 +1454,7 @@ TEST(Imgproc_fitLine_Mat_3dC3, regression)

 TEST(Imgproc_fitLine_Mat_3dC1, regression)
 {
-    cv::Mat mat2(2, 3, CV_32SC1);
+    cv::Mat mat2 = Mat::zeros(2, 3, CV_32SC1);
    std::vector<float> line2;

    cv::fitLine(mat2, line2, CV_DIST_L2, 0 ,0 ,0);
--- a/modules/imgproc/test/test_imgwarp_strict.cpp
+++ b/modules/imgproc/test/test_imgwarp_strict.cpp
@ -678,8 +678,8 @@ void CV_Remap_Test::generate_test_data()
            MatIterator_<Vec2s> begin_x = mapx.begin<Vec2s>(), end_x = mapx.end<Vec2s>();
            for ( ; begin_x != end_x; ++begin_x)
            {
-                begin_x[0] = static_cast<short>(rng.uniform(static_cast<int>(_n), std::max(src.cols + n - 1, 0)));
-                begin_x[1] = static_cast<short>(rng.uniform(static_cast<int>(_n), std::max(src.rows + n - 1, 0)));
+                (*begin_x)[0] = static_cast<short>(rng.uniform(static_cast<int>(_n), std::max(src.cols + n - 1, 0)));
+                (*begin_x)[1] = static_cast<short>(rng.uniform(static_cast<int>(_n), std::max(src.rows + n - 1, 0)));
            }

            if (interpolation != INTER_NEAREST)
--- a/modules/java/android_test/AndroidManifest.xml
+++ b/modules/java/android_test/AndroidManifest.xml
@ -3,7 +3,7 @@
      package="org.opencv.test"
      android:versionCode="1"
      android:versionName="1.0">
-    
+
    <uses-sdk android:minSdkVersion="8" />

    <!-- We add an application tag here just so that we can indicate that
@ -20,7 +20,7 @@
    <instrumentation android:name="org.opencv.test.OpenCVTestRunner"
                     android:targetPackage="org.opencv.test"
                     android:label="Tests for org.opencv"/>
-    
+
    <uses-permission android:name="android.permission.CAMERA"/>
    <uses-feature android:name="android.hardware.camera" />
    <uses-feature android:name="android.hardware.camera.autofocus" />
--- a/modules/java/android_test/res/layout/main.xml
+++ b/modules/java/android_test/res/layout/main.xml
@ -4,9 +4,9 @@
    android:layout_width="fill_parent"
    android:layout_height="fill_parent"
    >
-<TextView  
-    android:layout_width="fill_parent" 
-    android:layout_height="wrap_content" 
+<TextView
+    android:layout_width="fill_parent"
+    android:layout_height="wrap_content"
    android:text="@string/hello"
    />
 </LinearLayout>
--- a/modules/ocl/doc/image_filtering.rst
+++ b/modules/ocl/doc/image_filtering.rst
@ -162,7 +162,7 @@ ocl::bilateralFilter
 --------------------
 Returns void

-.. ocv:function:: void ocl::bilateralFilter(const oclMat &src, oclMat &dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT)
+.. ocv:function:: void ocl::bilateralFilter(const oclMat &src, oclMat &dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT)

    :param src: The source image

--- a/modules/ocl/include/opencv2/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl.hpp
@ -519,7 +519,15 @@ namespace cv

        //! bilateralFilter
        // supports 8UC1 8UC4
-        CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT);
+        CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT);
+
+        //! Applies an adaptive bilateral filter to the input image
+        //  This is not truly a bilateral filter. Instead of using user provided fixed parameters,
+        //  the function calculates a constant at each window based on local standard deviation,
+        //  and use this constant to do filtering.
+        //  supports 8UC1 8UC3
+        CV_EXPORTS void adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT);
+
        //! computes exponent of each matrix element (b = e**a)
        // supports only CV_32FC1 type
        CV_EXPORTS void exp(const oclMat &a, oclMat &b);
@ -1797,6 +1805,155 @@ namespace cv
        //    keys   = {1,    2,   3}   (CV_8UC1)
        //    values = {6,2, 10,5, 4,3} (CV_8UC2)
        void CV_EXPORTS sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false);
+        /*!Base class for MOG and MOG2!*/
+        class CV_EXPORTS BackgroundSubtractor
+        {
+        public:
+            //! the virtual destructor
+            virtual ~BackgroundSubtractor();
+            //! the update operator that takes the next video frame and returns the current foreground mask as 8-bit binary image.
+            virtual void operator()(const oclMat& image, oclMat& fgmask, float learningRate);
+
+            //! computes a background image
+            virtual void getBackgroundImage(oclMat& backgroundImage) const = 0;
+        };
+                /*!
+        Gaussian Mixture-based Backbround/Foreground Segmentation Algorithm
+
+        The class implements the following algorithm:
+        "An improved adaptive background mixture model for real-time tracking with shadow detection"
+        P. KadewTraKuPong and R. Bowden,
+        Proc. 2nd European Workshp on Advanced Video-Based Surveillance Systems, 2001."
+        http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf
+        */
+        class CV_EXPORTS MOG: public cv::ocl::BackgroundSubtractor
+        {
+        public:
+            //! the default constructor
+            MOG(int nmixtures = -1);
+
+            //! re-initiaization method
+            void initialize(Size frameSize, int frameType);
+
+            //! the update operator
+            void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = 0.f);
+
+            //! computes a background image which are the mean of all background gaussians
+            void getBackgroundImage(oclMat& backgroundImage) const;
+
+            //! releases all inner buffers
+            void release();
+
+            int history;
+            float varThreshold;
+            float backgroundRatio;
+            float noiseSigma;
+
+        private:
+            int nmixtures_;
+
+            Size frameSize_;
+            int frameType_;
+            int nframes_;
+
+            oclMat weight_;
+            oclMat sortKey_;
+            oclMat mean_;
+            oclMat var_;
+        };
+
+        /*!
+        The class implements the following algorithm:
+        "Improved adaptive Gausian mixture model for background subtraction"
+        Z.Zivkovic
+        International Conference Pattern Recognition, UK, August, 2004.
+        http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
+        */
+        class CV_EXPORTS MOG2: public cv::ocl::BackgroundSubtractor
+        {
+        public:
+            //! the default constructor
+            MOG2(int nmixtures = -1);
+
+            //! re-initiaization method
+            void initialize(Size frameSize, int frameType);
+
+            //! the update operator
+            void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = -1.0f);
+
+            //! computes a background image which are the mean of all background gaussians
+            void getBackgroundImage(oclMat& backgroundImage) const;
+
+            //! releases all inner buffers
+            void release();
+
+            // parameters
+            // you should call initialize after parameters changes
+
+            int history;
+
+            //! here it is the maximum allowed number of mixture components.
+            //! Actual number is determined dynamically per pixel
+            float varThreshold;
+            // threshold on the squared Mahalanobis distance to decide if it is well described
+            // by the background model or not. Related to Cthr from the paper.
+            // This does not influence the update of the background. A typical value could be 4 sigma
+            // and that is varThreshold=4*4=16; Corresponds to Tb in the paper.
+
+            /////////////////////////
+            // less important parameters - things you might change but be carefull
+            ////////////////////////
+
+            float backgroundRatio;
+            // corresponds to fTB=1-cf from the paper
+            // TB - threshold when the component becomes significant enough to be included into
+            // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.
+            // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
+            // it is considered foreground
+            // float noiseSigma;
+            float varThresholdGen;
+
+            //correspondts to Tg - threshold on the squared Mahalan. dist. to decide
+            //when a sample is close to the existing components. If it is not close
+            //to any a new component will be generated. I use 3 sigma => Tg=3*3=9.
+            //Smaller Tg leads to more generated components and higher Tg might make
+            //lead to small number of components but they can grow too large
+            float fVarInit;
+            float fVarMin;
+            float fVarMax;
+
+            //initial variance  for the newly generated components.
+            //It will will influence the speed of adaptation. A good guess should be made.
+            //A simple way is to estimate the typical standard deviation from the images.
+            //I used here 10 as a reasonable value
+            // min and max can be used to further control the variance
+            float fCT; //CT - complexity reduction prior
+            //this is related to the number of samples needed to accept that a component
+            //actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
+            //the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
+
+            //shadow detection parameters
+            bool bShadowDetection; //default 1 - do shadow detection
+            unsigned char nShadowDetection; //do shadow detection - insert this value as the detection result - 127 default value
+            float fTau;
+            // Tau - shadow threshold. The shadow is detected if the pixel is darker
+            //version of the background. Tau is a threshold on how much darker the shadow can be.
+            //Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
+            //See: Prati,Mikic,Trivedi,Cucchiarra,"Detecting Moving Shadows...",IEEE PAMI,2003.
+
+        private:
+            int nmixtures_;
+
+            Size frameSize_;
+            int frameType_;
+            int nframes_;
+
+            oclMat weight_;
+            oclMat variance_;
+            oclMat mean_;
+
+            oclMat bgmodelUsedModes_; //keep track of number of modes per pixel
+        };
    }
 }
 #if defined _MSC_VER && _MSC_VER >= 1200
--- a/modules/ocl/perf/perf_bgfg.cpp
+++ b/modules/ocl/perf/perf_bgfg.cpp
@ -0,0 +1,282 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include "perf_precomp.hpp"
+using namespace perf;
+using namespace std;
+using namespace cv::ocl;
+using namespace cv;
+using std::tr1::tuple;
+using std::tr1::get;
+#if defined(HAVE_XINE)         || \
+    defined(HAVE_GSTREAMER)    || \
+    defined(HAVE_QUICKTIME)    || \
+    defined(HAVE_AVFOUNDATION) || \
+    defined(HAVE_FFMPEG)       || \
+    defined(WIN32)
+
+#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 1
+#else
+#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
+#endif
+
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+static void cvtFrameFmt(vector<Mat>& input, vector<Mat>& output)
+{
+    for(int i = 0; i< (int)(input.size()); i++)
+    {
+        cvtColor(input[i], output[i], COLOR_RGB2GRAY);
+    }
+}
+//prepare data for CPU
+static void prepareData(VideoCapture& cap, int cn, vector<Mat>& frame_buffer)
+{
+    cv::Mat frame;
+    std::vector<Mat> frame_buffer_init;
+    int nFrame = (int)frame_buffer.size();
+    for(int i = 0; i < nFrame; i++)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+        frame_buffer_init.push_back(frame);
+    }
+
+    if(cn == 1)
+        cvtFrameFmt(frame_buffer_init, frame_buffer);
+    else
+        frame_buffer = frame_buffer_init;
+}
+//copy CPU data to GPU
+static void prepareData(vector<Mat>& frame_buffer, vector<oclMat>& frame_buffer_ocl)
+{
+    for(int i = 0; i < (int)frame_buffer.size(); i++)
+        frame_buffer_ocl.push_back(cv::ocl::oclMat(frame_buffer[i]));
+}
+#endif
+///////////// MOG ////////////////////////
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+typedef tuple<string, int, double> VideoMOGParamType;
+typedef TestBaseWithParam<VideoMOGParamType> VideoMOGFixture;
+
+PERF_TEST_P(VideoMOGFixture, MOG,
+            ::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
+            ::testing::Values(1, 3),
+            ::testing::Values(0.0, 0.01)))
+{
+    VideoMOGParamType params = GetParam();
+
+    const string inputFile = perf::TestBase::getDataPath(get<0>(params));
+    const int cn = get<1>(params);
+    const float learningRate = static_cast<float>(get<2>(params));
+
+    const int nFrame = 5;
+
+    Mat foreground_cpu;
+    std::vector<Mat> frame_buffer(nFrame);
+    std::vector<oclMat> frame_buffer_ocl;
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    prepareData(cap, cn, frame_buffer);
+
+    cv::Mat foreground;
+    cv::ocl::oclMat foreground_d;
+    if(RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE()
+        {
+            cv::Ptr<cv::BackgroundSubtractorMOG> mog = createBackgroundSubtractorMOG();
+            foreground.release();
+            for (int i = 0; i < nFrame; i++)
+            {
+                mog->apply(frame_buffer[i], foreground, learningRate);
+            }
+        }
+        SANITY_CHECK(foreground);
+    }else if(RUN_OCL_IMPL)
+    {
+        prepareData(frame_buffer, frame_buffer_ocl);
+        CV_Assert((int)(frame_buffer_ocl.size()) == nFrame);
+        OCL_TEST_CYCLE()
+        {
+            cv::ocl::MOG d_mog;
+            foreground_d.release();
+            for (int i = 0; i < nFrame; ++i)
+            {
+                d_mog(frame_buffer_ocl[i], foreground_d, learningRate);
+            }
+        }
+        foreground_d.download(foreground);
+        SANITY_CHECK(foreground);
+    }else
+        OCL_PERF_ELSE
+}
+#endif
+
+///////////// MOG2 ////////////////////////
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+typedef tuple<string, int> VideoMOG2ParamType;
+typedef TestBaseWithParam<VideoMOG2ParamType> VideoMOG2Fixture;
+
+PERF_TEST_P(VideoMOG2Fixture, MOG2,
+            ::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
+            ::testing::Values(1, 3)))
+{
+    VideoMOG2ParamType params = GetParam();
+
+    const string inputFile = perf::TestBase::getDataPath(get<0>(params));
+    const int cn = get<1>(params);
+    int nFrame = 5;
+
+    std::vector<cv::Mat> frame_buffer(nFrame);
+    std::vector<cv::ocl::oclMat> frame_buffer_ocl;
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+    prepareData(cap, cn, frame_buffer);
+    cv::Mat foreground;
+    cv::ocl::oclMat foreground_d;
+
+    if(RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE()
+        {
+            cv::Ptr<cv::BackgroundSubtractorMOG2> mog2 = createBackgroundSubtractorMOG2();
+            mog2->set("detectShadows", false);
+            foreground.release();
+
+            for (int i = 0; i < nFrame; i++)
+            {
+                mog2->apply(frame_buffer[i], foreground);
+            }
+        }
+        SANITY_CHECK(foreground);
+    }else if(RUN_OCL_IMPL)
+    {
+        prepareData(frame_buffer, frame_buffer_ocl);
+        CV_Assert((int)(frame_buffer_ocl.size()) == nFrame);
+        OCL_TEST_CYCLE()
+        {
+            cv::ocl::MOG2 d_mog2;
+            foreground_d.release();
+            for (int i = 0; i < nFrame; i++)
+            {
+                d_mog2(frame_buffer_ocl[i], foreground_d);
+            }
+        }
+        foreground_d.download(foreground);
+        SANITY_CHECK(foreground);
+    }else
+        OCL_PERF_ELSE
+}
+#endif
+
+///////////// MOG2_GetBackgroundImage //////////////////
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+typedef TestBaseWithParam<VideoMOG2ParamType> Video_MOG2GetBackgroundImage;
+
+PERF_TEST_P(Video_MOG2GetBackgroundImage, MOG2,
+            ::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
+            ::testing::Values(3)))
+{
+    VideoMOG2ParamType params = GetParam();
+
+    const string inputFile = perf::TestBase::getDataPath(get<0>(params));
+    const int cn = get<1>(params);
+    int nFrame = 5;
+
+    std::vector<cv::Mat> frame_buffer(nFrame);
+    std::vector<cv::ocl::oclMat> frame_buffer_ocl;
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    prepareData(cap, cn, frame_buffer);
+
+    cv::Mat foreground;
+    cv::Mat background;
+    cv::ocl::oclMat foreground_d;
+    cv::ocl::oclMat background_d;
+
+    if(RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE()
+        {
+            cv::Ptr<cv::BackgroundSubtractorMOG2> mog2 = createBackgroundSubtractorMOG2();
+            mog2->set("detectShadows", false);
+            foreground.release();
+            background.release();
+            for (int i = 0; i < nFrame; i++)
+            {
+                mog2->apply(frame_buffer[i], foreground);
+            }
+            mog2->getBackgroundImage(background);
+        }
+        SANITY_CHECK(background);
+    }else if(RUN_OCL_IMPL)
+    {
+        prepareData(frame_buffer, frame_buffer_ocl);
+        CV_Assert((int)(frame_buffer_ocl.size()) == nFrame);
+        OCL_TEST_CYCLE()
+        {
+            cv::ocl::MOG2 d_mog2;
+            foreground_d.release();
+            background_d.release();
+            for (int i = 0; i < nFrame; i++)
+            {
+                d_mog2(frame_buffer_ocl[i], foreground_d);
+            }
+            d_mog2.getBackgroundImage(background_d);
+        }
+        background_d.download(background);
+        SANITY_CHECK(background);
+    }else
+        OCL_PERF_ELSE
+}
+#endif
--- a/modules/ocl/perf/perf_fft.cpp
+++ b/modules/ocl/perf/perf_fft.cpp
@ -43,6 +43,7 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
+
 #include "perf_precomp.hpp"

 using namespace perf;
@ -51,7 +52,9 @@ using namespace perf;

 typedef TestBaseWithParam<Size> dftFixture;

-PERF_TEST_P(dftFixture, DISABLED_dft, OCL_TYPICAL_MAT_SIZES) // TODO not implemented
+#ifdef HAVE_CLAMDFFT
+
+PERF_TEST_P(dftFixture, dft, OCL_TYPICAL_MAT_SIZES)
 {
    const Size srcSize = GetParam();

@ -70,7 +73,7 @@ PERF_TEST_P(dftFixture, DISABLED_dft, OCL_TYPICAL_MAT_SIZES) // TODO not impleme

        oclDst.download(dst);

-        SANITY_CHECK(dst);
+        SANITY_CHECK(dst, 1.5);
    }
    else if (RUN_PLAIN_IMPL)
    {
@ -81,3 +84,5 @@ PERF_TEST_P(dftFixture, DISABLED_dft, OCL_TYPICAL_MAT_SIZES) // TODO not impleme
    else
        OCL_PERF_ELSE
 }
+
+#endif
--- a/modules/ocl/perf/perf_filters.cpp
+++ b/modules/ocl/perf/perf_filters.cpp
@ -321,3 +321,82 @@ PERF_TEST_P(filter2DFixture, filter2D,
    else
        OCL_PERF_ELSE
 }
+
+///////////// Bilateral////////////////////////
+
+typedef Size_MatType BilateralFixture;
+
+PERF_TEST_P(BilateralFixture, Bilateral,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
+{
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), d = 7;
+    double sigmacolor = 50.0, sigmaspace = 50.0;
+
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    if (srcSize == OCL_SIZE_4000 && type == CV_8UC3)
+        declare.time(8);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
+
+        OCL_TEST_CYCLE() cv::ocl::bilateralFilter(oclSrc, oclDst, d, sigmacolor, sigmaspace);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::bilateralFilter(src, dst, d, sigmacolor, sigmaspace);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+///////////// adaptiveBilateral////////////////////////
+
+typedef Size_MatType adaptiveBilateralFixture;
+
+PERF_TEST_P(adaptiveBilateralFixture, adaptiveBilateral,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
+{
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
+    double sigmaspace = 10.0;
+    Size ksize(9,9);
+
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    if (srcSize == OCL_SIZE_4000)
+        declare.time(15);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
+
+        OCL_TEST_CYCLE() cv::ocl::adaptiveBilateralFilter(oclSrc, oclDst, ksize, sigmaspace);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst, 1.);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::adaptiveBilateralFilter(src, dst, ksize, sigmaspace);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
--- a/modules/ocl/perf/perf_gemm.cpp
+++ b/modules/ocl/perf/perf_gemm.cpp
@ -51,8 +51,9 @@ using namespace perf;

 typedef TestBaseWithParam<Size> gemmFixture;

-PERF_TEST_P(gemmFixture, DISABLED_gemm,
-            ::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000)) // TODO not implemented
+#ifdef HAVE_CLAMDBLAS
+
+PERF_TEST_P(gemmFixture, gemm, ::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000))
 {
    const Size srcSize = GetParam();

@ -72,14 +73,16 @@ PERF_TEST_P(gemmFixture, DISABLED_gemm,

        oclDst.download(dst);

-        SANITY_CHECK(dst);
+        SANITY_CHECK(dst, 0.01);
    }
    else if (RUN_PLAIN_IMPL)
    {
        TEST_CYCLE() cv::gemm(src1, src2, 1.0, src3, 1.0, dst);

-        SANITY_CHECK(dst);
+        SANITY_CHECK(dst, 0.01);
    }
    else
        OCL_PERF_ELSE
 }
+
+#endif
--- a/modules/ocl/perf/perf_precomp.hpp
+++ b/modules/ocl/perf/perf_precomp.hpp
@ -67,6 +67,7 @@
 #include <vector>
 #include <numeric>

+#include "cvconfig.h"
 #include "opencv2/core.hpp"
 #include "opencv2/core/utility.hpp"
 #include "opencv2/imgproc.hpp"
@ -102,7 +103,7 @@ using namespace cv;

 #ifdef HAVE_OPENCV_GPU
 #define OCL_PERF_ELSE               \
-        if (RUN_GPU_IMPL)          \
+        if (RUN_GPU_IMPL)           \
            CV_TEST_FAIL_NO_IMPL(); \
        else                        \
            CV_TEST_FAIL_NO_IMPL();
--- a/modules/ocl/src/bgfg_mog.cpp
+++ b/modules/ocl/src/bgfg_mog.cpp
@ -0,0 +1,638 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jin Ma, jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+using namespace cv;
+using namespace cv::ocl;
+namespace cv
+{
+    namespace ocl
+    {
+        extern const char* bgfg_mog;
+
+        typedef struct _contant_struct
+        {
+            cl_float c_Tb;
+            cl_float c_TB;
+            cl_float c_Tg;
+            cl_float c_varInit;
+            cl_float c_varMin;
+            cl_float c_varMax;
+            cl_float c_tau;
+            cl_uchar c_shadowVal;
+        }contant_struct;
+
+        cl_mem cl_constants = NULL;
+        float c_TB;
+    }
+}
+
+#if defined _MSC_VER
+#define snprintf sprintf_s
+#endif
+
+namespace cv { namespace ocl { namespace device
+{
+    namespace mog
+    {
+        void mog_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
+            int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma);
+
+        void getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures, float backgroundRatio);
+
+        void loadConstants(float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau,
+                            unsigned char shadowVal);
+
+        void mog2_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& modesUsed, oclMat& weight, oclMat& variance, oclMat& mean,
+                      float alphaT, float prune, bool detectShadows, int nmixtures);
+
+        void getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures);
+    }
+}}}
+
+namespace mog
+{
+    const int defaultNMixtures = 5;
+    const int defaultHistory = 200;
+    const float defaultBackgroundRatio = 0.7f;
+    const float defaultVarThreshold = 2.5f * 2.5f;
+    const float defaultNoiseSigma = 30.0f * 0.5f;
+    const float defaultInitialWeight = 0.05f;
+}
+void cv::ocl::BackgroundSubtractor::operator()(const oclMat&, oclMat&, float)
+{
+
+}
+cv::ocl::BackgroundSubtractor::~BackgroundSubtractor()
+{
+
+}
+
+cv::ocl::MOG::MOG(int nmixtures) :
+frameSize_(0, 0), frameType_(0), nframes_(0)
+{
+    nmixtures_ = std::min(nmixtures > 0 ? nmixtures : mog::defaultNMixtures, 8);
+    history = mog::defaultHistory;
+    varThreshold = mog::defaultVarThreshold;
+    backgroundRatio = mog::defaultBackgroundRatio;
+    noiseSigma = mog::defaultNoiseSigma;
+}
+
+void cv::ocl::MOG::initialize(cv::Size frameSize, int frameType)
+{
+    CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4);
+
+    frameSize_ = frameSize;
+    frameType_ = frameType;
+
+    int ch = CV_MAT_CN(frameType);
+    int work_ch = ch;
+
+    // for each gaussian mixture of each pixel bg model we store
+    // the mixture sort key (w/sum_of_variances), the mixture weight (w),
+    // the mean (nchannels values) and
+    // the diagonal covariance matrix (another nchannels values)
+
+    weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
+    sortKey_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
+    mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
+    var_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
+
+    weight_.setTo(cv::Scalar::all(0));
+    sortKey_.setTo(cv::Scalar::all(0));
+    mean_.setTo(cv::Scalar::all(0));
+    var_.setTo(cv::Scalar::all(0));
+
+    nframes_ = 0;
+}
+
+void cv::ocl::MOG::operator()(const cv::ocl::oclMat& frame, cv::ocl::oclMat& fgmask, float learningRate)
+{
+    using namespace cv::ocl::device::mog;
+
+    CV_Assert(frame.depth() == CV_8U);
+
+    int ch = frame.oclchannels();
+    int work_ch = ch;
+
+    if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.oclchannels())
+        initialize(frame.size(), frame.type());
+
+    fgmask.create(frameSize_, CV_8UC1);
+
+    ++nframes_;
+    learningRate = learningRate >= 0.0f && nframes_ > 1 ? learningRate : 1.0f / std::min(nframes_, history);
+    CV_Assert(learningRate >= 0.0f);
+
+    mog_ocl(frame, ch, fgmask, weight_, sortKey_, mean_, var_, nmixtures_,
+        varThreshold, learningRate, backgroundRatio, noiseSigma);
+}
+
+void cv::ocl::MOG::getBackgroundImage(oclMat& backgroundImage) const
+{
+    using namespace cv::ocl::device::mog;
+
+    backgroundImage.create(frameSize_, frameType_);
+
+    cv::ocl::device::mog::getBackgroundImage_ocl(backgroundImage.oclchannels(), weight_, mean_, backgroundImage, nmixtures_, backgroundRatio);
+}
+
+void cv::ocl::MOG::release()
+{
+    frameSize_ = Size(0, 0);
+    frameType_ = 0;
+    nframes_ = 0;
+
+    weight_.release();
+    sortKey_.release();
+    mean_.release();
+    var_.release();
+    clReleaseMemObject(cl_constants);
+}
+
+static void mog_withoutLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& mean, oclMat& var,
+    int nmixtures, float varThreshold, float backgroundRatio)
+{
+    Context* clCxt = Context::getContext();
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {frame.cols, frame.rows, 1};
+
+    int frame_step = (int)(frame.step/frame.elemSize());
+    int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
+    int weight_step = (int)(weight.step/weight.elemSize());
+    int mean_step = (int)(mean.step/mean.elemSize());
+    int var_step = (int)(var.step/var.elemSize());
+
+    int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
+    int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
+    fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();
+
+    int frame_offset_y = (int)(frame.offset/frame.step);
+    int frame_offset_x = (int)(frame.offset%frame.step);
+    frame_offset_x = frame_offset_x/(int)frame.elemSize();
+
+    char build_option[50];
+    if(cn == 1)
+    {
+        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
+    }else
+    {
+        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
+    }
+
+    String kernel_name = "mog_withoutLearning_kernel";
+    std::vector<std::pair<size_t, const void*> > args;
+
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&frame.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&fgmask.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&var.data));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.cols));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&var_step));
+
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&varThreshold));
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&backgroundRatio));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_x));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_y));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_x));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_y));
+
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+}
+
+
+static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask_raw, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
+    int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar)
+{
+    Context* clCxt = Context::getContext();
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {frame.cols, frame.rows, 1};
+
+    oclMat fgmask(fgmask_raw.size(), CV_32SC1);
+
+    int frame_step = (int)(frame.step/frame.elemSize());
+    int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
+    int weight_step = (int)(weight.step/weight.elemSize());
+    int sortKey_step = (int)(sortKey.step/sortKey.elemSize());
+    int mean_step = (int)(mean.step/mean.elemSize());
+    int var_step = (int)(var.step/var.elemSize());
+
+    int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
+    int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
+    fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();
+
+    int frame_offset_y = (int)(frame.offset/frame.step);
+    int frame_offset_x = (int)(frame.offset%frame.step);
+    frame_offset_x = frame_offset_x/(int)frame.elemSize();
+
+    char build_option[50];
+    if(cn == 1)
+    {
+        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
+    }else
+    {
+        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
+    }
+
+    String kernel_name = "mog_withLearning_kernel";
+    std::vector<std::pair<size_t, const void*> > args;
+
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&frame.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&fgmask.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&sortKey.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&var.data));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.cols));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&sortKey_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&var_step));
+
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&varThreshold));
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&backgroundRatio));
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&learningRate));
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&minVar));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_x));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_y));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_x));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_y));
+
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+    fgmask.convertTo(fgmask, CV_8U);
+    fgmask.copyTo(fgmask_raw);
+}
+
+void cv::ocl::device::mog::mog_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
+    int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma)
+{
+    const float minVar = noiseSigma * noiseSigma;
+
+    if(learningRate > 0.0f)
+        mog_withLearning(frame, cn, fgmask, weight, sortKey, mean, var, nmixtures,
+                         varThreshold, backgroundRatio, learningRate, minVar);
+    else
+        mog_withoutLearning(frame, cn, fgmask, weight, mean, var, nmixtures, varThreshold, backgroundRatio);
+}
+
+void cv::ocl::device::mog::getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures, float backgroundRatio)
+{
+    Context* clCxt = Context::getContext();
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {dst.cols, dst.rows, 1};
+
+    int weight_step = (int)(weight.step/weight.elemSize());
+    int mean_step = (int)(mean.step/mean.elemSize());
+    int dst_step = (int)(dst.step/dst.elemSize());
+
+    char build_option[50];
+    if(cn == 1)
+    {
+        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
+    }else
+    {
+        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
+    }
+
+    String kernel_name = "getBackgroundImage_kernel";
+    std::vector<std::pair<size_t, const void*> > args;
+
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&dst.data));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst.cols));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_step));
+
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&backgroundRatio));
+
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+}
+
+void cv::ocl::device::mog::loadConstants(float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
+{
+    varMin = cv::min(varMin, varMax);
+    varMax = cv::max(varMin, varMax);
+
+    c_TB = TB;
+
+    _contant_struct *constants = new _contant_struct;
+    constants->c_Tb = Tb;
+    constants->c_TB = TB;
+    constants->c_Tg = Tg;
+    constants->c_varInit = varInit;
+    constants->c_varMin = varMin;
+    constants->c_varMax = varMax;
+    constants->c_tau = tau;
+    constants->c_shadowVal = shadowVal;
+
+    cl_constants = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()),
+        (void *)constants, sizeof(_contant_struct));
+}
+
+void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmaskRaw, oclMat& modesUsed, oclMat& weight, oclMat& variance,
+                                oclMat& mean, float alphaT, float prune, bool detectShadows, int nmixtures)
+{
+    oclMat fgmask(fgmaskRaw.size(), CV_32SC1);
+
+    Context* clCxt = Context::getContext();
+
+    const float alpha1 = 1.0f - alphaT;
+
+    cl_int detectShadows_flag = 0;
+    if(detectShadows)
+        detectShadows_flag = 1;
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {frame.cols, frame.rows, 1};
+
+    int frame_step = (int)(frame.step/frame.elemSize());
+    int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
+    int weight_step = (int)(weight.step/weight.elemSize());
+    int modesUsed_step = (int)(modesUsed.step/modesUsed.elemSize());
+    int mean_step = (int)(mean.step/mean.elemSize());
+    int var_step = (int)(variance.step/variance.elemSize());
+
+    int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
+    int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
+    fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();
+
+    int frame_offset_y = (int)(frame.offset/frame.step);
+    int frame_offset_x = (int)(frame.offset%frame.step);
+    frame_offset_x = frame_offset_x/(int)frame.elemSize();
+
+    String kernel_name = "mog2_kernel";
+    std::vector<std::pair<size_t, const void*> > args;
+
+    char build_option[50];
+    if(cn == 1)
+    {
+        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
+    }else
+    {
+        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
+    }
+
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&frame.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&fgmask.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&modesUsed.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&variance.data));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.cols));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&var_step));
+
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&alphaT));
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&alpha1));
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&prune));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&detectShadows_flag));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_x));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_y));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_x));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_y));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&cl_constants));
+
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+
+    fgmask.convertTo(fgmask, CV_8U);
+    fgmask.copyTo(fgmaskRaw);
+}
+
+void cv::ocl::device::mog::getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures)
+{
+    Context* clCxt = Context::getContext();
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {modesUsed.cols, modesUsed.rows, 1};
+
+    int weight_step = (int)(weight.step/weight.elemSize());
+    int modesUsed_step = (int)(modesUsed.step/modesUsed.elemSize());
+    int mean_step = (int)(mean.step/mean.elemSize());
+    int dst_step = (int)(dst.step/dst.elemSize());
+
+    int dst_y = (int)(dst.offset/dst.step);
+    int dst_x = (int)(dst.offset%dst.step);
+    dst_x = dst_x/(int)dst.elemSize();
+
+    String kernel_name = "getBackgroundImage2_kernel";
+    std::vector<std::pair<size_t, const void*> > args;
+
+    char build_option[50];
+    if(cn == 1)
+    {
+        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
+    }else
+    {
+        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
+    }
+
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&modesUsed.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&dst.data));
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&c_TB));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed.cols));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_step));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_x));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_y));
+
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+}
+
+/////////////////////////////////////////////////////////////////
+// MOG2
+
+namespace mog2
+{
+    // default parameters of gaussian background detection algorithm
+    const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory2
+    const float defaultVarThreshold = 4.0f * 4.0f;
+    const int defaultNMixtures = 5; // maximal number of Gaussians in mixture
+    const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test
+    const float defaultVarThresholdGen = 3.0f * 3.0f;
+    const float defaultVarInit = 15.0f; // initial variance for new components
+    const float defaultVarMax = 5.0f * defaultVarInit;
+    const float defaultVarMin = 4.0f;
+
+    // additional parameters
+    const float defaultfCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components
+    const unsigned char defaultnShadowDetection = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection
+    const float defaultfTau = 0.5f; // Tau - shadow threshold, see the paper for explanation
+}
+
+cv::ocl::MOG2::MOG2(int nmixtures) : frameSize_(0, 0), frameType_(0), nframes_(0)
+{
+    nmixtures_ = nmixtures > 0 ? nmixtures : mog2::defaultNMixtures;
+
+    history = mog2::defaultHistory;
+    varThreshold = mog2::defaultVarThreshold;
+    bShadowDetection = true;
+
+    backgroundRatio = mog2::defaultBackgroundRatio;
+    fVarInit = mog2::defaultVarInit;
+    fVarMax  = mog2::defaultVarMax;
+    fVarMin = mog2::defaultVarMin;
+
+    varThresholdGen = mog2::defaultVarThresholdGen;
+    fCT = mog2::defaultfCT;
+    nShadowDetection =  mog2::defaultnShadowDetection;
+    fTau = mog2::defaultfTau;
+}
+
+void cv::ocl::MOG2::initialize(cv::Size frameSize, int frameType)
+{
+    using namespace cv::ocl::device::mog;
+    CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4);
+
+    frameSize_ = frameSize;
+    frameType_ = frameType;
+    nframes_ = 0;
+
+    int ch = CV_MAT_CN(frameType);
+    int work_ch = ch;
+
+    // for each gaussian mixture of each pixel bg model we store ...
+    // the mixture weight (w),
+    // the mean (nchannels values) and
+    // the covariance
+    weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
+    weight_.setTo(Scalar::all(0));
+
+    variance_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
+    variance_.setTo(Scalar::all(0));
+
+    mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch)); //4 channels
+    mean_.setTo(Scalar::all(0));
+
+    //make the array for keeping track of the used modes per pixel - all zeros at start
+    bgmodelUsedModes_.create(frameSize_, CV_32FC1);
+    bgmodelUsedModes_.setTo(cv::Scalar::all(0));
+
+    loadConstants(varThreshold, backgroundRatio, varThresholdGen, fVarInit, fVarMin, fVarMax, fTau, nShadowDetection);
+}
+
+void cv::ocl::MOG2::operator()(const oclMat& frame, oclMat& fgmask, float learningRate)
+{
+    using namespace cv::ocl::device::mog;
+
+    int ch = frame.oclchannels();
+    int work_ch = ch;
+
+    if (nframes_ == 0 || learningRate >= 1.0f || frame.size() != frameSize_ || work_ch != mean_.oclchannels())
+        initialize(frame.size(), frame.type());
+
+    fgmask.create(frameSize_, CV_8UC1);
+    fgmask.setTo(cv::Scalar::all(0));
+
+    ++nframes_;
+    learningRate = learningRate >= 0.0f && nframes_ > 1 ? learningRate : 1.0f / std::min(2 * nframes_, history);
+    CV_Assert(learningRate >= 0.0f);
+
+    mog2_ocl(frame, frame.oclchannels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_, learningRate, -learningRate * fCT, bShadowDetection, nmixtures_);
+}
+
+void cv::ocl::MOG2::getBackgroundImage(oclMat& backgroundImage) const
+{
+    using namespace cv::ocl::device::mog;
+
+    backgroundImage.create(frameSize_, frameType_);
+
+    cv::ocl::device::mog::getBackgroundImage2_ocl(backgroundImage.oclchannels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, nmixtures_);
+}
+
+void cv::ocl::MOG2::release()
+{
+    frameSize_ = Size(0, 0);
+    frameType_ = 0;
+    nframes_ = 0;
+
+    weight_.release();
+    variance_.release();
+    mean_.release();
+
+    bgmodelUsedModes_.release();
+}
--- a/modules/ocl/src/filtering.cpp
+++ b/modules/ocl/src/filtering.cpp
@ -63,6 +63,7 @@ extern const char *filter_sep_row;
 extern const char *filter_sep_col;
 extern const char *filtering_laplacian;
 extern const char *filtering_morph;
+extern const char *filtering_adaptive_bilateral;
 }
 }

@ -1616,3 +1617,100 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
    Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype);
    f->apply(src, dst);
 }
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Adaptive Bilateral Filter
+
+void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor, int borderType)
+{
+    CV_Assert((ksize.width & 1) && (ksize.height & 1));  // ksize must be odd
+    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);  // source must be 8bit RGB image
+    if( sigmaSpace <= 0 )
+        sigmaSpace = 1;
+    Mat lut(Size(ksize.width, ksize.height), CV_32FC1);
+    double sigma2 = sigmaSpace * sigmaSpace;
+    int idx = 0;
+    int w = ksize.width / 2;
+    int h = ksize.height / 2;
+    for(int y=-h; y<=h; y++)
+        for(int x=-w; x<=w; x++)
+    {
+        lut.at<float>(idx++) = sigma2 / (sigma2 + x * x + y * y);
+    }
+    oclMat dlut(lut);
+    int depth = src.depth();
+    int cn = src.oclchannels();
+
+    normalizeAnchor(anchor, ksize);
+    const static String kernelName = "edgeEnhancingFilter";
+
+    dst.create(src.size(), src.type());
+
+    char btype[30];
+    switch(borderType)
+    {
+    case BORDER_CONSTANT:
+        sprintf(btype, "BORDER_CONSTANT");
+        break;
+    case BORDER_REPLICATE:
+        sprintf(btype, "BORDER_REPLICATE");
+        break;
+    case BORDER_REFLECT:
+        sprintf(btype, "BORDER_REFLECT");
+        break;
+    case BORDER_WRAP:
+        sprintf(btype, "BORDER_WRAP");
+        break;
+    case BORDER_REFLECT101:
+        sprintf(btype, "BORDER_REFLECT_101");
+        break;
+    default:
+        CV_Error(CV_StsBadArg, "This border type is not supported");
+        break;
+    }
+
+    //the following constants may be adjusted for performance concerns
+    const static size_t blockSizeX = 64, blockSizeY = 1, EXTRA = ksize.height - 1;
+
+    //Normalize the result by default
+    const float alpha = ksize.height * ksize.width;
+
+    const size_t gSize = blockSizeX - ksize.width / 2 * 2;
+    const size_t globalSizeX = (src.cols) % gSize == 0 ?
+        src.cols / gSize * blockSizeX :
+        (src.cols / gSize + 1) * blockSizeX;
+    const size_t rows_per_thread = 1 + EXTRA;
+    const size_t globalSizeY = ((src.rows + rows_per_thread - 1) / rows_per_thread) % blockSizeY == 0 ?
+        ((src.rows + rows_per_thread - 1) / rows_per_thread) :
+        (((src.rows + rows_per_thread - 1) / rows_per_thread) / blockSizeY + 1) * blockSizeY;
+
+    size_t globalThreads[3] = { globalSizeX, globalSizeY, 1};
+    size_t localThreads[3]  = { blockSizeX, blockSizeY, 1};
+
+    char build_options[250];
+
+    //LDATATYPESIZE is sizeof local data store. This is to exemplify effect of LDS on kernel performance
+    sprintf(build_options,
+        "-D VAR_PER_CHANNEL=1 -D CALCVAR=1 -D FIXED_WEIGHT=0 -D EXTRA=%d"
+        " -D THREADS=%d -D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s",
+        static_cast<int>(EXTRA), static_cast<int>(blockSizeX), anchor.x, anchor.y, ksize.width, ksize.height, btype);
+
+    std::vector<std::pair<size_t , const void *> > args;
+    args.push_back(std::make_pair(sizeof(cl_mem), &src.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), &dst.data));
+    args.push_back(std::make_pair(sizeof(cl_float), (void *)&alpha));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.offset));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.offset));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
+    args.push_back(std::make_pair(sizeof(cl_mem), &dlut.data));
+    int lut_step = dlut.step1();
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&lut_step));
+
+    openCLExecuteKernel(Context::getContext(), &filtering_adaptive_bilateral, kernelName,
+        globalThreads, localThreads, args, cn, depth, build_options);
+}
--- a/modules/ocl/src/gemm.cpp
+++ b/modules/ocl/src/gemm.cpp
@ -46,16 +46,62 @@
 #include <iomanip>
 #include "precomp.hpp"

+namespace cv { namespace ocl {
+
+// used for clAmdBlas library to avoid redundant setup/teardown
+void clBlasSetup();
+void clBlasTeardown();
+
+}} /* namespace cv { namespace ocl */
+
+
 #if !defined HAVE_CLAMDBLAS
 void cv::ocl::gemm(const oclMat&, const oclMat&, double,
                   const oclMat&, double, oclMat&, int)
 {
    CV_Error(Error::StsNotImplemented, "OpenCL BLAS is not implemented");
 }
+
+void cv::ocl::clBlasSetup()
+{
+    CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented");
+}
+
+void cv::ocl::clBlasTeardown()
+{
+    //intentionally do nothing
+}
+
 #else
 #include "clAmdBlas.h"
 using namespace cv;

+static bool clBlasInitialized = false;
+static Mutex cs;
+
+void cv::ocl::clBlasSetup()
+{
+    if(!clBlasInitialized)
+    {
+        AutoLock al(cs);
+        if(!clBlasInitialized)
+        {
+            openCLSafeCall(clAmdBlasSetup());
+            clBlasInitialized = true;
+        }
+    }
+}
+
+void cv::ocl::clBlasTeardown()
+{
+    AutoLock al(cs);
+    if(clBlasInitialized)
+    {
+        clAmdBlasTeardown();
+        clBlasInitialized = false;
+    }
+}
+
 void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
                   const oclMat &src3, double beta, oclMat &dst, int flags)
 {
@ -71,7 +117,8 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
        dst.create(src1.rows, src2.cols, src1.type());
        dst.setTo(Scalar::all(0));
    }
-    openCLSafeCall( clAmdBlasSetup() );
+
+    clBlasSetup();

    const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
    const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
@ -156,6 +203,5 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
    }
    break;
    }
-    clAmdBlasTeardown();
 }
 #endif
--- a/modules/ocl/src/initialization.cpp
+++ b/modules/ocl/src/initialization.cpp
@ -65,6 +65,7 @@ namespace cv
    namespace ocl
    {
        extern void fft_teardown();
+        extern void clBlasTeardown();
        /*
         * The binary caching system to eliminate redundant program source compilation.
         * Strictly, this is not a cache because we do not implement evictions right now.
@ -1058,6 +1059,7 @@ namespace cv
        void Info::release()
        {
            fft_teardown();
+            clBlasTeardown();
            impl->release();
            impl = new Impl;
            DeviceName.clear();
@ -1067,6 +1069,7 @@ namespace cv
        Info::~Info()
        {
            fft_teardown();
+            clBlasTeardown();
            impl->release();
        }

--- a/modules/ocl/src/opencl/bgfg_mog.cl
+++ b/modules/ocl/src/opencl/bgfg_mog.cl
@ -0,0 +1,535 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jin Ma jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if defined (CN1)
+#define T_FRAME uchar
+#define T_MEAN_VAR float
+#define CONVERT_TYPE convert_uchar_sat
+#define F_ZERO (0.0f)
+float cvt(uchar val)
+{
+    return val;
+}
+
+float sqr(float val)
+{
+    return val * val;
+}
+
+float sum(float val)
+{
+    return val;
+}
+
+float clamp1(float var, float learningRate, float diff, float minVar)
+{
+    return fmax(var + learningRate * (diff * diff - var), minVar);
+}
+#else
+#define T_FRAME uchar4
+#define T_MEAN_VAR float4
+#define CONVERT_TYPE convert_uchar4_sat
+#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
+float4 cvt(const uchar4 val)
+{
+    float4 result;
+    result.x = val.x;
+    result.y = val.y;
+    result.z = val.z;
+    result.w = val.w;
+
+    return result;
+}
+
+float sqr(const float4 val)
+{
+    return val.x * val.x + val.y * val.y + val.z * val.z;
+}
+
+float sum(const float4 val)
+{
+    return (val.x + val.y + val.z);
+}
+
+float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
+{
+    float4 result;
+    result.x = fmax(var.x + learningRate * (diff.x * diff.x - var.x), minVar);
+    result.y = fmax(var.y + learningRate * (diff.y * diff.y - var.y), minVar);
+    result.z = fmax(var.z + learningRate * (diff.z * diff.z - var.z), minVar);
+    result.w = 0.0f;
+    return result;
+}
+#endif
+
+typedef struct
+{
+    float c_Tb;
+    float c_TB;
+    float c_Tg;
+    float c_varInit;
+    float c_varMin;
+    float c_varMax;
+    float c_tau;
+    uchar c_shadowVal;
+}con_srtuct_t;
+
+void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
+{
+    float val = ptr[(k * rows + y) * ptr_step + x];
+    ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
+    ptr[((k + 1) * rows + y) * ptr_step + x] = val;
+}
+
+void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
+{
+    float4 val = ptr[(k * rows + y) * ptr_step + x];
+    ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
+    ptr[((k + 1) * rows + y) * ptr_step + x] = val;
+}
+
+__kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar* fgmask,
+    __global float* weight, __global T_MEAN_VAR* mean, __global T_MEAN_VAR* var,
+    int frame_row, int frame_col, int frame_step, int fgmask_step,
+    int weight_step, int mean_step, int var_step,
+    float varThreshold, float backgroundRatio, int fgmask_offset_x,
+    int fgmask_offset_y, int frame_offset_x, int frame_offset_y)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (x < frame_col && y < frame_row)
+    {
+        T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]);
+
+        int kHit = -1;
+        int kForeground = -1;
+
+        for (int k = 0; k < (NMIXTURES); ++k)
+        {
+            if (weight[(k * frame_row + y) * weight_step + x] < 1.192092896e-07f)
+                break;
+
+            T_MEAN_VAR mu = mean[(k * frame_row + y) * mean_step + x];
+            T_MEAN_VAR _var = var[(k * frame_row + y) + var_step + x];
+
+            T_MEAN_VAR diff = pix - mu;
+
+            if (sqr(diff) < varThreshold * sum(_var))
+            {
+                kHit = k;
+                break;
+            }
+        }
+
+        if (kHit >= 0)
+        {
+            float wsum = 0.0f;
+            for (int k = 0; k < (NMIXTURES); ++k)
+            {
+                wsum += weight[(k * frame_row + y) * weight_step + x];
+
+                if (wsum > backgroundRatio)
+                {
+                    kForeground = k + 1;
+                    break;
+                }
+            }
+        }
+        if(kHit < 0 || kHit >= kForeground)
+            fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar) (-1);
+        else
+            fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar) (0);
+    }
+}
+
+__kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global int* fgmask,
+    __global float* weight, __global float* sortKey, __global T_MEAN_VAR* mean,
+    __global T_MEAN_VAR* var, int frame_row, int frame_col, int frame_step, int fgmask_step,
+    int weight_step, int sortKey_step, int mean_step, int var_step,
+    float varThreshold, float backgroundRatio, float learningRate, float minVar,
+    int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y)
+{
+    const float w0 = 0.05f;
+    const float sk0 = w0 / 30.0f;
+    const float var0 = 900.f;
+
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if(x >= frame_col || y >= frame_row) return;
+    float wsum = 0.0f;
+    int kHit = -1;
+    int kForeground = -1;
+    int k = 0;
+
+    T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]);
+
+    for (; k < (NMIXTURES); ++k)
+    {
+        float w = weight[(k * frame_row + y) * weight_step + x];
+        wsum += w;
+
+        if (w < 1.192092896e-07f)
+            break;
+
+        T_MEAN_VAR mu = mean[(k * frame_row + y) * mean_step + x];
+        T_MEAN_VAR _var = var[(k * frame_row + y) * var_step + x];
+
+        float sortKey_prev, weight_prev;
+        T_MEAN_VAR mean_prev, var_prev;
+        if (sqr(pix - mu) < varThreshold * sum(_var))
+        {
+            wsum -= w;
+            float dw = learningRate * (1.0f - w);
+
+            _var = clamp1(_var, learningRate, pix - mu, minVar);
+
+            sortKey_prev = w / sqr(sum(_var));
+            sortKey[(k * frame_row + y) * sortKey_step + x] = sortKey_prev;
+
+            weight_prev = w + dw;
+            weight[(k * frame_row + y) * weight_step + x] = weight_prev;
+
+            mean_prev = mu + learningRate * (pix - mu);
+            mean[(k * frame_row + y) * mean_step + x] = mean_prev;
+
+            var_prev = _var;
+            var[(k * frame_row + y) * var_step + x] = var_prev;
+        }
+
+        int k1 = k - 1;
+
+        if (k1 >= 0 && sqr(pix - mu) < varThreshold * sum(_var))
+        {
+            float sortKey_next = sortKey[(k1 * frame_row + y) * sortKey_step + x];
+            float weight_next = weight[(k1 * frame_row + y) * weight_step + x];
+            T_MEAN_VAR mean_next = mean[(k1 * frame_row + y) * mean_step + x];
+            T_MEAN_VAR var_next = var[(k1 * frame_row + y) * var_step + x];
+
+            for (; sortKey_next < sortKey_prev && k1 >= 0; --k1)
+            {
+                sortKey[(k1 * frame_row + y) * sortKey_step + x] = sortKey_prev;
+                sortKey[((k1 + 1) * frame_row + y) * sortKey_step + x] = sortKey_next;
+
+                weight[(k1 * frame_row + y) * weight_step + x] = weight_prev;
+                weight[((k1 + 1) * frame_row + y) * weight_step + x] = weight_next;
+
+                mean[(k1 * frame_row + y) * mean_step + x] = mean_prev;
+                mean[((k1 + 1) * frame_row + y) * mean_step + x] = mean_next;
+
+                var[(k1 * frame_row + y) * var_step + x] = var_prev;
+                var[((k1 + 1) * frame_row + y) * var_step + x] = var_next;
+
+                sortKey_prev = sortKey_next;
+                sortKey_next = k1 > 0 ? sortKey[((k1 - 1) * frame_row + y) * sortKey_step + x] : 0.0f;
+
+                weight_prev = weight_next;
+                weight_next = k1 > 0 ? weight[((k1 - 1) * frame_row + y) * weight_step + x] : 0.0f;
+
+                mean_prev = mean_next;
+                mean_next = k1 > 0 ? mean[((k1 - 1) * frame_row + y) * mean_step + x] : (T_MEAN_VAR)F_ZERO;
+
+                var_prev = var_next;
+                var_next = k1 > 0 ? var[((k1 - 1) * frame_row + y) * var_step + x] : (T_MEAN_VAR)F_ZERO;
+            }
+        }
+
+        kHit = k1 + 1;
+        break;
+    }
+
+    if (kHit < 0)
+    {
+        kHit = k = k < ((NMIXTURES) - 1) ? k : ((NMIXTURES) - 1);
+        wsum += w0 - weight[(k * frame_row + y) * weight_step + x];
+
+        weight[(k * frame_row + y) * weight_step + x] = w0;
+        mean[(k * frame_row + y) * mean_step + x] = pix;
+#if defined (CN1)
+        var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0);
+#else
+        var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0, var0, var0, var0);
+#endif
+        sortKey[(k * frame_row + y) * sortKey_step + x] = sk0;
+    }
+    else
+    {
+        for( ; k < (NMIXTURES); k++)
+            wsum += weight[(k * frame_row + y) * weight_step + x];
+    }
+
+    float wscale = 1.0f / wsum;
+    wsum = 0;
+    for (k = 0; k < (NMIXTURES); ++k)
+    {
+        float w = weight[(k * frame_row + y) * weight_step + x];
+        w *= wscale;
+        wsum += w;
+
+        weight[(k * frame_row + y) * weight_step + x] = w;
+        sortKey[(k * frame_row + y) * sortKey_step + x] *= wscale;
+
+        kForeground = select(kForeground, k + 1, wsum > backgroundRatio && kForeground < 0);
+    }
+    fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar)(-(kHit >= kForeground));
+}
+
+
+__kernel void getBackgroundImage_kernel(__global float* weight, __global T_MEAN_VAR* mean, __global T_FRAME* dst,
+    int dst_row, int dst_col, int weight_step, int mean_step, int dst_step,
+    float backgroundRatio)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if(x < dst_col && y < dst_row)
+    {
+        T_MEAN_VAR meanVal = (T_MEAN_VAR)F_ZERO;
+        float totalWeight = 0.0f;
+
+        for (int mode = 0; mode < (NMIXTURES); ++mode)
+        {
+            float _weight = weight[(mode * dst_row + y) * weight_step + x];
+
+            T_MEAN_VAR _mean = mean[(mode * dst_row + y) * mean_step + x];
+            meanVal = meanVal + _weight * _mean;
+
+            totalWeight += _weight;
+
+            if(totalWeight > backgroundRatio)
+                break;
+        }
+        meanVal = meanVal * (1.f / totalWeight);
+        dst[y * dst_step + x] = CONVERT_TYPE(meanVal);
+    }
+}
+
+__kernel void mog2_kernel(__global T_FRAME * frame, __global int* fgmask, __global float* weight, __global T_MEAN_VAR * mean,
+        __global int* modesUsed, __global float* variance, int frame_row, int frame_col, int frame_step,
+        int fgmask_step, int weight_step, int mean_step, int modesUsed_step, int var_step, float alphaT, float alpha1, float prune,
+        int detectShadows_flag, int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y, __constant con_srtuct_t* constants)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if(x < frame_col && y < frame_row)
+    {
+        T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + x + frame_offset_x]);
+
+        bool background = false; // true - the pixel classified as background
+
+        bool fitsPDF = false; //if it remains zero a new GMM mode will be added
+
+        int nmodes = modesUsed[y * modesUsed_step + x];
+        int nNewModes = nmodes; //current number of modes in GMM
+
+        float totalWeight = 0.0f;
+
+        for (int mode = 0; mode < nmodes; ++mode)
+        {
+            float _weight = alpha1 * weight[(mode * frame_row + y) * weight_step + x] + prune;
+
+            if (!fitsPDF)
+            {
+                float var = variance[(mode * frame_row + y) * var_step + x];
+
+                T_MEAN_VAR _mean = mean[(mode * frame_row + y) * mean_step + x];
+
+                T_MEAN_VAR diff = _mean - pix;
+                float dist2 = sqr(diff);
+
+                if (totalWeight < constants -> c_TB && dist2 < constants -> c_Tb * var)
+                    background = true;
+
+                if (dist2 < constants -> c_Tg * var)
+                {
+                    fitsPDF = true;
+                    _weight += alphaT;
+                    float k = alphaT / _weight;
+                    mean[(mode * frame_row + y) * mean_step + x] = _mean - k * diff;
+                    float varnew = var + k * (dist2 - var);
+                    varnew = fmax(varnew, constants -> c_varMin);
+                    varnew = fmin(varnew, constants -> c_varMax);
+
+                    variance[(mode * frame_row + y) * var_step + x] = varnew;
+                    for (int i = mode; i > 0; --i)
+                    {
+                        if (_weight < weight[((i - 1) * frame_row + y) * weight_step + x])
+                            break;
+                        swap(weight, x, y, i - 1, frame_row, weight_step);
+                        swap(variance, x, y, i - 1, frame_row, var_step);
+                        #if defined (CN1)
+                        swap(mean, x, y, i - 1, frame_row, mean_step);
+                        #else
+                        swap4(mean, x, y, i - 1, frame_row, mean_step);
+                        #endif
+                    }
+                }
+            } // !fitsPDF
+
+            if (_weight < -prune)
+            {
+                _weight = 0.0;
+                nmodes--;
+            }
+
+            weight[(mode * frame_row + y) * weight_step + x] = _weight; //update weight by the calculated value
+            totalWeight += _weight;
+        }
+
+        totalWeight = 1.f / totalWeight;
+        for (int mode = 0; mode < nmodes; ++mode)
+            weight[(mode * frame_row + y) * weight_step + x] *= totalWeight;
+
+        nmodes = nNewModes;
+
+        if (!fitsPDF)
+        {
+            int mode = nmodes == (NMIXTURES) ? (NMIXTURES) - 1 : nmodes++;
+
+            if (nmodes == 1)
+                weight[(mode * frame_row + y) * weight_step + x] = 1.f;
+            else
+            {
+                weight[(mode * frame_row + y) * weight_step + x] = alphaT;
+
+                for (int i = 0; i < nmodes - 1; ++i)
+                    weight[(i * frame_row + y) * weight_step + x] *= alpha1;
+            }
+
+            mean[(mode * frame_row + y) * mean_step + x] = pix;
+            variance[(mode * frame_row + y) * var_step + x] = constants -> c_varInit;
+
+            for (int i = nmodes - 1; i > 0; --i)
+            {
+                // check one up
+                if (alphaT < weight[((i - 1) * frame_row + y) * weight_step + x])
+                    break;
+
+                swap(weight, x, y, i - 1, frame_row, weight_step);
+                swap(variance, x, y, i - 1, frame_row, var_step);
+                #if defined (CN1)
+                swap(mean, x, y, i - 1, frame_row, mean_step);
+                #else
+                swap4(mean, x, y, i - 1, frame_row, mean_step);
+                #endif
+            }
+        }
+
+        modesUsed[y * modesUsed_step + x] = nmodes;
+
+        bool isShadow = false;
+        if (detectShadows_flag && !background)
+        {
+            float tWeight = 0.0f;
+
+            for (int mode = 0; mode < nmodes; ++mode)
+            {
+                T_MEAN_VAR _mean = mean[(mode * frame_row + y) * mean_step + x];
+
+                T_MEAN_VAR pix_mean = pix * _mean;
+
+                float numerator = sum(pix_mean);
+                float denominator = sqr(_mean);
+
+                if (denominator == 0)
+                    break;
+
+                if (numerator <= denominator && numerator >= constants -> c_tau * denominator)
+                {
+                    float a = numerator / denominator;
+
+                    T_MEAN_VAR dD = a * _mean - pix;
+
+                    if (sqr(dD) < constants -> c_Tb * variance[(mode * frame_row + y) * var_step + x] * a * a)
+                    {
+                        isShadow = true;
+                        break;
+                    }
+                }
+
+                tWeight += weight[(mode * frame_row + y) * weight_step + x];
+                if (tWeight > constants -> c_TB)
+                    break;
+            }
+        }
+
+        fgmask[(y + fgmask_offset_y) * fgmask_step + x + fgmask_offset_x] = background ? 0 : isShadow ? constants -> c_shadowVal : 255;
+    }
+}
+
+__kernel void getBackgroundImage2_kernel(__global int* modesUsed, __global float* weight, __global T_MEAN_VAR* mean,
+    __global T_FRAME* dst, float c_TB, int modesUsed_row, int modesUsed_col, int modesUsed_step, int weight_step,
+    int mean_step, int dst_step, int dst_x, int dst_y)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if(x < modesUsed_col && y < modesUsed_row)
+    {
+        int nmodes = modesUsed[y * modesUsed_step + x];
+
+        T_MEAN_VAR meanVal = (T_MEAN_VAR)F_ZERO;
+
+        float totalWeight = 0.0f;
+
+        for (int mode = 0; mode < nmodes; ++mode)
+        {
+            float _weight = weight[(mode * modesUsed_row + y) * weight_step + x];
+
+            T_MEAN_VAR _mean = mean[(mode * modesUsed_row + y) * mean_step + x];
+            meanVal = meanVal + _weight * _mean;
+
+            totalWeight += _weight;
+
+            if(totalWeight > c_TB)
+                break;
+        }
+
+        meanVal = meanVal * (1.f / totalWeight);
+        dst[(y + dst_y) * dst_step + x + dst_x] = CONVERT_TYPE(meanVal);
+    }
+}
--- a/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
+++ b/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
@ -0,0 +1,424 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Harris Gasparakis, harris.gasparakis@amd.com
+//    Xiaopeng Fu, fuxiaopeng2222@163.com
+//    Yao Wang, bitwangyaoyao@gmail.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#ifdef BORDER_REPLICATE
+//BORDER_REPLICATE:     aaaaaa|abcdefgh|hhhhhhh
+#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? (l_edge)   : (i))
+#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? (r_edge)-1 : (addr))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? (t_edge)   :(i))
+#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? (b_edge)-1 :(addr))
+#endif
+
+#ifdef BORDER_REFLECT
+//BORDER_REFLECT:       fedcba|abcdefgh|hgfedcb
+#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? -(i)-1               : (i))
+#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? -(i)-1 : (i))
+#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? -(i)-1+((b_edge)<<1) : (addr))
+#endif
+
+#ifdef BORDER_REFLECT_101
+//BORDER_REFLECT_101:   gfedcb|abcdefgh|gfedcba
+#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? -(i)                 : (i))
+#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? -(i)                 : (i))
+#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? -(i)-2+((b_edge)<<1) : (addr))
+#endif
+
+//blur function does not support BORDER_WRAP
+#ifdef BORDER_WRAP
+//BORDER_WRAP:          cdefgh|abcdefgh|abcdefg
+#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? (i)+(r_edge) : (i))
+#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? (i)-(r_edge) : (addr))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? (i)+(b_edge) : (i))
+#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? (i)-(b_edge) : (addr))
+#endif
+
+__kernel void
+edgeEnhancingFilter_C4_D0(
+    __global const uchar4 * restrict src,
+    __global uchar4 *dst,
+    float alpha,
+    int src_offset,
+    int src_whole_rows,
+    int src_whole_cols,
+    int src_step,
+    int dst_offset,
+    int dst_rows,
+    int dst_cols,
+    int dst_step,
+    __global const float* lut,
+    int lut_step)
+{
+    int col = get_local_id(0);
+    const int gX = get_group_id(0);
+    const int gY = get_group_id(1);
+
+    int src_x_off = (src_offset % src_step) >> 2;
+    int src_y_off = src_offset / src_step;
+    int dst_x_off = (dst_offset % dst_step) >> 2;
+    int dst_y_off = dst_offset / dst_step;
+
+    int startX = gX * (THREADS-ksX+1) - anX + src_x_off;
+    int startY = (gY * (1+EXTRA)) - anY + src_y_off;
+
+    int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
+    int dst_startY = (gY * (1+EXTRA)) + dst_y_off;
+
+    int posX = dst_startX - dst_x_off + col;
+    int posY = (gY * (1+EXTRA))	;
+
+    __local uchar4 data[ksY+EXTRA][THREADS];
+
+    float4 tmp_sum[1+EXTRA];
+    for(int tmpint = 0; tmpint < 1+EXTRA; tmpint++)
+    {
+        tmp_sum[tmpint] = (float4)(0,0,0,0);
+    }
+
+#ifdef BORDER_CONSTANT
+    bool con;
+    uchar4 ss;
+    for(int j = 0;	j < ksY+EXTRA; j++)
+    {
+        con = (startX+col >= 0 && startX+col < src_whole_cols && startY+j >= 0 && startY+j < src_whole_rows);
+
+        int cur_col = clamp(startX + col, 0, src_whole_cols);
+        if(con)
+        {
+            ss = src[(startY+j)*(src_step>>2) + cur_col];
+        }
+
+        data[j][col] = con ? ss : (uchar4)0;
+    }
+#else
+    for(int j= 0; j < ksY+EXTRA; j++)
+    {
+        int selected_row;
+        int selected_col;
+        selected_row = ADDR_H(startY+j, 0, src_whole_rows);
+        selected_row = ADDR_B(startY+j, src_whole_rows, selected_row);
+
+        selected_col = ADDR_L(startX+col, 0, src_whole_cols);
+        selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
+
+        data[j][col] = src[selected_row * (src_step>>2) + selected_col];
+    }
+#endif
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    float4 var[1+EXTRA];
+
+#if VAR_PER_CHANNEL
+    float4 weight;
+    float4 totalWeight = (float4)(0,0,0,0);
+#else
+    float weight;
+    float totalWeight = 0;
+#endif
+
+    int4 currValCenter;
+    int4 currWRTCenter;
+
+    int4 sumVal = 0;
+    int4 sumValSqr = 0;
+
+    if(col < (THREADS-(ksX-1)))
+    {
+        int4 currVal;
+
+        int howManyAll = (2*anX+1)*(ksY);
+
+        //find variance of all data
+        int startLMj;
+        int endLMj ;
+#if CALCVAR
+        // Top row: don't sum the very last element
+        for(int extraCnt = 0; extraCnt <=EXTRA; extraCnt++)
+        {
+            startLMj = extraCnt;
+            endLMj =  ksY+extraCnt-1;
+            sumVal =0;
+            sumValSqr=0;
+            for(int j = startLMj; j < endLMj; j++)
+            {
+                for(int i=-anX; i<=anX; i++)
+                {
+                    currVal	= convert_int4(data[j][col+anX+i])	;
+
+                    sumVal += currVal;
+                    sumValSqr += mul24(currVal, currVal);
+                }
+            }
+            var[extraCnt] = convert_float4( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) /  ( (float)(howManyAll*howManyAll) ) ;
+#else
+        var[extraCnt] = (float4)(900.0, 900.0, 900.0, 0.0);
+#endif
+        }
+
+        for(int extraCnt = 0; extraCnt <= EXTRA; extraCnt++)
+        {
+
+            // top row: include the very first element, even on first time
+            startLMj = extraCnt;
+            // go all the way, unless this is the last local mem chunk,
+            // then stay within limits - 1
+            endLMj =  extraCnt + ksY;
+
+            // Top row: don't sum the very last element
+            currValCenter = convert_int4( data[ (startLMj + endLMj)/2][col+anX] );
+
+            for(int j = startLMj, lut_j = 0; j < endLMj; j++, lut_j++)
+            {
+                for(int i=-anX; i<=anX; i++)
+                {
+#if FIXED_WEIGHT
+#if VAR_PER_CHANNEL
+                    weight.x = 1.0f;
+                    weight.y = 1.0f;
+                    weight.z = 1.0f;
+                    weight.w = 1.0f;
+#else
+                    weight = 1.0f;
+#endif
+#else
+                    currVal	= convert_int4(data[j][col+anX+i])	;
+                    currWRTCenter = currVal-currValCenter;
+
+#if VAR_PER_CHANNEL
+                    weight = var[extraCnt] / (var[extraCnt] + convert_float4(currWRTCenter * currWRTCenter)) * (float4)(lut[lut_j*lut_step+anX+i]);
+                    //weight.x = var[extraCnt].x / ( var[extraCnt].x + (float) mul24(currWRTCenter.x , currWRTCenter.x) ) ;
+                    //weight.y = var[extraCnt].y / ( var[extraCnt].y + (float) mul24(currWRTCenter.y , currWRTCenter.y) ) ;
+                    //weight.z = var[extraCnt].z / ( var[extraCnt].z + (float) mul24(currWRTCenter.z , currWRTCenter.z) ) ;
+                    //weight.w = 0;
+#else
+                    weight = 1.0f/(1.0f+( mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +  mul24(currWRTCenter.z, currWRTCenter.z))/(var.x+var.y+var.z));
+#endif
+#endif
+                    tmp_sum[extraCnt] += convert_float4(data[j][col+anX+i]) * weight;
+                    totalWeight += weight;
+                }
+            }
+
+            tmp_sum[extraCnt] /= totalWeight;
+
+            if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
+            {
+                dst[(dst_startY+extraCnt) * (dst_step>>2)+ dst_startX + col] = convert_uchar4(tmp_sum[extraCnt]);
+            }
+
+#if VAR_PER_CHANNEL
+            totalWeight = (float4)(0,0,0,0);
+#else
+            totalWeight = 0;
+#endif
+        }
+    }
+}
+
+
+__kernel void
+edgeEnhancingFilter_C1_D0(
+    __global const uchar * restrict src,
+    __global uchar *dst,
+    float alpha,
+    int src_offset,
+    int src_whole_rows,
+    int src_whole_cols,
+    int src_step,
+    int dst_offset,
+    int dst_rows,
+    int dst_cols,
+    int dst_step,
+    __global const float * lut,
+    int lut_step)
+{
+    int col = get_local_id(0);
+    const int gX = get_group_id(0);
+    const int gY = get_group_id(1);
+
+    int src_x_off = (src_offset % src_step);
+    int src_y_off = src_offset / src_step;
+    int dst_x_off = (dst_offset % dst_step);
+    int dst_y_off = dst_offset / dst_step;
+
+    int startX = gX * (THREADS-ksX+1) - anX + src_x_off;
+    int startY = (gY * (1+EXTRA)) - anY + src_y_off;
+
+    int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
+    int dst_startY = (gY * (1+EXTRA)) + dst_y_off;
+
+    int posX = dst_startX - dst_x_off + col;
+    int posY = (gY * (1+EXTRA))	;
+
+    __local uchar data[ksY+EXTRA][THREADS];
+
+    float tmp_sum[1+EXTRA];
+    for(int tmpint = 0; tmpint < 1+EXTRA; tmpint++)
+    {
+        tmp_sum[tmpint] = (float)(0);
+    }
+
+#ifdef BORDER_CONSTANT
+    bool con;
+    uchar ss;
+    for(int j = 0;	j < ksY+EXTRA; j++)
+    {
+        con = (startX+col >= 0 && startX+col < src_whole_cols && startY+j >= 0 && startY+j < src_whole_rows);
+
+        int cur_col = clamp(startX + col, 0, src_whole_cols);
+        if(con)
+        {
+            ss = src[(startY+j)*(src_step) + cur_col];
+        }
+
+        data[j][col] = con ? ss : 0;
+    }
+#else
+    for(int j= 0; j < ksY+EXTRA; j++)
+    {
+        int selected_row;
+        int selected_col;
+        selected_row = ADDR_H(startY+j, 0, src_whole_rows);
+        selected_row = ADDR_B(startY+j, src_whole_rows, selected_row);
+
+        selected_col = ADDR_L(startX+col, 0, src_whole_cols);
+        selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
+
+        data[j][col] = src[selected_row * (src_step) + selected_col];
+    }
+#endif
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    float var[1+EXTRA];
+
+    float weight;
+    float totalWeight = 0;
+
+    int currValCenter;
+    int currWRTCenter;
+
+    int sumVal = 0;
+    int sumValSqr = 0;
+
+    if(col < (THREADS-(ksX-1)))
+    {
+        int currVal;
+
+        int howManyAll = (2*anX+1)*(ksY);
+
+        //find variance of all data
+        int startLMj;
+        int endLMj;
+#if CALCVAR
+        // Top row: don't sum the very last element
+        for(int extraCnt=0; extraCnt<=EXTRA; extraCnt++)
+        {
+            startLMj = extraCnt;
+            endLMj =  ksY+extraCnt-1;
+            sumVal = 0;
+            sumValSqr =0;
+            for(int j = startLMj; j < endLMj; j++)
+            {
+                for(int i=-anX; i<=anX; i++)
+                {
+                    currVal	= (uint)(data[j][col+anX+i])	;
+
+                    sumVal += currVal;
+                    sumValSqr += mul24(currVal, currVal);
+                }
+            }
+            var[extraCnt] = (float)( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) /  ( (float)(howManyAll*howManyAll) ) ;
+#else
+        var[extraCnt] = (float)(900.0);
+#endif
+        }
+
+        for(int extraCnt = 0; extraCnt <= EXTRA; extraCnt++)
+        {
+
+            // top row: include the very first element, even on first time
+            startLMj = extraCnt;
+            // go all the way, unless this is the last local mem chunk,
+            // then stay within limits - 1
+            endLMj =  extraCnt + ksY;
+
+            // Top row: don't sum the very last element
+            currValCenter = (int)( data[ (startLMj + endLMj)/2][col+anX] );
+
+            for(int j = startLMj, lut_j = 0; j < endLMj; j++, lut_j++)
+            {
+                for(int i=-anX; i<=anX; i++)
+                {
+#if FIXED_WEIGHT
+                    weight = 1.0f;
+#else
+                    currVal	= (int)(data[j][col+anX+i])	;
+                    currWRTCenter = currVal-currValCenter;
+
+                    weight = var[extraCnt] / (var[extraCnt] + (float)mul24(currWRTCenter,currWRTCenter)) * lut[lut_j*lut_step+anX+i] ;
+#endif
+                    tmp_sum[extraCnt] += (float)(data[j][col+anX+i] * weight);
+                    totalWeight += weight;
+                }
+            }
+
+            tmp_sum[extraCnt] /= totalWeight;
+
+
+            if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
+            {
+                dst[(dst_startY+extraCnt) * (dst_step)+ dst_startX + col] = (uchar)(tmp_sum[extraCnt]);
+            }
+
+            totalWeight = 0;
+        }
+    }
+}
--- a/modules/ocl/test/test_bgfg.cpp
+++ b/modules/ocl/test/test_bgfg.cpp
@ -0,0 +1,227 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jin Ma, jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace cv;
+using namespace cv::ocl;
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+
+extern string workdir;
+//////////////////////////////////////////////////////
+// MOG
+
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(UseGray, bool)
+    IMPLEMENT_PARAM_CLASS(LearningRate, double)
+}
+
+PARAM_TEST_CASE(mog, UseGray, LearningRate, bool)
+{
+    bool useGray;
+    double learningRate;
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        useGray = GET_PARAM(0);
+
+        learningRate = GET_PARAM(1);
+
+        useRoi = GET_PARAM(2);
+    }
+};
+
+TEST_P(mog, Update)
+{
+    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/video/768x576.avi";
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+
+    cv::ocl::MOG mog;
+    cv::ocl::oclMat foreground = createMat_ocl(frame.size(), CV_8UC1, useRoi);
+
+    Ptr<cv::BackgroundSubtractorMOG> mog_gold = createBackgroundSubtractorMOG();
+    cv::Mat foreground_gold;
+
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        if (useGray)
+        {
+            cv::Mat temp;
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            cv::swap(temp, frame);
+        }
+
+        mog(loadMat_ocl(frame, useRoi), foreground, (float)learningRate);
+
+        mog_gold->apply(frame, foreground_gold, learningRate);
+
+        EXPECT_MAT_NEAR(foreground_gold, foreground, 0.0);
+    }
+}
+INSTANTIATE_TEST_CASE_P(OCL_Video, mog, testing::Combine(
+    testing::Values(UseGray(false), UseGray(true)),
+    testing::Values(LearningRate(0.0), LearningRate(0.01)),
+    Values(true, false)));
+
+//////////////////////////////////////////////////////
+// MOG2
+
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(DetectShadow, bool)
+}
+
+PARAM_TEST_CASE(mog2, UseGray, DetectShadow, bool)
+{
+    bool useGray;
+    bool detectShadow;
+    bool useRoi;
+    virtual void SetUp()
+    {
+        useGray = GET_PARAM(0);
+        detectShadow = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
+    }
+};
+
+TEST_P(mog2, Update)
+{
+    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/video/768x576.avi";
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+
+    cv::ocl::MOG2 mog2;
+    mog2.bShadowDetection = detectShadow;
+    cv::ocl::oclMat foreground = createMat_ocl(frame.size(), CV_8UC1, useRoi);
+
+    cv::Ptr<cv::BackgroundSubtractorMOG2> mog2_gold = createBackgroundSubtractorMOG2();
+    mog2_gold->set("detectShadows", detectShadow);
+    cv::Mat foreground_gold;
+
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        if (useGray)
+        {
+            cv::Mat temp;
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            cv::swap(temp, frame);
+        }
+
+        mog2(loadMat_ocl(frame, useRoi), foreground);
+
+        mog2_gold->apply(frame, foreground_gold);
+
+        if (detectShadow)
+            EXPECT_MAT_SIMILAR(foreground_gold, foreground, 15e-3)
+        else
+            EXPECT_MAT_NEAR(foreground_gold, foreground, 0)
+    }
+}
+
+TEST_P(mog2, getBackgroundImage)
+{
+    if (useGray)
+        return;
+
+    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/video/768x576.avi";
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+
+    cv::ocl::MOG2 mog2;
+    mog2.bShadowDetection = detectShadow;
+    cv::ocl::oclMat foreground;
+
+    cv::Ptr<cv::BackgroundSubtractorMOG2> mog2_gold = createBackgroundSubtractorMOG2();
+    mog2_gold->set("detectShadows", detectShadow);
+    cv::Mat foreground_gold;
+
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        mog2(loadMat_ocl(frame, useRoi), foreground);
+
+        mog2_gold->apply(frame, foreground_gold);
+    }
+
+    cv::ocl::oclMat background = createMat_ocl(frame.size(), frame.type(), useRoi);
+    mog2.getBackgroundImage(background);
+
+    cv::Mat background_gold;
+    mog2_gold->getBackgroundImage(background_gold);
+
+    EXPECT_MAT_NEAR(background_gold, background, 1.0);
+}
+
+INSTANTIATE_TEST_CASE_P(OCL_Video, mog2, testing::Combine(
+    testing::Values(UseGray(true), UseGray(false)),
+    testing::Values(DetectShadow(true), DetectShadow(false)),
+    Values(true, false)));
+
+#endif
--- a/modules/ocl/test/test_filters.cpp
+++ b/modules/ocl/test/test_filters.cpp
@ -353,6 +353,69 @@ TEST_P(Filter2D, Mat)
        Near(1);
    }
 }
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Bilateral
+struct Bilateral : FilterTestBase
+{
+    int type;
+    cv::Size ksize;
+    int bordertype;
+    double sigmacolor, sigmaspace;
+
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+        ksize = GET_PARAM(1);
+        bordertype = GET_PARAM(3);
+        Init(type);
+        cv::RNG &rng = TS::ptr()->get_rng();
+        sigmacolor = rng.uniform(20, 100);
+        sigmaspace = rng.uniform(10, 40);
+    }
+};
+
+TEST_P(Bilateral, Mat)
+{
+    for(int j = 0; j < LOOP_TIMES; j++)
+    {
+        random_roi();
+        cv::bilateralFilter(mat1_roi, dst_roi, ksize.width, sigmacolor, sigmaspace, bordertype);
+        cv::ocl::bilateralFilter(gmat1, gdst, ksize.width, sigmacolor, sigmaspace, bordertype);
+        Near(1);
+    }
+
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// AdaptiveBilateral
+struct AdaptiveBilateral : FilterTestBase
+{
+    int type;
+    cv::Size ksize;
+    int bordertype;
+    Point anchor;
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+        ksize = GET_PARAM(1);
+        bordertype = GET_PARAM(3);
+        Init(type);
+        anchor = Point(-1,-1);
+    }
+};
+
+TEST_P(AdaptiveBilateral, Mat)
+{
+    for(int j = 0; j < LOOP_TIMES; j++)
+    {
+        random_roi();
+        cv::adaptiveBilateralFilter(mat1_roi, dst_roi, ksize, 5, anchor, bordertype);
+        cv::ocl::adaptiveBilateralFilter(gmat1, gdst, ksize, 5, anchor, bordertype);
+        Near(1);
+    }
+
+}
+
 INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(
                        Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
                        Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)),
@ -400,4 +463,17 @@ INSTANTIATE_TEST_CASE_P(Filter, Filter2D, testing::Combine(
                        Values(Size(0, 0)), //not use
                        Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REFLECT101, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT)));

+INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine(
+                        Values(CV_8UC1, CV_8UC3),
+                        Values(Size(5, 5), Size(9, 9)),
+                        Values(Size(0, 0)), //not use
+                        Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE,
+                               (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_WRAP, (MatType)cv::BORDER_REFLECT_101)));
+
+INSTANTIATE_TEST_CASE_P(Filter, AdaptiveBilateral, Combine(
+                        Values(CV_8UC1, CV_8UC3),
+                        Values(Size(5, 5), Size(9, 9)),
+                        Values(Size(0, 0)), //not use
+                        Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE,
+                               (MatType)cv::BORDER_REFLECT,  (MatType)cv::BORDER_REFLECT_101)));
 #endif // HAVE_OPENCL
--- a/modules/ocl/test/test_imgproc.cpp
+++ b/modules/ocl/test/test_imgproc.cpp
@ -475,56 +475,6 @@ TEST_P(equalizeHist, Mat)
 }


-
-
-
-////////////////////////////////bilateralFilter////////////////////////////////////////////
-
-struct bilateralFilter : ImgprocTestBase {};
-
-TEST_P(bilateralFilter, Mat)
-{
-    double sigmacolor = 50.0;
-    int radius = 9;
-    int d = 2 * radius + 1;
-    double sigmaspace = 20.0;
-    int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101};
-    //const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
-
-    if (mat1.depth() != CV_8U || mat1.type() != dst.type())
-    {
-        cout << "Unsupported type" << endl;
-        EXPECT_DOUBLE_EQ(0.0, 0.0);
-    }
-    else
-    {
-        for(size_t i = 0; i < sizeof(bordertype) / sizeof(int); i++)
-            for(int j = 0; j < LOOP_TIMES; j++)
-            {
-                random_roi();
-                if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE) && (mat1_roi.cols <= radius)) || (mat1_roi.cols <= radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius))
-                {
-                    continue;
-                }
-                //if((dstx>=radius) && (dsty >= radius) && (dstx+cldst_roi.cols+radius <=cldst_roi.wholecols) && (dsty+cldst_roi.rows+radius <= cldst_roi.wholerows))
-                //{
-                //	dst_roi.adjustROI(radius, radius, radius, radius);
-                //	cldst_roi.adjustROI(radius, radius, radius, radius);
-                //}
-                //else
-                //{
-                //	continue;
-                //}
-
-                cv::bilateralFilter(mat1_roi, dst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED);
-                cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED);
-                Near(1.);
-            }
-    }
-}
-
-
-
 ////////////////////////////////copyMakeBorder////////////////////////////////////////////

 struct CopyMakeBorder : ImgprocTestBase {};
@ -1396,14 +1346,10 @@ TEST_P(calcHist, Mat)
 }
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // CLAHE
-namespace
-{
-    IMPLEMENT_PARAM_CLASS(ClipLimit, double)
-}

-PARAM_TEST_CASE(CLAHE, cv::Size, ClipLimit)
+PARAM_TEST_CASE(CLAHE, cv::Size, double)
 {
-    cv::Size size;
+    cv::Size gridSize;
    double clipLimit;

    cv::Mat src;
@ -1414,22 +1360,22 @@ PARAM_TEST_CASE(CLAHE, cv::Size, ClipLimit)

    virtual void SetUp()
    {
-        size = GET_PARAM(0);
+        gridSize = GET_PARAM(0);
        clipLimit = GET_PARAM(1);

        cv::RNG &rng = TS::ptr()->get_rng();
-        src = randomMat(rng, size, CV_8UC1, 0, 256, false);
+        src = randomMat(rng, cv::Size(MWIDTH, MHEIGHT), CV_8UC1, 0, 256, false);
        g_src.upload(src);
    }
 };

 TEST_P(CLAHE, Accuracy)
 {
-    cv::Ptr<cv::CLAHE> clahe = cv::ocl::createCLAHE(clipLimit);
+    cv::Ptr<cv::CLAHE> clahe = cv::ocl::createCLAHE(clipLimit, gridSize);
    clahe->apply(g_src, g_dst);
    cv::Mat dst(g_dst);

-    cv::Ptr<cv::CLAHE> clahe_gold = cv::createCLAHE(clipLimit);
+    cv::Ptr<cv::CLAHE> clahe_gold = cv::createCLAHE(clipLimit, gridSize);
    clahe_gold->apply(src, dst_gold);

    EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
@ -1622,21 +1568,6 @@ INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine(
                            NULL_TYPE,
                            Values(false))); // Values(false) is the reserved parameter

-//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
-//	ONE_TYPE(CV_8UC1),
-//	NULL_TYPE,
-//	ONE_TYPE(CV_8UC1),
-//	NULL_TYPE,
-//	NULL_TYPE,
-//	Values(false))); // Values(false) is the reserved parameter
-INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
-                            Values(CV_8UC1, CV_8UC3),
-                            NULL_TYPE,
-                            Values(CV_8UC1, CV_8UC3),
-                            NULL_TYPE,
-                            NULL_TYPE,
-                            Values(false))); // Values(false) is the reserved parameter
-

 INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine(
                            Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
@ -1725,10 +1656,10 @@ INSTANTIATE_TEST_CASE_P(histTestBase, calcHist, Combine(
                            ONE_TYPE(CV_32SC1) //no use
                        ));

-INSTANTIATE_TEST_CASE_P(ImgProc, CLAHE, Combine(
-                        Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(1300, 1300)),
-                        Values(0.0, 40.0)));
+INSTANTIATE_TEST_CASE_P(Imgproc, CLAHE, Combine(
+                        Values(cv::Size(4, 4), cv::Size(32, 8), cv::Size(8, 64)),
+                        Values(0.0, 10.0, 62.0, 300.0)));

-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, ColumnSum, DIFFERENT_SIZES);
+INSTANTIATE_TEST_CASE_P(Imgproc, ColumnSum, DIFFERENT_SIZES);

 #endif // HAVE_OPENCL
--- a/modules/ocl/test/test_optflow.cpp
+++ b/modules/ocl/test/test_optflow.cpp
@ -164,7 +164,7 @@ TEST_P(TVL1, DISABLED_Accuracy) // TODO implementations of TV1 in video module a
    EXPECT_MAT_SIMILAR(gold[0], d_flowx, 3e-3);
    EXPECT_MAT_SIMILAR(gold[1], d_flowy, 3e-3);
 }
-INSTANTIATE_TEST_CASE_P(OCL_Video, TVL1, Values(true, false));
+INSTANTIATE_TEST_CASE_P(OCL_Video, TVL1, Values(false, true));


 /////////////////////////////////////////////////////////////////////////////////////////////////
--- a/modules/ocl/test/utility.cpp
+++ b/modules/ocl/test/utility.cpp
@ -100,6 +100,44 @@ Mat randomMat(Size size, int type, double minVal, double maxVal)
    return randomMat(TS::ptr()->get_rng(), size, type, minVal, maxVal, false);
 }

+cv::ocl::oclMat createMat_ocl(Size size, int type, bool useRoi)
+{
+    Size size0 = size;
+
+    if (useRoi)
+    {
+        size0.width += randomInt(5, 15);
+        size0.height += randomInt(5, 15);
+    }
+
+    cv::ocl::oclMat d_m(size0, type);
+
+    if (size0 != size)
+        d_m = d_m(Rect((size0.width - size.width) / 2, (size0.height - size.height) / 2, size.width, size.height));
+
+    return d_m;
+}
+
+cv::ocl::oclMat loadMat_ocl(const Mat& m, bool useRoi)
+{
+    CV_Assert(m.type() == CV_8UC1 || m.type() == CV_8UC3);
+    cv::ocl::oclMat d_m;
+    d_m = createMat_ocl(m.size(), m.type(), useRoi);
+
+    Size ls;
+    Point pt;
+
+    d_m.locateROI(ls, pt);
+
+    Rect roi(pt.x, pt.y, d_m.size().width, d_m.size().height);
+
+    cv::ocl::oclMat m_ocl(m);
+
+    cv::ocl::oclMat d_m_roi(d_m, roi);
+
+    m_ocl.copyTo(d_m);
+    return d_m;
+}
 /*
 void showDiff(InputArray gold_, InputArray actual_, double eps)
 {
--- a/modules/ocl/test/utility.hpp
+++ b/modules/ocl/test/utility.hpp
@ -72,6 +72,9 @@ double checkNorm(const cv::Mat &m);
 double checkNorm(const cv::Mat &m1, const cv::Mat &m2);
 double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2);

+//oclMat create
+cv::ocl::oclMat createMat_ocl(cv::Size size, int type, bool useRoi = false);
+cv::ocl::oclMat loadMat_ocl(const cv::Mat& m, bool useRoi = false);
 #define EXPECT_MAT_NORM(mat, eps) \
 { \
    EXPECT_LE(checkNorm(cv::Mat(mat)), eps) \
--- a/samples/ocl/CMakeLists.txt
+++ b/samples/ocl/CMakeLists.txt
@ -54,6 +54,6 @@ endif()
 if (INSTALL_C_EXAMPLES AND NOT WIN32)
  file(GLOB install_list *.c *.cpp *.jpg *.png *.data makefile.* build_all.sh *.dsp *.cmd )
  install(FILES ${install_list}
-          DESTINATION share/opencv/samples/${project}
+          DESTINATION share/OpenCV/samples/${project}
          PERMISSIONS OWNER_READ GROUP_READ WORLD_READ)
 endif()
--- a/samples/ocl/adaptive_bilateral_filter.cpp
+++ b/samples/ocl/adaptive_bilateral_filter.cpp
@ -0,0 +1,52 @@
+// This sample shows the difference of adaptive bilateral filter and bilateral filter.
+#include "opencv2/core.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/ocl.hpp"
+
+using namespace cv;
+using namespace std;
+
+
+int main( int argc, const char** argv )
+{
+    const char* keys =
+        "{ i input   |          | specify input image }"
+        "{ k ksize   |     5    | specify kernel size }";
+    CommandLineParser cmd(argc, argv, keys);
+    string src_path = cmd.get<string>("i");
+    int ks = cmd.get<int>("k");
+    const char * winName[] = {"input", "adaptive bilateral CPU", "adaptive bilateral OpenCL", "bilateralFilter OpenCL"};
+
+    Mat src = imread(src_path);
+    Mat abFilterCPU;
+    if(src.empty()){
+        //cout << "error read image: " << src_path << endl;
+        return -1;
+    }
+
+    std::vector<ocl::Info> infos;
+    ocl::getDevice(infos);
+
+    ocl::oclMat dsrc(src), dABFilter, dBFilter;
+
+    Size ksize(ks, ks);
+    adaptiveBilateralFilter(src,abFilterCPU, ksize, 10);
+    ocl::adaptiveBilateralFilter(dsrc, dABFilter, ksize, 10);
+    ocl::bilateralFilter(dsrc, dBFilter, ks, 30, 9);
+
+    Mat abFilter = dABFilter;
+    Mat bFilter = dBFilter;
+    imshow(winName[0], src);
+
+    imshow(winName[1], abFilterCPU);
+
+    imshow(winName[2], abFilter);
+
+    imshow(winName[3], bFilter);
+
+    waitKey();
+    return 0;
+
+}
--- a/samples/ocl/bgfg_segm.cpp
+++ b/samples/ocl/bgfg_segm.cpp
@ -0,0 +1,136 @@
+#include <iostream>
+#include <string>
+
+#include "opencv2/core.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/ocl.hpp"
+#include "opencv2/highgui.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::ocl;
+
+#define M_MOG  1
+#define M_MOG2 2
+
+int main(int argc, const char** argv)
+{
+
+    cv::CommandLineParser cmd(argc, argv,
+        "{ c camera | false       | use camera }"
+        "{ f file   | 768x576.avi | input video file }"
+        "{ m method | mog         | method (mog, mog2) }"
+        "{ h help   | false       | print help message }");
+
+    if (cmd.get<bool>("help"))
+    {
+        cout << "Usage : bgfg_segm [options]" << endl;
+        cout << "Avaible options:" << endl;
+        cmd.printMessage();
+        return 0;
+    }
+
+    bool useCamera = cmd.get<bool>("camera");
+    string file = cmd.get<string>("file");
+    string method = cmd.get<string>("method");
+
+    if (method != "mog" && method != "mog2")
+    {
+        cerr << "Incorrect method" << endl;
+        return -1;
+    }
+
+    int m = method == "mog" ? M_MOG : M_MOG2;
+
+    VideoCapture cap;
+
+    if (useCamera)
+        cap.open(0);
+    else
+        cap.open(file);
+
+    if (!cap.isOpened())
+    {
+        cerr << "can not open camera or video file" << endl;
+        return -1;
+    }
+
+    std::vector<cv::ocl::Info>info;
+    cv::ocl::getDevice(info);
+
+    Mat frame;
+    cap >> frame;
+
+    oclMat d_frame(frame);
+
+    cv::ocl::MOG mog;
+    cv::ocl::MOG2 mog2;
+
+    oclMat d_fgmask;
+    oclMat d_fgimg;
+    oclMat d_bgimg;
+
+    d_fgimg.create(d_frame.size(), d_frame.type());
+
+    Mat fgmask;
+    Mat fgimg;
+    Mat bgimg;
+
+    switch (m)
+    {
+    case M_MOG:
+        mog(d_frame, d_fgmask, 0.01f);
+        break;
+
+    case M_MOG2:
+        mog2(d_frame, d_fgmask);
+        break;
+    }
+
+    for(;;)
+    {
+        cap >> frame;
+        if (frame.empty())
+            break;
+        d_frame.upload(frame);
+
+        int64 start = cv::getTickCount();
+
+        //update the model
+        switch (m)
+        {
+        case M_MOG:
+            mog(d_frame, d_fgmask, 0.01f);
+            mog.getBackgroundImage(d_bgimg);
+            break;
+
+        case M_MOG2:
+            mog2(d_frame, d_fgmask);
+            mog2.getBackgroundImage(d_bgimg);
+            break;
+        }
+
+        double fps = cv::getTickFrequency() / (cv::getTickCount() - start);
+        std::cout << "FPS : " << fps << std::endl;
+
+        d_fgimg.setTo(Scalar::all(0));
+        d_frame.copyTo(d_fgimg, d_fgmask);
+
+        d_fgmask.download(fgmask);
+        d_fgimg.download(fgimg);
+        if (!d_bgimg.empty())
+            d_bgimg.download(bgimg);
+
+        imshow("image", frame);
+        imshow("foreground mask", fgmask);
+        imshow("foreground image", fgimg);
+        if (!bgimg.empty())
+            imshow("mean background image", bgimg);
+
+        int key = waitKey(30);
+        if (key == 27)
+            break;
+    }
+
+    return 0;
+}
--- a/samples/ocl/clahe.cpp
+++ b/samples/ocl/clahe.cpp
@ -45,6 +45,10 @@ int main(int argc, char** argv)
    namedWindow("CLAHE");
    createTrackbar("Tile Size", "CLAHE", &tilesize, 32, (TrackbarCallback)TSize_Callback);
    createTrackbar("Clip Limit", "CLAHE", &cliplimit, 20, (TrackbarCallback)Clip_Callback);
+
+    vector<ocl::Info> info;
+    CV_Assert(ocl::getDevice(info));
+
    Mat frame, outframe;
    ocl::oclMat d_outframe;

--- a/samples/python2/facerec_demo.py
+++ b/samples/python2/facerec_demo.py
@ -31,6 +31,11 @@
 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.

+# ------------------------------------------------------------------------------------------------
+# Note:
+# When using the FaceRecognizer interface in combination with Python, please stick to Python 2.
+# Some underlying scripts like create_csv will not work in other versions, like Python 3.
+# ------------------------------------------------------------------------------------------------

 import os
 import sys