Merge branch '2.4'

pull/501/merge
Andrey Kamaev 12 years ago
commit 0b8a6da817
  1. BIN
      3rdparty/ffmpeg/opencv_ffmpeg.dll
  2. BIN
      3rdparty/ffmpeg/opencv_ffmpeg_64.dll
  3. 2
      android/service/doc/BaseLoaderCallback.rst
  4. 4
      android/service/doc/JavaHelper.rst
  5. 4
      android/service/engine/AndroidManifest.xml
  6. 28
      android/service/readme.txt
  7. 8
      cmake/OpenCVDetectOpenCL.cmake
  8. 2
      cmake/OpenCVGenAndroidMK.cmake
  9. 3
      cmake/templates/OpenCV.mk.in
  10. 1
      doc/tutorials/definitions/tocDefinitions.rst
  11. 115
      doc/tutorials/introduction/crosscompilation/arm_crosscompile_with_cmake.rst
  12. 25
      doc/tutorials/introduction/table_of_content_introduction/table_of_content_introduction.rst
  13. 136
      modules/core/doc/basic_structures.rst
  14. 18
      modules/core/include/opencv2/core/gpumat.hpp
  15. 3
      modules/core/perf/perf_compare.cpp
  16. 4
      modules/core/perf/perf_convertTo.cpp
  17. 6
      modules/core/perf/perf_mat.cpp
  18. 3
      modules/core/perf/perf_merge.cpp
  19. 4
      modules/core/perf/perf_split.cpp
  20. 3
      modules/core/perf/perf_stat.cpp
  21. 4
      modules/core/src/gpumat.cpp
  22. 1
      modules/gpu/doc/feature_detection_and_description.rst
  23. 4
      modules/gpu/include/opencv2/gpu/gpu.hpp
  24. 2
      modules/gpu/src/cascadeclassifier.cpp
  25. 8
      modules/gpu/src/hough.cpp
  26. 2
      modules/highgui/include/opencv2/highgui/highgui.hpp
  27. 8
      modules/highgui/src/cap_ffmpeg_impl.hpp
  28. 4
      modules/imgproc/perf/perf_cvt_color.cpp
  29. 6
      modules/imgproc/perf/perf_matchTemplate.cpp
  30. 4
      modules/imgproc/perf/perf_threshold.cpp
  31. 5
      modules/java/generator/src/java/android+OpenCVLoader.java
  32. 33
      modules/ocl/include/opencv2/ocl/ocl.hpp
  33. 2
      modules/ocl/perf/perf_gemm.cpp
  34. 8
      modules/ocl/src/arithm.cpp
  35. 8
      modules/ocl/src/imgproc.cpp
  36. 134
      modules/ocl/src/initialization.cpp
  37. 13
      modules/ocl/src/kernels/imgproc_histogram.cl
  38. 19
      modules/ocl/src/kernels/meanShift.cl
  39. 98
      modules/ocl/src/mcwutil.cpp
  40. 6
      modules/ocl/src/mcwutil.hpp
  41. 240
      modules/ocl/src/pyrlk.cpp
  42. 2
      modules/ocl/test/test_gemm.cpp
  43. 15
      modules/ocl/test/test_imgproc.cpp
  44. 26
      modules/ts/src/ts_arrtest.cpp
  45. 24
      modules/ts/src/ts_func.cpp
  46. 2
      modules/video/perf/perf_tvl1optflow.cpp
  47. 0
      samples/gpu/morphology.cpp

Binary file not shown.

Binary file not shown.

@ -48,7 +48,7 @@ See the "15-puzzle" OpenCV sample for details.
super.onResume();
Log.i(TAG, "Trying to load OpenCV library");
if (!OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_3, this, mOpenCVCallBack))
if (!OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_4, this, mOpenCVCallBack))
{
Log.e(TAG, "Cannot connect to OpenCV Manager");
}

@ -47,3 +47,7 @@ OpenCV version constants
.. data:: OPENCV_VERSION_2_4_3
OpenCV Library version 2.4.3
.. data:: OPENCV_VERSION_2_4_4
OpenCV Library version 2.4.4

@ -1,8 +1,8 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="org.opencv.engine"
android:versionCode="25@ANDROID_PLATFORM_VERSION_CODE@"
android:versionName="2.5" >
android:versionCode="26@ANDROID_PLATFORM_VERSION_CODE@"
android:versionName="2.6" >
<uses-sdk android:minSdkVersion="@ANDROID_NATIVE_API_LEVEL@" />
<uses-feature android:name="android.hardware.touchscreen" android:required="false"/>

@ -14,20 +14,20 @@ manually using adb tool:
.. code-block:: sh
adb install OpenCV-2.4.3-android-sdk/apk/OpenCV_2.4.3.2_Manager_2.4_<platform>.apk
adb install OpenCV-2.4.4-android-sdk/apk/OpenCV_2.4.4_Manager_2.6_<platform>.apk
Use the table below to determine proper OpenCV Manager package for your device:
+------------------------------+--------------+-----------------------------------------------------+
| Hardware Platform | Android ver. | Package name |
+==============================+==============+=====================================================+
| armeabi-v7a (ARMv7-A + NEON) | >= 2.3 | OpenCV_2.4.3.2_Manager_2.4_armv7a-neon.apk |
+------------------------------+--------------+-----------------------------------------------------+
| armeabi-v7a (ARMv7-A + NEON) | = 2.2 | OpenCV_2.4.3.2_Manager_2.4_armv7a-neon-android8.apk |
+------------------------------+--------------+-----------------------------------------------------+
| armeabi (ARMv5, ARMv6) | >= 2.3 | OpenCV_2.4.3.2_Manager_2.4_armeabi.apk |
+------------------------------+--------------+-----------------------------------------------------+
| Intel x86 | >= 2.3 | OpenCV_2.4.3.2_Manager_2.4_x86.apk |
+------------------------------+--------------+-----------------------------------------------------+
| MIPS | >= 2.3 | OpenCV_2.4.3.2_Manager_2.4_mips.apk |
+------------------------------+--------------+-----------------------------------------------------+
+------------------------------+--------------+---------------------------------------------------+
| Hardware Platform | Android ver. | Package name |
+==============================+==============+===================================================+
| armeabi-v7a (ARMv7-A + NEON) | >= 2.3 | OpenCV_2.4.4_Manager_2.6_armv7a-neon.apk |
+------------------------------+--------------+---------------------------------------------------+
| armeabi-v7a (ARMv7-A + NEON) | = 2.2 | OpenCV_2.4.4_Manager_2.6_armv7a-neon-android8.apk |
+------------------------------+--------------+---------------------------------------------------+
| armeabi (ARMv5, ARMv6) | >= 2.3 | OpenCV_2.4.4_Manager_2.6_armeabi.apk |
+------------------------------+--------------+---------------------------------------------------+
| Intel x86 | >= 2.3 | OpenCV_2.4.4_Manager_2.6_x86.apk |
+------------------------------+--------------+---------------------------------------------------+
| MIPS | >= 2.3 | OpenCV_2.4.4_Manager_2.6_mips.apk |
+------------------------------+--------------+---------------------------------------------------+

@ -81,6 +81,7 @@ else()
set(ENV_AMDAPPSDKROOT $ENV{AMDAPPSDKROOT})
set(ENV_OPENCLROOT $ENV{OPENCLROOT})
set(ENV_CUDA_PATH $ENV{CUDA_PATH})
set(ENV_INTELOCLSDKROOT $ENV{INTELOCLSDKROOT})
if(ENV_AMDSTREAMSDKROOT)
set(OPENCL_INCLUDE_SEARCH_PATH ${ENV_AMDAPPSDKROOT}/include)
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
@ -109,6 +110,13 @@ else()
else()
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} /usr/lib64)
endif()
elseif(ENV_INTELOCLSDKROOT)
set(OPENCL_INCLUDE_SEARCH_PATH ${ENV_INTELOCLSDKROOT}/include)
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_INTELOCLSDKROOT}/lib/x86)
else()
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_INTELOCLSDKROOT}/lib/x64)
endif()
endif()
if(OPENCL_INCLUDE_SEARCH_PATH)

@ -20,7 +20,7 @@ if(ANDROID)
endif()
# setup lists of camera libs
foreach(abi ARMEABI ARMEABI_V7A X86)
foreach(abi ARMEABI ARMEABI_V7A X86 MIPS)
ANDROID_GET_ABI_RAWNAME(${abi} ndkabi)
if(BUILD_ANDROID_CAMERA_WRAPPER)
if(ndkabi STREQUAL ANDROID_NDK_ABI_NAME)

@ -57,6 +57,9 @@ ifeq (${OPENCV_CAMERA_MODULES},on)
ifeq ($(TARGET_ARCH_ABI),x86)
OPENCV_CAMERA_MODULES:=@OPENCV_CAMERA_LIBS_X86_CONFIGCMAKE@
endif
ifeq ($(TARGET_ARCH_ABI),mips)
OPENCV_CAMERA_MODULES:=@OPENCV_CAMERA_LIBS_MIPS_CONFIGCMAKE@
endif
else
OPENCV_CAMERA_MODULES:=
endif

@ -10,3 +10,4 @@
.. |Author_AlexB| unicode:: Alexandre U+0020 Benoit
.. |Author_EricCh| unicode:: Eric U+0020 Christiansen
.. |Author_AndreyP| unicode:: Andrey U+0020 Pavlenko
.. |Author_AlexS| unicode:: Alexander U+0020 Smorkalov

@ -0,0 +1,115 @@
.. _ARM-Linux-cross-compile:
Cross compilation for ARM based Linux systems
*********************************************
These steps are tested on Ubuntu Linux 12.04, but they should work for other Linux distributions.
In case of other distributions, package names and names of cross-compilation tools may differ.
There are several popular EABI versions that are used on the ARM platform. This tutorial is
written for *gnueabi* and *gnueabihf*, but other variants should work with minimal changes.
Prerequisites
=============
* Host computer with Linux;
* Git;
* CMake 2.6 or higher;
* Cross compilation tools for ARM: gcc, libstdc++, etc. Depending on the target platform you need
to choose *gnueabi* or *gnueabihf* tools.
Install command for *gnueabi*:
.. code-block:: bash
sudo apt-get install gcc-arm-linux-gnueabi
Install command for *gnueabihf*:
.. code-block:: bash
sudo apt-get install gcc-arm-linux-gnueabihf
* pkgconfig;
* Python 2.6 for host system;
* [optional] ffmpeg or libav development packages for armeabi(hf): libavcodec-dev, libavformat-dev, libswscale-dev;
* [optional] GTK+2.x or higher, including headers (libgtk2.0-dev) for armeabi(hf);
* [optional] libdc1394 2.x;
* [optional] libjpeg-dev, libpng-dev, libtiff-dev, libjasper-dev for armeabi(hf).
Getting OpenCV Source Code
==========================
You can use the latest stable OpenCV version available on *SourceForge* or you can grab the latest
snapshot from our `Git repository <https://github.com/Itseez/opencv.git>`_.
Getting the Latest Stable OpenCV Version
----------------------------------------
* Go to our `page on Sourceforge <http://sourceforge.net/projects/opencvlibrary>`_;
* Download the source tarball and unpack it.
Getting the Cutting-edge OpenCV from the Git Repository
-------------------------------------------------------
Launch a Git client and clone the `OpenCV repository <http://github.com/itseez/opencv>`_.
On Linux this can be done with the following command in a terminal:
.. code-block:: bash
cd ~/<my_working_directory>
git clone https://github.com/Itseez/opencv.git
Building OpenCV
===============
#. Create a build directory, make it current and run the following command:
.. code-block:: bash
cmake [<some optional parameters>] -DCMAKE_TOOLCHAIN_FILE=<path to the OpenCV source directory>/platforms/linux/arm-gnueabi.toolchain.cmake <path to the OpenCV source directory>
The toolchain uses the *gnueabihf* EABI convention by default. Add the ``-DSOFTFP=ON`` cmake argument to switch to the softfp compiler.
.. code-block:: bash
cmake [<some optional parameters>] -DSOFTFP=ON -DCMAKE_TOOLCHAIN_FILE=<path to the OpenCV source directory>/platforms/linux/arm-gnueabi.toolchain.cmake <path to the OpenCV source directory>
For example:
.. code-block:: bash
cd ~/opencv/platforms/linux
mkdir -p build_hardfp
cd build_hardfp
cmake -DCMAKE_TOOLCHAIN_FILE=../arm-gnueabi.toolchain.cmake ../../..
#. Run make in the build directory (<cmake_binary_dir>):
.. code-block:: bash
make
.. note::
Optionally, you can strip symbol information from the created library via the install/strip make target.
This option produces a smaller binary (roughly half the size) but makes further debugging harder.
Enable hardware optimizations
-----------------------------
Depending on the target platform architecture, different instruction sets can be used. By default
the compiler generates code for armv5l without VFPv3 and NEON extensions. Add ``-DUSE_VFPV3=ON``
to the cmake command line to enable code generation for VFPv3 and ``-DUSE_NEON=ON`` to use the
NEON SIMD extensions.
TBB is also supported on multi-core ARM SoCs.
Add ``-DWITH_TBB=ON`` and ``-DBUILD_TBB=ON`` to enable it. The CMake scripts download the TBB sources
from the official project site `<http://threadingbuildingblocks.org/>`_ and build them.

@ -3,7 +3,9 @@
Introduction to OpenCV
-----------------------------------------------------------
Here you can read tutorials about how to set up your computer to work with the OpenCV library. Additionaly you can find a few very basic sample source code that will let introduce you to the world of the OpenCV.
Here you can read tutorials about how to set up your computer to work with the OpenCV library.
Additionally, you can find a few very basic sample source codes that introduce you to the
world of OpenCV.
.. include:: ../../definitions/tocDefinitions.rst
@ -189,6 +191,24 @@ Here you can read tutorials about how to set up your computer to work with the O
.. |Install_iOS| image:: images/opencv_ios.png
:width: 90pt
* **Embedded Linux**
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|Usage_1| **Title:** :ref:`ARM-Linux-cross-compile`
*Compatibility:* > OpenCV 2.4.4
*Author:* |Author_AlexS|
We will learn how to set up an OpenCV cross-compilation environment for ARM Linux.
=========== ======================================================
* **Common**
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
@ -249,7 +269,7 @@ Here you can read tutorials about how to set up your computer to work with the O
\pagebreak
.. We use a custom table of content format and as the table of content only imforms Sphinx about the hierarchy of the files, no need to show it.
.. We use a custom table of content format and as the table of content only informs Sphinx about the hierarchy of the files, no need to show it.
.. toctree::
:hidden:
@ -263,6 +283,7 @@ Here you can read tutorials about how to set up your computer to work with the O
../android_binary_package/O4A_SDK
../android_binary_package/dev_with_OCV_on_Android
../ios_install/ios_install
../crosscompilation/arm_crosscompile_with_cmake
../display_image/display_image
../load_save_image/load_save_image
../how_to_write_a_tutorial/how_to_write_a_tutorial

@ -238,7 +238,7 @@ The constructors.
:param epsilon: The desired accuracy or change in parameters at which the iterative algorithm stops.
:param criteria: Termination criteria in the deprecated ``CvTermCriteria`` format.
TermCriteria::operator CvTermCriteria
-------------------------------------
Converts to the deprecated ``CvTermCriteria`` format.
@ -418,27 +418,47 @@ Template class for smart reference-counting pointers ::
};
The ``Ptr<_Tp>`` class is a template class that wraps pointers of the corresponding type. It is similar to ``shared_ptr`` that is part of the Boost library (
http://www.boost.org/doc/libs/1_40_0/libs/smart_ptr/shared_ptr.htm
) and also part of the `C++0x <http://en.wikipedia.org/wiki/C++0x>`_
standard.
The ``Ptr<_Tp>`` class is a template class that wraps pointers of the corresponding type. It is
similar to ``shared_ptr`` that is part of the Boost library
(http://www.boost.org/doc/libs/1_40_0/libs/smart_ptr/shared_ptr.htm) and also part of the
`C++0x <http://en.wikipedia.org/wiki/C++0x>`_ standard.
This class provides the following options:
*
Default constructor, copy constructor, and assignment operator for an arbitrary C++ class or a C structure. For some objects, like files, windows, mutexes, sockets, and others, a copy constructor or an assignment operator are difficult to define. For some other objects, like complex classifiers in OpenCV, copy constructors are absent and not easy to implement. Finally, some of complex OpenCV and your own data structures may be written in C. However, copy constructors and default constructors can simplify programming a lot. Besides, they are often required (for example, by STL containers). By wrapping a pointer to such a complex object ``TObj`` to ``Ptr<TObj>`` , you automatically get all of the necessary constructors and the assignment operator.
Default constructor, copy constructor, and assignment operator for an arbitrary C++ class
or a C structure. For some objects, like files, windows, mutexes, sockets, and others, a copy
constructor or an assignment operator are difficult to define. For some other objects, like
complex classifiers in OpenCV, copy constructors are absent and not easy to implement. Finally,
some complex OpenCV and your own data structures may be written in C.
However, copy constructors and default constructors can simplify programming a lot. Besides,
they are often required (for example, by STL containers). By wrapping a pointer to such a
complex object ``TObj`` to ``Ptr<TObj>``, you automatically get all of the necessary
constructors and the assignment operator.
*
*O(1)* complexity of the above-mentioned operations. While some structures, like ``std::vector``, provide a copy constructor and an assignment operator, the operations may take a considerable amount of time if the data structures are large. But if the structures are put into ``Ptr<>`` , the overhead is small and independent of the data size.
*O(1)* complexity of the above-mentioned operations. While some structures, like ``std::vector``,
provide a copy constructor and an assignment operator, the operations may take a considerable
amount of time if the data structures are large. But if the structures are put into ``Ptr<>``,
the overhead is small and independent of the data size.
*
Automatic destruction, even for C structures. See the example below with ``FILE*`` .
Automatic destruction, even for C structures. See the example below with ``FILE*``.
*
Heterogeneous collections of objects. The standard STL and most other C++ and OpenCV containers can store only objects of the same type and the same size. The classical solution to store objects of different types in the same container is to store pointers to the base class ``base_class_t*`` instead but then you loose the automatic memory management. Again, by using ``Ptr<base_class_t>()`` instead of the raw pointers, you can solve the problem.
The ``Ptr`` class treats the wrapped object as a black box. The reference counter is allocated and managed separately. The only thing the pointer class needs to know about the object is how to deallocate it. This knowledge is encapsulated in the ``Ptr::delete_obj()`` method that is called when the reference counter becomes 0. If the object is a C++ class instance, no additional coding is needed, because the default implementation of this method calls ``delete obj;`` .
However, if the object is deallocated in a different way, the specialized method should be created. For example, if you want to wrap ``FILE`` , the ``delete_obj`` may be implemented as follows: ::
Heterogeneous collections of objects. The standard STL and most other C++ and OpenCV containers
can store only objects of the same type and the same size. The classical solution to store objects
of different types in the same container is to store pointers to the base class ``base_class_t*``
instead, but then you lose the automatic memory management. Again, by using ``Ptr<base_class_t>()``
instead of the raw pointers, you can solve the problem.
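For illustration only, a minimal sketch of such a heterogeneous collection (``Shape``, ``Circle`` and ``Square`` are hypothetical classes, the latter two derived from ``Shape``): ::

    std::vector< Ptr<Shape> > shapes;
    shapes.push_back(Ptr<Shape>(new Circle()));
    shapes.push_back(Ptr<Shape>(new Square()));
    // each wrapped object is deleted automatically when the vector is destroyed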
The ``Ptr`` class treats the wrapped object as a black box. The reference counter is allocated and
managed separately. The only thing the pointer class needs to know about the object is how to
deallocate it. This knowledge is encapsulated in the ``Ptr::delete_obj()`` method that is called when
the reference counter becomes 0. If the object is a C++ class instance, no additional coding is
needed, because the default implementation of this method calls ``delete obj;``. However, if the
object is deallocated in a different way, the specialized method should be created. For example,
if you want to wrap ``FILE``, the ``delete_obj`` may be implemented as follows: ::
template<> inline void Ptr<FILE>::delete_obj()
{
@ -456,7 +476,73 @@ However, if the object is deallocated in a different way, the specialized method
// the file will be closed automatically by the Ptr<FILE> destructor.
.. note:: The reference increment/decrement operations are implemented as atomic operations, and therefore it is normally safe to use the classes in multi-threaded applications. The same is true for :ocv:class:`Mat` and other C++ OpenCV classes that operate on the reference counters.
.. note:: The reference increment/decrement operations are implemented as atomic operations,
and therefore it is normally safe to use the classes in multi-threaded applications.
The same is true for :ocv:class:`Mat` and other C++ OpenCV classes that operate on
the reference counters.
Ptr::Ptr
--------
Various Ptr constructors.
.. ocv:function:: Ptr::Ptr()
.. ocv:function:: Ptr::Ptr(_Tp* _obj)
.. ocv:function:: Ptr::Ptr(const Ptr& ptr)
Ptr::~Ptr
---------
The Ptr destructor.
.. ocv:function:: Ptr::~Ptr()
Ptr::operator =
----------------
Assignment operator.
.. ocv:function:: Ptr& Ptr::operator = (const Ptr& ptr)
Decrements own reference counter (with ``release()``) and increments ptr's reference counter.
Ptr::addref
-----------
Increments reference counter.
.. ocv:function:: void Ptr::addref()
Ptr::release
------------
Decrements reference counter; when it becomes 0, ``delete_obj()`` is called.
.. ocv:function:: void Ptr::release()
Ptr::delete_obj
---------------
User-specified custom object deletion operation. By default, ``delete obj;`` is called.
.. ocv:function:: void Ptr::delete_obj()
Ptr::empty
----------
Returns true if ``obj == 0``.
.. ocv:function:: bool Ptr::empty() const
Ptr::operator ->
----------------
Provides access to the object fields and methods.
.. ocv:function:: template<typename _Tp> _Tp* Ptr::operator -> ()
.. ocv:function:: template<typename _Tp> const _Tp* Ptr::operator -> () const
Ptr::operator _Tp*
------------------
Returns the underlying object pointer. Thanks to these operators, the ``Ptr<_Tp>`` can be used instead
of ``_Tp*``.
.. ocv:function:: template<typename _Tp> Ptr::operator _Tp* ()
.. ocv:function:: template<typename _Tp> Ptr::operator const _Tp*() const
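For illustration, a minimal sketch of the implicit conversion (the file name is hypothetical, and it assumes the ``Ptr<FILE>::delete_obj`` specialization shown above closes the file): ::

    Ptr<FILE> f(fopen("log.txt", "w"));
    if (!f.empty())
        fprintf(f, "hello\n");  // Ptr<FILE> converts to FILE* here
    // fclose() is invoked via delete_obj() when the last copy of f is released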
Mat
---
@ -494,9 +580,9 @@ OpenCV C++ n-dimensional dense array class ::
The class ``Mat`` represents an n-dimensional dense numerical single-channel or multi-channel array. It can be used to store real or complex-valued vectors and matrices, grayscale or color images, voxel volumes, vector fields, point clouds, tensors, histograms (though, very high-dimensional histograms may be better stored in a ``SparseMat`` ). The data layout of the array
:math:`M` is defined by the array ``M.step[]`` , so that the address of element
:math:`(i_0,...,i_{M.dims-1})` , where
:math:`0\leq i_k<M.size[k]` , is computed as:
:math:`M` is defined by the array ``M.step[]``, so that the address of element
:math:`(i_0,...,i_{M.dims-1})`, where
:math:`0\leq i_k<M.size[k]`, is computed as:
.. math::
@ -529,7 +615,7 @@ There are many different ways to create a ``Mat`` object. The most popular optio
..
As noted in the introduction to this chapter, ``create()`` allocates only a new array when the shape or type of the current array are different from the specified ones.
As noted in the introduction to this chapter, ``create()`` allocates only a new array when the shape or type of the current array are different from the specified ones.
*
@ -543,7 +629,7 @@ There are many different ways to create a ``Mat`` object. The most popular optio
..
It passes the number of dimensions =1 to the ``Mat`` constructor but the created array will be 2-dimensional with the number of columns set to 1. So, ``Mat::dims`` is always >= 2 (can also be 0 when the array is empty).
It passes the number of dimensions =1 to the ``Mat`` constructor but the created array will be 2-dimensional with the number of columns set to 1. So, ``Mat::dims`` is always >= 2 (can also be 0 when the array is empty).
*
@ -573,7 +659,7 @@ There are many different ways to create a ``Mat`` object. The most popular optio
..
Due to the additional ``datastart`` and ``dataend`` members, it is possible to compute a relative sub-array position in the main *container* array using ``locateROI()``:
Due to the additional ``datastart`` and ``dataend`` members, it is possible to compute a relative sub-array position in the main *container* array using ``locateROI()``:
::
@ -589,7 +675,7 @@ There are many different ways to create a ``Mat`` object. The most popular optio
..
As in case of whole matrices, if you need a deep copy, use the ``clone()`` method of the extracted sub-matrices.
As in case of whole matrices, if you need a deep copy, use the ``clone()`` method of the extracted sub-matrices.
*
@ -619,7 +705,7 @@ There are many different ways to create a ``Mat`` object. The most popular optio
..
Partial yet very common cases of this *user-allocated data* case are conversions from ``CvMat`` and ``IplImage`` to ``Mat``. For this purpose, there are special constructors taking pointers to ``CvMat`` or ``IplImage`` and the optional flag indicating whether to copy the data or not.
Partial yet very common cases of this *user-allocated data* case are conversions from ``CvMat`` and ``IplImage`` to ``Mat``. For this purpose, there are special constructors taking pointers to ``CvMat`` or ``IplImage`` and the optional flag indicating whether to copy the data or not.
Backward conversion from ``Mat`` to ``CvMat`` or ``IplImage`` is provided via cast operators ``Mat::operator CvMat() const`` and ``Mat::operator IplImage()``. The operators do NOT copy the data.
@ -905,7 +991,7 @@ Provides matrix assignment operators.
:param m: Assigned, right-hand-side matrix. Matrix assignment is an O(1) operation. This means that no data is copied but the data is shared and the reference counter, if any, is incremented. Before assigning new data, the old data is de-referenced via :ocv:func:`Mat::release` .
:param expr: Assigned matrix expression object. As opposite to the first form of the assignment operation, the second form can reuse already allocated matrix if it has the right size and type to fit the matrix expression result. It is automatically handled by the real function that the matrix expressions is expanded to. For example, ``C=A+B`` is expanded to ``add(A, B, C)`` , and :func:`add` takes care of automatic ``C`` reallocation.
:param expr: Assigned matrix expression object. As opposed to the first form of the assignment operation, the second form can reuse an already allocated matrix if it has the right size and type to fit the matrix expression result. It is automatically handled by the real function that the matrix expression is expanded to. For example, ``C=A+B`` is expanded to ``add(A, B, C)``, and :func:`add` takes care of automatic ``C`` reallocation.
:param s: Scalar assigned to each matrix element. The matrix size or type is not changed.
@ -970,7 +1056,7 @@ Creates a matrix header for the specified row span.
:param endrow: An exclusive 0-based ending index of the row span.
:param r: :ocv:class:`Range` structure containing both the start and the end indices.
:param r: :ocv:class:`Range` structure containing both the start and the end indices.
The method makes a new header for the specified row span of the matrix. Similarly to
:ocv:func:`Mat::row` and
@ -1983,7 +2069,7 @@ The class ``SparseMat`` represents multi-dimensional sparse numerical arrays. Su
:ocv:class:`Mat` can store. *Sparse* means that only non-zero elements are stored (though, as a result of operations on a sparse matrix, some of its stored elements can actually become 0. It is up to you to detect such elements and delete them using ``SparseMat::erase`` ). The non-zero elements are stored in a hash table that grows when it is filled so that the search time is O(1) in average (regardless of whether element is there or not). Elements can be accessed using the following methods:
*
Query operations ( ``SparseMat::ptr`` and the higher-level ``SparseMat::ref``, ``SparseMat::value`` and ``SparseMat::find`` ), for example:
Query operations (``SparseMat::ptr`` and the higher-level ``SparseMat::ref``, ``SparseMat::value`` and ``SparseMat::find``), for example:
::
@ -2001,7 +2087,7 @@ The class ``SparseMat`` represents multi-dimensional sparse numerical arrays. Su
..
*
Sparse matrix iterators. They are similar to ``MatIterator`` but different from :ocv:class:`NAryMatIterator`. That is, the iteration loop is familiar to STL users:
Sparse matrix iterators. They are similar to ``MatIterator`` but different from :ocv:class:`NAryMatIterator`. That is, the iteration loop is familiar to STL users:
::

@ -73,12 +73,16 @@ namespace cv { namespace gpu
FEATURE_SET_COMPUTE_20 = 20,
FEATURE_SET_COMPUTE_21 = 21,
FEATURE_SET_COMPUTE_30 = 30,
FEATURE_SET_COMPUTE_35 = 35,
GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30
WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
};
// Checks whether current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
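// Illustrative usage (a sketch, not part of this header): guard code paths that
// need Compute Capability 3.5 features before taking them, e.g.
//
//     if (cv::gpu::deviceSupports(cv::gpu::DYNAMIC_PARALLELISM))
//     {
//         /* run kernels that rely on device-side kernel launches */
//     }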
// Gives information about what GPU archs this OpenCV GPU module was
@ -116,8 +120,9 @@ namespace cv { namespace gpu
int multiProcessorCount() const { return multi_processor_count_; }
size_t sharedMemPerBlock() const { return sharedMemPerBlock_; }
size_t sharedMemPerBlock() const;
void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
size_t freeMemory() const;
size_t totalMemory() const;
@ -131,7 +136,6 @@ namespace cv { namespace gpu
private:
void query();
void queryMemory(size_t& free_memory, size_t& total_memory) const;
int device_id_;
@ -139,7 +143,6 @@ namespace cv { namespace gpu
int multi_processor_count_;
int majorVersion_;
int minorVersion_;
size_t sharedMemPerBlock_;
};
CV_EXPORTS void printCudaDeviceInfo(int device);
@ -546,13 +549,6 @@ namespace cv { namespace gpu
{
ensureSizeIsEnough(size.height, size.width, type, m);
}
inline GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat &mat)
{
if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
return mat(Rect(0, 0, cols, rows));
return mat = GpuMat(rows, cols, type);
}
}}
#endif // __cplusplus

@ -52,7 +52,8 @@ PERF_TEST_P( Size_MatType_CmpType, compareScalar,
declare.in(src1, src2, WARMUP_RNG).out(dst);
TEST_CYCLE() cv::compare(src1, src2, dst, cmpType);
int runs = (sz.width <= 640) ? 8 : 1;
TEST_CYCLE_MULTIRUN(runs) cv::compare(src1, src2, dst, cmpType);
SANITY_CHECK(dst);
}

@ -29,9 +29,9 @@ PERF_TEST_P( Size_DepthSrc_DepthDst_Channels_alpha, convertTo,
Mat src(sz, CV_MAKETYPE(depthSrc, channels));
randu(src, 0, 255);
Mat dst(sz, CV_MAKETYPE(depthDst, channels));
declare.iterations(500);
TEST_CYCLE() src.convertTo(dst, depthDst, alpha);
int runs = (sz.width <= 640) ? 8 : 1;
TEST_CYCLE_MULTIRUN(runs) src.convertTo(dst, depthDst, alpha);
SANITY_CHECK(dst, alpha == 1.0 ? 1e-12 : 1e-7);
}

@ -18,7 +18,8 @@ PERF_TEST_P(Size_MatType, Mat_Eye,
declare.out(diagonalMatrix);
TEST_CYCLE()
int runs = (size.width <= 640) ? 15 : 5;
TEST_CYCLE_MULTIRUN(runs)
{
diagonalMatrix = Mat::eye(size, type);
}
@ -38,7 +39,8 @@ PERF_TEST_P(Size_MatType, Mat_Zeros,
declare.out(zeroMatrix);
TEST_CYCLE()
int runs = (size.width <= 640) ? 15 : 5;
TEST_CYCLE_MULTIRUN(runs)
{
zeroMatrix = Mat::zeros(size, type);
}

@ -30,7 +30,8 @@ PERF_TEST_P( Size_SrcDepth_DstChannels, merge,
}
Mat dst;
TEST_CYCLE() merge( (vector<Mat> &)mv, dst );
int runs = (sz.width <= 640) ? 8 : 1;
TEST_CYCLE_MULTIRUN(runs) merge( (vector<Mat> &)mv, dst );
SANITY_CHECK(dst, 1e-12);
}

@ -26,8 +26,8 @@ PERF_TEST_P( Size_Depth_Channels, split,
randu(m, 0, 255);
vector<Mat> mv;
TEST_CYCLE() split(m, (vector<Mat>&)mv);
int runs = (sz.width <= 640) ? 8 : 1;
TEST_CYCLE_MULTIRUN(runs) split(m, (vector<Mat>&)mv);
SANITY_CHECK(mv, 1e-12);
}

@ -97,7 +97,8 @@ PERF_TEST_P(Size_MatType, countNonZero, testing::Combine( testing::Values( TYPIC
declare.in(src, WARMUP_RNG);
TEST_CYCLE() cnt = countNonZero(src);
int runs = (sz.width <= 640) ? 8 : 1;
TEST_CYCLE_MULTIRUN(runs) cnt = countNonZero(src);
SANITY_CHECK(cnt);
}

@ -48,8 +48,8 @@
#include <cuda_runtime.h>
#include <npp.h>
#define CUDART_MINIMUM_REQUIRED_VERSION 4010
#define NPP_MINIMUM_REQUIRED_VERSION 4100
#define CUDART_MINIMUM_REQUIRED_VERSION 4020
#define NPP_MINIMUM_REQUIRED_VERSION 4200
#if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
#error "Insufficient Cuda Runtime library version, please update it."

@ -640,4 +640,3 @@ Converts matrices obtained via :ocv:func:`gpu::BFMatcher_GPU::radiusMatchSingle`
.. ocv:function:: void gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches, std::vector< std::vector<DMatch> >& matches, bool compactResult = false)
If ``compactResult`` is ``true`` , the ``matches`` vector does not contain matches for fully masked-out query descriptors.

@ -1507,6 +1507,7 @@ public:
/* returns number of detected objects */
int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size());
int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
bool findLargestObject;
bool visualizeInPlace;
@ -1519,9 +1520,6 @@ private:
struct HaarCascade;
struct LbpCascade;
friend class CascadeClassifier_GPU_LBP;
public:
int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
};
// ======================== GPU version for soft cascade ===================== //

@ -778,6 +778,8 @@ NCVStatus loadFromXML(const std::string &filename,
haar.bNeedsTiltedII = false;
Ncv32u curMaxTreeDepth;
std::vector<char> xmlFileCont;
std::vector<HaarClassifierNode128> h_TmpClassifierNotRootNodes;
haarStages.resize(0);
haarClassifierNodes.resize(0);

@ -121,9 +121,7 @@ void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, f
buf.accum.setTo(Scalar::all(0));
DeviceInfo devInfo;
cudaDeviceProp prop;
cudaSafeCall(cudaGetDeviceProperties(&prop, devInfo.deviceID()));
linesAccum_gpu(srcPoints, pointsCount, buf.accum, rho, theta, prop.sharedMemPerBlock, devInfo.supports(FEATURE_SET_COMPUTE_20));
linesAccum_gpu(srcPoints, pointsCount, buf.accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
ensureSizeIsEnough(2, maxLines, CV_32FC2, lines);
@ -196,9 +194,7 @@ void cv::gpu::HoughLinesP(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf,
buf.accum.setTo(Scalar::all(0));
DeviceInfo devInfo;
cudaDeviceProp prop;
cudaSafeCall(cudaGetDeviceProperties(&prop, devInfo.deviceID()));
linesAccum_gpu(srcPoints, pointsCount, buf.accum, rho, theta, prop.sharedMemPerBlock, devInfo.supports(FEATURE_SET_COMPUTE_20));
linesAccum_gpu(srcPoints, pointsCount, buf.accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
ensureSizeIsEnough(1, maxLines, CV_32SC4, lines);

@ -125,7 +125,7 @@ CV_EXPORTS_W void setTrackbarPos(const string& trackbarname, const string& winna
// OpenGL support
typedef void (CV_CDECL *OpenGlDrawCallback)(void* userdata);
typedef void (*OpenGlDrawCallback)(void* userdata);
CV_EXPORTS void setOpenGlDrawCallback(const string& winname, OpenGlDrawCallback onOpenGlDraw, void* userdata = 0);
CV_EXPORTS void setOpenGlContext(const string& winname);

@ -560,6 +560,10 @@ bool CvCapture_FFMPEG::open( const char* _filename )
if( AVMEDIA_TYPE_VIDEO == enc->codec_type && video_stream < 0)
{
// backup encoder's width/height
int enc_width = enc->width;
int enc_height = enc->height;
AVCodec *codec = avcodec_find_decoder(enc->codec_id);
if (!codec ||
#if LIBAVCODEC_VERSION_INT >= ((53<<16)+(8<<8)+0)
@ -570,6 +574,10 @@ bool CvCapture_FFMPEG::open( const char* _filename )
< 0)
goto exit_func;
// checking width/height (since decoder can sometimes alter it, eg. vp6f)
if (enc_width && (enc->width != enc_width)) { enc->width = enc_width; }
if (enc_height && (enc->height != enc_height)) { enc->height = enc_height; }
video_stream = i;
video_st = ic->streams[i];
picture = avcodec_alloc_frame();

@ -299,10 +299,10 @@ PERF_TEST_P(Size_CvtMode2, cvtColorYUV420,
Mat src(sz.height + sz.height / 2, sz.width, CV_8UC(ch.scn));
Mat dst(sz, CV_8UC(ch.dcn));
declare.time(100);
declare.in(src, WARMUP_RNG).out(dst);
TEST_CYCLE() cvtColor(src, dst, mode, ch.dcn);
int runs = (sz.width <= 640) ? 8 : 1;
TEST_CYCLE_MULTIRUN(runs) cvtColor(src, dst, mode, ch.dcn);
SANITY_CHECK(dst, 1);
}

@ -33,7 +33,8 @@ PERF_TEST_P(ImgSize_TmplSize_Method, matchTemplateSmall,
declare
.in(img, WARMUP_RNG)
.in(tmpl, WARMUP_RNG)
.out(result);
.out(result)
.time(30);
TEST_CYCLE() matchTemplate(img, tmpl, result, method);
@ -66,7 +67,8 @@ PERF_TEST_P(ImgSize_TmplSize_Method, matchTemplateBig,
declare
.in(img, WARMUP_RNG)
.in(tmpl, WARMUP_RNG)
.out(result);
.out(result)
.time(30);
TEST_CYCLE() matchTemplate(img, tmpl, result, method);

@ -31,9 +31,9 @@ PERF_TEST_P(Size_MatType_ThreshType, threshold,
double maxval = theRNG().uniform(1, 254);
declare.in(src, WARMUP_RNG).out(dst);
declare.iterations(500);
TEST_CYCLE() threshold(src, dst, thresh, maxval, threshType);
int runs = (sz.width <= 640) ? 8 : 1;
TEST_CYCLE_MULTIRUN(runs) threshold(src, dst, thresh, maxval, threshType);
SANITY_CHECK(dst);
}

@ -17,6 +17,11 @@ public class OpenCVLoader
*/
public static final String OPENCV_VERSION_2_4_3 = "2.4.3";
/**
* OpenCV Library version 2.4.4.
*/
public static final String OPENCV_VERSION_2_4_4 = "2.4.4";
/**
* Loads and initializes OpenCV library from current application package. Roughly, it's an analog of system.loadLibrary("opencv_java").
* @return Returns true if initialization of OpenCV was successful.

@ -102,7 +102,7 @@ namespace cv
//this function enable ocl module to use customized cl_context and cl_command_queue
//getDevice also need to be called before this function
CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
//////////////////////////////// Error handling ////////////////////////
CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
@ -125,6 +125,24 @@ namespace cv
Impl *impl;
};
//! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
const char **source, string kernelName,
size_t globalThreads[3], size_t localThreads[3],
std::vector< std::pair<size_t, const void *> > &args,
int channels, int depth, const char *build_options,
bool finish = true, bool measureKernelTime = false,
bool cleanUp = true);
//! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
const char **fileName, const int numFiles, string kernelName,
size_t globalThreads[3], size_t localThreads[3],
std::vector< std::pair<size_t, const void *> > &args,
int channels, int depth, const char *build_options,
bool finish = true, bool measureKernelTime = false,
bool cleanUp = true);
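// Illustrative call pattern (a sketch; the kernel source, kernel name and argument
// list are hypothetical, only the signature above comes from this header):
//
//     std::vector< std::pair<size_t, const void *> > args;
//     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
//     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
//     size_t globalThreads[3] = { (size_t)src.cols, (size_t)src.rows, 1 };
//     size_t localThreads[3]  = { 16, 16, 1 };
//     openCLExecuteKernelInterop(src.clCxt, &kernelSource, "myKernel",
//                                globalThreads, localThreads, args,
//                                -1, -1, NULL);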
class CV_EXPORTS oclMatExpr;
//////////////////////////////// oclMat ////////////////////////////////
class CV_EXPORTS oclMat
@ -469,21 +487,22 @@ namespace cv
CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
//! Logical operators
CV_EXPORTS oclMatExpr operator ~ (const oclMat &src);
CV_EXPORTS oclMatExpr operator | (const oclMat &src1, const oclMat &src2);
CV_EXPORTS oclMatExpr operator & (const oclMat &src1, const oclMat &src2);
CV_EXPORTS oclMatExpr operator ^ (const oclMat &src1, const oclMat &src2);
CV_EXPORTS oclMat operator ~ (const oclMat &);
CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
//! Mathematics operators
CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
//! computes convolution of two images
//! support only CV_32FC1 type
CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result);
CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0);
//////////////////////////////// Filter Engine ////////////////////////////////

@ -109,5 +109,5 @@ TEST_P(Gemm, Performance)
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/),
testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
testing::Values(0, (int)cv::GEMM_1_T, (int)cv::GEMM_2_T, (int)(cv::GEMM_1_T + cv::GEMM_2_T))));
#endif

@ -2125,22 +2125,22 @@ void cv::ocl::bitwise_xor(const oclMat &src1, const Scalar &src2, oclMat &dst, c
bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_xor_scalar);
}
oclMatExpr cv::ocl::operator ~ (const oclMat &src)
oclMat cv::ocl::operator ~ (const oclMat &src)
{
return oclMatExpr(src, oclMat(), MAT_NOT);
}
oclMatExpr cv::ocl::operator | (const oclMat &src1, const oclMat &src2)
oclMat cv::ocl::operator | (const oclMat &src1, const oclMat &src2)
{
return oclMatExpr(src1, src2, MAT_OR);
}
oclMatExpr cv::ocl::operator & (const oclMat &src1, const oclMat &src2)
oclMat cv::ocl::operator & (const oclMat &src1, const oclMat &src2)
{
return oclMatExpr(src1, src2, MAT_AND);
}
oclMatExpr cv::ocl::operator ^ (const oclMat &src1, const oclMat &src2)
oclMat cv::ocl::operator ^ (const oclMat &src1, const oclMat &src2)
{
return oclMatExpr(src1, src2, MAT_XOR);
}

@ -12,6 +12,7 @@
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
@ -23,6 +24,7 @@
// Zhang Ying, zhangying913@gmail.com
// Xu Pang, pangxu010@163.com
// Wu Zailong, bullet@yeah.net
// Wenju He, wenju@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
@ -1524,7 +1526,7 @@ namespace cv
mat_dst.create(mat_src.rows, mat_src.cols, CV_8UC1);
oclMat mat_hist(1, 256, CV_32SC1);
//mat_hist.setTo(0);
calcHist(mat_src, mat_hist);
Context *clCxt = mat_src.clCxt;
@ -1533,10 +1535,10 @@ namespace cv
size_t globalThreads[3] = { 256, 1, 1};
oclMat lut(1, 256, CV_8UC1);
vector<pair<size_t , const void *> > args;
float scale = 255.f / (mat_src.rows * mat_src.cols);
int total = mat_src.rows * mat_src.cols;
args.push_back( make_pair( sizeof(cl_mem), (void *)&lut.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_hist.data));
args.push_back( make_pair( sizeof(cl_float), (void *)&scale));
args.push_back( make_pair( sizeof(int), (void *)&total));
openCLExecuteKernel(clCxt, &imgproc_histogram, kernelName, globalThreads, localThreads, args, -1, -1);
LUT(mat_src, lut, mat_dst);
}

@ -47,6 +47,7 @@
#include "precomp.hpp"
#include <iomanip>
#include <fstream>
#include "binarycaching.hpp"
using namespace cv;
@ -730,7 +731,138 @@ namespace cv
#endif
}
cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
double openCLExecuteKernelInterop(Context *clCxt , const char **source, string kernelName,
size_t globalThreads[3], size_t localThreads[3],
vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
bool finish, bool measureKernelTime, bool cleanUp)
{
//construct kernel name
//The rule is functionName_Cn_Dn, where C represents the number of channels, D represents the data type depth, and n is an integer
//for example split_C2_D2 is the split kernel with channels = 2 and data type depth = 2 (data type char)
stringstream idxStr;
if(channels != -1)
idxStr << "_C" << channels;
if(depth != -1)
idxStr << "_D" << depth;
kernelName += idxStr.str();
cl_kernel kernel;
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options);
double kernelTime = 0.0;
if( globalThreads != NULL)
{
if ( localThreads != NULL)
{
globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
//size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
cv::ocl::openCLVerifyKernel(clCxt, kernel, localThreads);
}
for(size_t i = 0; i < args.size(); i ++)
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
if(measureKernelTime == false)
{
openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
localThreads, 0, NULL, NULL));
}
else
{
cl_event event = NULL;
openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
localThreads, 0, NULL, &event));
cl_ulong end_time, queue_time;
openCLSafeCall(clWaitForEvents(1, &event));
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
sizeof(cl_ulong), &end_time, 0));
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
sizeof(cl_ulong), &queue_time, 0));
kernelTime = (double)(end_time - queue_time) / (1000 * 1000);
clReleaseEvent(event);
}
}
if(finish)
{
clFinish(clCxt->impl->clCmdQueue);
}
if(cleanUp)
{
openCLSafeCall(clReleaseKernel(kernel));
}
return kernelTime;
}
// Converts the contents of a file into a string
static int convertToString(const char *filename, std::string& s)
{
size_t size;
char* str;
std::fstream f(filename, (std::fstream::in | std::fstream::binary));
if(f.is_open())
{
size_t fileSize;
f.seekg(0, std::fstream::end);
size = fileSize = (size_t)f.tellg();
f.seekg(0, std::fstream::beg);
str = new char[size+1];
if(!str)
{
f.close();
return -1;
}
f.read(str, fileSize);
f.close();
str[size] = '\0';
s = str;
delete[] str;
return 0;
}
printf("Error: Failed to open file %s\n", filename);
return -1;
}
double openCLExecuteKernelInterop(Context *clCxt , const char **fileName, const int numFiles, string kernelName,
size_t globalThreads[3], size_t localThreads[3],
vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
bool finish, bool measureKernelTime, bool cleanUp)
{
std::vector<std::string> fsource;
for (int i = 0 ; i < numFiles ; i++)
{
std::string str;
if (convertToString(fileName[i], str) >= 0)
fsource.push_back(str);
}
const char **source = new const char *[numFiles];
for (int i = 0 ; i < numFiles ; i++)
source[i] = fsource[i].c_str();
double kernelTime = openCLExecuteKernelInterop(clCxt ,source, kernelName, globalThreads, localThreads,
args, channels, depth, build_options, finish, measureKernelTime, cleanUp);
fsource.clear();
delete []source;
return kernelTime;
}
cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
const size_t size)
{
int status;

@ -3,12 +3,14 @@
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Niko Li, newlife20080214@gmail.com
// Jia Haipeng, jiahaipeng95@gmail.com
// Xu Pang, pangxu010@163.com
// Wenju He, wenju@multicorewareinc.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
@ -189,24 +191,27 @@ __kernel __attribute__((reqd_work_group_size(256,1,1)))void merge_hist(__global
__kernel __attribute__((reqd_work_group_size(256,1,1)))void calLUT(
__global uchar * dst,
__constant int * hist,
float scale)
int total)
{
int lid = get_local_id(0);
__local int sumhist[HISTOGRAM256_BIN_COUNT];
//__local uchar lut[HISTOGRAM256_BIN_COUNT+1];
__local int sumhist[HISTOGRAM256_BIN_COUNT+1];
sumhist[lid]=hist[lid];
barrier(CLK_LOCAL_MEM_FENCE);
if(lid==0)
{
int sum = 0;
for(int i=0;i<HISTOGRAM256_BIN_COUNT;i++)
int i = 0;
while (!sumhist[i]) ++i;
sumhist[HISTOGRAM256_BIN_COUNT] = sumhist[i];
for(sumhist[i++] = 0; i<HISTOGRAM256_BIN_COUNT; i++)
{
sum+=sumhist[i];
sumhist[i]=sum;
}
}
barrier(CLK_LOCAL_MEM_FENCE);
float scale = 255.f/(total - sumhist[HISTOGRAM256_BIN_COUNT]);
dst[lid]= lid == 0 ? 0 : convert_uchar_sat(convert_float(sumhist[lid])*scale);
}
/*

@ -12,11 +12,13 @@
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Shengen Yan,yanshengen@gmail.com
// Xu Pang, pangxu010@163.com
// Wenju He, wenju@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
@ -43,12 +45,6 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if defined (DOUBLE_SUPPORT)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
typedef double F;
#else
typedef float F;
#endif
short2 do_mean_shift(int x0, int y0, __global uchar4* out,int out_step,
__global uchar4* in, int in_step, int dst_off, int src_off,
@ -184,12 +180,11 @@ short2 do_mean_shift(int x0, int y0, __global uchar4* out,int out_step,
if( count == 0 )
break;
F icount = 1.0/count;
int x1 = convert_int_rtz(sx*icount);
int y1 = convert_int_rtz(sy*icount);
s.x = convert_int_rtz(s.x*icount);
s.y = convert_int_rtz(s.y*icount);
s.z = convert_int_rtz(s.z*icount);
int x1 = sx/count;
int y1 = sy/count;
s.x = s.x/count;
s.y = s.y/count;
s.z = s.z/count;
int4 tmp = s - convert_int4(c);
int norm2 = tmp.x * tmp.x + tmp.y * tmp.y +

@ -46,6 +46,9 @@
#include "mcwutil.hpp"
#if defined (HAVE_OPENCL)
#ifndef CL_VERSION_1_2
#define CL_VERSION_1_2 0
#endif
using namespace std;
@ -123,6 +126,101 @@ namespace cv
openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
build_options, finish_mode);
}
cl_mem bindTexture(const oclMat &mat)
{
cl_mem texture;
cl_image_format format;
int err;
int depth = mat.depth();
int channels = mat.channels();
switch(depth)
{
case CV_8U:
format.image_channel_data_type = CL_UNSIGNED_INT8;
break;
case CV_32S:
format.image_channel_data_type = CL_UNSIGNED_INT32;
break;
case CV_32F:
format.image_channel_data_type = CL_FLOAT;
break;
default:
throw std::exception();
break;
}
switch(channels)
{
case 1:
format.image_channel_order = CL_R;
break;
case 3:
format.image_channel_order = CL_RGB;
break;
case 4:
format.image_channel_order = CL_RGBA;
break;
default:
throw std::exception();
break;
}
#if CL_VERSION_1_2
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = mat.cols;
desc.image_height = mat.rows;
desc.image_depth = 0;
desc.image_array_size = 1;
desc.image_row_pitch = 0;
desc.image_slice_pitch = 0;
desc.buffer = NULL;
desc.num_mip_levels = 0;
desc.num_samples = 0;
texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
#else
texture = clCreateImage2D(
mat.clCxt->impl->clContext,
CL_MEM_READ_WRITE,
&format,
mat.cols,
mat.rows,
0,
NULL,
&err);
#endif
size_t origin[] = { 0, 0, 0 };
size_t region[] = { mat.cols, mat.rows, 1 };
cl_mem devData;
if (mat.cols * mat.elemSize() != mat.step)
{
devData = clCreateBuffer(mat.clCxt->impl->clContext, CL_MEM_READ_ONLY, mat.cols * mat.rows
* mat.elemSize(), NULL, NULL);
const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
clEnqueueCopyBufferRect(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, devData, origin, origin,
regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
}
else
{
devData = (cl_mem)mat.data;
}
clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, devData, texture, 0, origin, region, 0, NULL, 0);
if ((mat.cols * mat.elemSize() != mat.step))
{
clFinish(mat.clCxt->impl->clCmdQueue);
clReleaseMemObject(devData);
}
openCLSafeCall(err);
return texture;
}
void releaseTexture(cl_mem& texture)
{
openCLFree(texture);
}
}//namespace ocl
}//namespace cv

@ -67,6 +67,12 @@ namespace cv
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
// bind oclMat to OpenCL image textures
// note:
// 1. there is no memory management. Users need to explicitly release the resource
// 2. for faster clamping, there is no buffer padding for the constructed texture
cl_mem bindTexture(const oclMat &mat);
void releaseTexture(cl_mem& texture);
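// Illustrative usage (a sketch, not part of this header): bind a GPU-resident oclMat,
// pass the texture to a kernel, then release it manually once the kernel has finished, e.g.
//
//     cl_mem tex = bindTexture(img);   // img is some oclMat (hypothetical)
//     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&tex));
//     openCLExecuteKernel(clCxt, &source, kernelName, globalThreads, localThreads, args, -1, -1);
//     releaseTexture(tex);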
}//namespace ocl
}//namespace cv

@ -10,10 +10,15 @@
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Dachuan Zhao, dachuan@multicorewareinc.com
// Yao Wang, yao@multicorewareinc.com
// Nathan, liujun@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
@ -22,13 +27,13 @@
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other GpuMaterials provided with the distribution.
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or bpied warranties, including, but not limited to, the bpied
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
@ -40,6 +45,7 @@
//
//M*/
#include "precomp.hpp"
#include "mcwutil.hpp"
using namespace std;
@ -568,197 +574,16 @@ static void pyrDown_cus(const oclMat &src, oclMat &dst)
pyrdown_run_cus(src, dst);
}
//struct MultiplyScalar
//{
// MultiplyScalar(double val_, double scale_) : val(val_), scale(scale_) {}
// double operator ()(double a) const
// {
// return (scale * a * val);
// }
// const double val;
// const double scale;
//};
//
//void callF(const oclMat& src, oclMat& dst, MultiplyScalar op, int mask)
//{
// Mat srcTemp;
// Mat dstTemp;
// src.download(srcTemp);
// dst.download(dstTemp);
//
// int i;
// int j;
// int k;
// for(i = 0; i < srcTemp.rows; i++)
// {
// for(j = 0; j < srcTemp.cols; j++)
// {
// for(k = 0; k < srcTemp.channels(); k++)
// {
// ((float*)dstTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k] = (float)op(((float*)srcTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k]);
// }
// }
// }
//
// dst = dstTemp;
//}
//
//static inline bool isAligned(const unsigned char* ptr, size_t size)
//{
// return reinterpret_cast<size_t>(ptr) % size == 0;
//}
//
//static inline bool isAligned(size_t step, size_t size)
//{
// return step % size == 0;
//}
//
//void callT(const oclMat& src, oclMat& dst, MultiplyScalar op, int mask)
//{
// if (!isAligned(src.data, 4 * sizeof(double)) || !isAligned(src.step, 4 * sizeof(double)) ||
// !isAligned(dst.data, 4 * sizeof(double)) || !isAligned(dst.step, 4 * sizeof(double)))
// {
// callF(src, dst, op, mask);
// return;
// }
//
// Mat srcTemp;
// Mat dstTemp;
// src.download(srcTemp);
// dst.download(dstTemp);
//
// int x_shifted;
//
// int i;
// int j;
// for(i = 0; i < srcTemp.rows; i++)
// {
// const double* srcRow = (const double*)srcTemp.data + i * srcTemp.rows;
// double* dstRow = (double*)dstTemp.data + i * dstTemp.rows;;
//
// for(j = 0; j < srcTemp.cols; j++)
// {
// x_shifted = j * 4;
//
// if(x_shifted + 4 - 1 < srcTemp.cols)
// {
// dstRow[x_shifted ] = op(srcRow[x_shifted ]);
// dstRow[x_shifted + 1] = op(srcRow[x_shifted + 1]);
// dstRow[x_shifted + 2] = op(srcRow[x_shifted + 2]);
// dstRow[x_shifted + 3] = op(srcRow[x_shifted + 3]);
// }
// else
// {
// for (int real_x = x_shifted; real_x < srcTemp.cols; ++real_x)
// {
// ((float*)dstTemp.data)[i * srcTemp.rows + real_x] = op(((float*)srcTemp.data)[i * srcTemp.rows + real_x]);
// }
// }
// }
// }
//}
//
//void multiply(const oclMat& src1, double val, oclMat& dst, double scale = 1.0f);
//void multiply(const oclMat& src1, double val, oclMat& dst, double scale)
//{
// MultiplyScalar op(val, scale);
// //if(src1.channels() == 1 && dst.channels() == 1)
// //{
// // callT(src1, dst, op, 0);
// //}
// //else
// //{
// callF(src1, dst, op, 0);
// //}
//}
static cl_mem bindTexture(const oclMat &mat, int depth, int channels)
{
cl_mem texture;
cl_image_format format;
int err;
if(depth == 0)
{
format.image_channel_data_type = CL_UNSIGNED_INT8;
}
else if(depth == 5)
{
format.image_channel_data_type = CL_FLOAT;
}
if(channels == 1)
{
format.image_channel_order = CL_R;
}
else if(channels == 3)
{
format.image_channel_order = CL_RGB;
}
else if(channels == 4)
{
format.image_channel_order = CL_RGBA;
}
#ifdef CL_VERSION_1_2
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = mat.step / mat.elemSize();
desc.image_height = mat.rows;
desc.image_depth = 0;
desc.image_array_size = 1;
desc.image_row_pitch = 0;
desc.image_slice_pitch = 0;
desc.buffer = NULL;
desc.num_mip_levels = 0;
desc.num_samples = 0;
texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
#else
texture = clCreateImage2D(
mat.clCxt->impl->clContext,
CL_MEM_READ_WRITE,
&format,
mat.step / mat.elemSize(),
mat.rows,
0,
NULL,
&err);
#endif
size_t origin[] = { 0, 0, 0 };
size_t region[] = { mat.step / mat.elemSize(), mat.rows, 1 };
clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, texture, 0, origin, region, 0, NULL, 0);
openCLSafeCall(err);
return texture;
}
static void releaseTexture(cl_mem texture)
{
openCLFree(texture);
}
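
The bindTexture() helper above wraps an oclMat buffer in an OpenCL image object, choosing clCreateImage (OpenCL 1.2+) or clCreateImage2D and then copying the buffer contents into it. The sketch below shows that pattern on its own; it is not OpenCV code, and it assumes a valid cl_context ctx, cl_command_queue queue and a single-channel CL_FLOAT source buffer of width x height pixels.

    // Minimal sketch of the buffer-to-image pattern used by bindTexture() above.
    // Assumes a valid context/queue and a source buffer holding `height` rows of
    // `width` single-channel float pixels; error handling is reduced to one check.
    #include <CL/cl.h>
    #include <cstdio>

    static cl_mem buffer_to_image(cl_context ctx, cl_command_queue queue,
                                  cl_mem buffer, size_t width, size_t height)
    {
        cl_image_format format;
        format.image_channel_data_type = CL_FLOAT;  // depth CV_32F in the code above
        format.image_channel_order     = CL_R;      // single channel

        cl_int err = CL_SUCCESS;
        cl_mem image;
    #ifdef CL_VERSION_1_2
        cl_image_desc desc = {};                    // zero-initializes pitch, buffer, ...
        desc.image_type   = CL_MEM_OBJECT_IMAGE2D;
        desc.image_width  = width;
        desc.image_height = height;
        image = clCreateImage(ctx, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
    #else
        image = clCreateImage2D(ctx, CL_MEM_READ_WRITE, &format,
                                width, height, 0, NULL, &err);
    #endif
        if (err != CL_SUCCESS)
        {
            fprintf(stderr, "clCreateImage failed: %d\n", (int)err);
            return NULL;
        }

        // Copy the linear buffer into the freshly created image.
        size_t origin[3] = { 0, 0, 0 };
        size_t region[3] = { width, height, 1 };
        err = clEnqueueCopyBufferToImage(queue, buffer, image, 0,
                                         origin, region, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            fprintf(stderr, "clEnqueueCopyBufferToImage failed: %d\n", (int)err);
            clReleaseMemObject(image);
            return NULL;
        }
        return image;  // release with clReleaseMemObject() when done
    }
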
static void lkSparse_run(oclMat &I, oclMat &J,
const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount,
int level, /*dim3 block, */dim3 patch, Size winSize, int iters)
{
Context *clCxt = I.clCxt;
char platform[256] = {0};
cl_platform_id pid;
clGetDeviceInfo(clCxt->impl->devices, CL_DEVICE_PLATFORM, sizeof(pid), &pid, NULL);
clGetPlatformInfo(pid, CL_PLATFORM_NAME, 256, platform, NULL);
std::string namestr = platform;
bool isImageSupported = true;
if(namestr.find("NVIDIA")!=string::npos || namestr.find("Intel")!=string::npos)
isImageSupported = false;
int elemCntPerRow = I.step / I.elemSize();
string kernelName = "lkSparse";
size_t localThreads[3] = { 8, isImageSupported?8:32, 1 };
size_t globalThreads[3] = { 8 * ptcount, isImageSupported?8:32, 1};
size_t localThreads[3] = { 8, 8, 1 };
size_t globalThreads[3] = { 8 * ptcount, 8, 1};
int cn = I.oclchannels();
char calcErr;
if (level == 0)
{
@ -770,22 +595,11 @@ static void lkSparse_run(oclMat &I, oclMat &J,
}
vector<pair<size_t , const void *> > args;
cl_mem ITex;
cl_mem JTex;
if (isImageSupported)
{
ITex = bindTexture(I, I.depth(), cn);
JTex = bindTexture(J, J.depth(), cn);
}
else
{
ITex = (cl_mem)I.data;
JTex = (cl_mem)J.data;
}
cl_mem ITex = bindTexture(I);
cl_mem JTex = bindTexture(J);
args.push_back( make_pair( sizeof(cl_mem), (void *)&ITex ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&JTex ));
//cl_mem clmD = clCreateBuffer(clCxt, CL_MEM_READ_WRITE, ptcount * sizeof(float), NULL, NULL);
args.push_back( make_pair( sizeof(cl_mem), (void *)&prevPts.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&prevPts.step ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&nextPts.data ));
@ -795,10 +609,6 @@ static void lkSparse_run(oclMat &I, oclMat &J,
args.push_back( make_pair( sizeof(cl_int), (void *)&level ));
args.push_back( make_pair( sizeof(cl_int), (void *)&I.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols ));
if (!isImageSupported)
{
args.push_back( make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) );
}
args.push_back( make_pair( sizeof(cl_int), (void *)&patch.x ));
args.push_back( make_pair( sizeof(cl_int), (void *)&patch.y ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cn ));
@ -806,18 +616,20 @@ static void lkSparse_run(oclMat &I, oclMat &J,
args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.height ));
args.push_back( make_pair( sizeof(cl_int), (void *)&iters ));
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
//args.push_back( make_pair( sizeof(cl_char), (void *)&GET_MIN_EIGENVALS ));
if (isImageSupported)
try
{
openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
releaseTexture(ITex);
releaseTexture(JTex);
}
else
catch(Exception&)
{
//printf("Warning: The image2d_t is not supported by the device. Using alternative method!\n");
printf("Warning: The image2d_t is not supported by the device. Using alternative method!\n");
releaseTexture(ITex);
releaseTexture(JTex);
ITex = (cl_mem)I.data;
JTex = (cl_mem)J.data;
localThreads[1] = globalThreads[1] = 32;
args.insert( args.begin()+11, make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) );
openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
}
}
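
The rewritten dispatch above no longer probes the platform name up front; it simply tries the image2d_t kernel and, if the runtime rejects it (surfaced as an exception), releases the textures, rebinds the raw buffers and reruns the buffer-based kernel. A self-contained sketch of that shape, with hypothetical runImagePath/runBufferPath standing in for the real kernel launches:

    // Illustrative fallback pattern, not the actual OpenCV code: prefer the
    // image2d_t path and drop to plain buffers when the device rejects images.
    #include <cstdio>
    #include <stdexcept>

    static void runImagePath()
    {
        // Pretend the device rejects image2d_t, as some NVIDIA/Intel OpenCL
        // runtimes did at the time of this change.
        throw std::runtime_error("CL_IMAGE_FORMAT_NOT_SUPPORTED");
    }

    static void runBufferPath()
    {
        std::puts("ran buffer-based kernel");
    }

    int main()
    {
        try
        {
            runImagePath();
        }
        catch (const std::exception&)
        {
            std::puts("Warning: The image2d_t is not supported by the device. "
                      "Using alternative method!");
            runBufferPath();
        }
        return 0;
    }
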
@ -927,8 +739,6 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
size_t localThreads[3] = { 16, 16, 1 };
size_t globalThreads[3] = { I.cols, I.rows, 1};
int cn = I.oclchannels();
bool calcErr;
if (err)
{
@ -944,8 +754,8 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
if (isImageSupported)
{
ITex = bindTexture(I, I.depth(), cn);
JTex = bindTexture(J, J.depth(), cn);
ITex = bindTexture(I);
JTex = bindTexture(J);
}
else
{

@ -81,5 +81,5 @@ TEST_P(Gemm, Accuracy)
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/),
testing::Values(cv::Size(20, 20), cv::Size(300, 300)),
testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
testing::Values(0, (int)cv::GEMM_1_T, (int)cv::GEMM_2_T, (int)(cv::GEMM_1_T + cv::GEMM_2_T))));
#endif
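
The added (int) casts keep every element of the testing::Values(...) list the same type: the GEMM flags are enumerators, but their sum is already promoted to int, and mixing the two types in one value list is what the casts most likely work around (this rationale is an inference, not stated in the change). A tiny illustration of the promotion:

    // Illustration only: enumerators keep their enum type, but adding two of
    // them yields an int, so the original Values(...) call mixed two types.
    #include <type_traits>

    enum GemmFlags { GEMM_1_T = 1, GEMM_2_T = 2 };

    int main()
    {
        static_assert(std::is_same<decltype(GEMM_1_T), GemmFlags>::value,
                      "an enumerator has its enumeration type");
        static_assert(std::is_same<decltype(GEMM_1_T + GEMM_2_T), int>::value,
                      "the sum is promoted to int");
        return 0;
    }
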

@ -183,12 +183,11 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
if( count == 0 )
break;
double icount = 1.0 / count;
int x1 = cvFloor(sx * icount);
int y1 = cvFloor(sy * icount);
s0 = cvFloor(s0 * icount);
s1 = cvFloor(s1 * icount);
s2 = cvFloor(s2 * icount);
int x1 = sx / count;
int y1 = sy / count;
s0 = s0 / count;
s1 = s1 / count;
s2 = s2 / count;
bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) +
tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps);
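
The change above drops the reciprocal multiply plus cvFloor in favour of plain integer division. For the non-negative integer sums accumulated in this kernel the two are equivalent (integer division already floors), and the integer form avoids a float round trip inside the OpenCL kernel. A quick host-side check of that equivalence, under the assumption of non-negative operands:

    // Sanity check: for non-negative integers, a / b equals floor((double)a / b),
    // so the simplification in do_meanShift above does not change the result.
    #include <cassert>
    #include <cmath>

    int main()
    {
        for (int sx = 0; sx <= 10000; ++sx)
            for (int count = 1; count <= 64; ++count)
                assert(sx / count == (int)std::floor((double)sx / count));
        return 0;
    }
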
@ -1370,9 +1369,7 @@ TEST_P(meanShiftFiltering, Mat)
gdst.download(cpu_gdst);
char sss[1024];
char warning[300] = "Warning: If the selected device doesn't support double, a deviation will exist.\nIf the accuracy is acceptable, please ignore it.\n";
sprintf(sss, "roicols=%d,roirows=%d,srcx=%d,srcy=%d,dstx=%d,dsty=%d\n", roicols, roirows, srcx, srcy, dstx, dsty);
strcat(sss, warning);
EXPECT_MAT_NEAR(dst, cpu_gdst, 0.0, sss);
}
@ -1398,9 +1395,7 @@ TEST_P(meanShiftProc, Mat)
gdstCoor.download(cpu_gdstCoor);
char sss[1024];
char warning[300] = "Warning: If the selected device doesn't support double, a deviation will exist.\nIf the accuracy is acceptable, please ignore it.\n";
sprintf(sss, "roicols=%d,roirows=%d,srcx=%d,srcy=%d,dstx=%d,dsty=%d\n", roicols, roirows, srcx, srcy, dstx, dsty);
strcat(sss, warning);
EXPECT_MAT_NEAR(dst, cpu_gdst, 0.0, sss);
EXPECT_MAT_NEAR(dstCoor, cpu_gdstCoor, 0.0, sss);
}

@ -296,37 +296,15 @@ int ArrayTest::validate_test_results( int test_case_idx )
for( j = 0; j < sizei; j++ )
{
double err_level;
vector<int> idx;
double max_diff = 0;
int code;
char msg[100];
if( !test_array[i1][j] )
continue;
err_level = get_success_error_level( test_case_idx, i0, (int)j );
code = cmpEps( test_mat[i0][j], test_mat[i1][j], &max_diff, err_level, &idx, element_wise_relative_error );
code = cmpEps2(ts, test_mat[i0][j], test_mat[i1][j], err_level, element_wise_relative_error, arr_names[i0]);
switch( code )
{
case -1:
sprintf( msg, "Too big difference (=%g)", max_diff );
code = TS::FAIL_BAD_ACCURACY;
break;
case -2:
strcpy( msg, "Invalid output" );
code = TS::FAIL_INVALID_OUTPUT;
break;
case -3:
strcpy( msg, "Invalid output in the reference array" );
code = TS::FAIL_INVALID_OUTPUT;
break;
default:
continue;
}
string idxstr = vec2str(", ", &idx[0], idx.size());
ts->printf( TS::LOG, "%s in %s array %d at (%s)", msg, arr_names[i0], j, idxstr.c_str() );
if (code == 0) continue;
for( i0 = 0; i0 < (int)test_array.size(); i0++ )
{

@ -1934,6 +1934,10 @@ int check( const Mat& a, double fmin, double fmax, vector<int>* _idx )
return idx == 0 ? 0 : -1;
}
#define CMP_EPS_OK 0
#define CMP_EPS_BIG_DIFF -1
#define CMP_EPS_INVALID_TEST_DATA -2 // there is NaN or Inf value in test data
#define CMP_EPS_INVALID_REF_DATA -3 // there is NaN or Inf value in reference data
// compares two arrays. max_diff is the maximum actual difference,
// success_err_level is maximum allowed difference, idx is the index of the first
@ -1946,7 +1950,7 @@ int cmpEps( const Mat& arr, const Mat& refarr, double* _realmaxdiff,
CV_Assert( arr.type() == refarr.type() && arr.size == refarr.size );
int ilevel = refarr.depth() <= CV_32S ? cvFloor(success_err_level) : 0;
int result = 0;
int result = CMP_EPS_OK;
const Mat *arrays[]={&arr, &refarr, 0};
Mat planes[2];
@ -1998,13 +2002,13 @@ int cmpEps( const Mat& arr, const Mat& refarr, double* _realmaxdiff,
continue;
if( cvIsNaN(a_val) || cvIsInf(a_val) )
{
result = -2;
result = CMP_EPS_INVALID_TEST_DATA;
idx = startidx + j;
break;
}
if( cvIsNaN(b_val) || cvIsInf(b_val) )
{
result = -3;
result = CMP_EPS_INVALID_REF_DATA;
idx = startidx + j;
break;
}
@ -2029,13 +2033,13 @@ int cmpEps( const Mat& arr, const Mat& refarr, double* _realmaxdiff,
continue;
if( cvIsNaN(a_val) || cvIsInf(a_val) )
{
result = -2;
result = CMP_EPS_INVALID_TEST_DATA;
idx = startidx + j;
break;
}
if( cvIsNaN(b_val) || cvIsInf(b_val) )
{
result = -3;
result = CMP_EPS_INVALID_REF_DATA;
idx = startidx + j;
break;
}
@ -2051,7 +2055,7 @@ int cmpEps( const Mat& arr, const Mat& refarr, double* _realmaxdiff,
break;
default:
assert(0);
return -1;
return CMP_EPS_BIG_DIFF;
}
if(_realmaxdiff)
*_realmaxdiff = MAX(*_realmaxdiff, realmaxdiff);
@ -2060,7 +2064,7 @@ int cmpEps( const Mat& arr, const Mat& refarr, double* _realmaxdiff,
}
if( result == 0 && idx != 0 )
result = -1;
result = CMP_EPS_BIG_DIFF;
if( result < -1 && _realmaxdiff )
*_realmaxdiff = exp(1000.);
@ -2081,15 +2085,15 @@ int cmpEps2( TS* ts, const Mat& a, const Mat& b, double success_err_level,
switch( code )
{
case -1:
case CMP_EPS_BIG_DIFF:
sprintf( msg, "%s: Too big difference (=%g)", desc, diff );
code = TS::FAIL_BAD_ACCURACY;
break;
case -2:
case CMP_EPS_INVALID_TEST_DATA:
sprintf( msg, "%s: Invalid output", desc );
code = TS::FAIL_INVALID_OUTPUT;
break;
case -3:
case CMP_EPS_INVALID_REF_DATA:
sprintf( msg, "%s: Invalid reference output", desc );
code = TS::FAIL_INVALID_OUTPUT;
break;
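
Both cmpEps and cmpEps2 above now use the named CMP_EPS_* constants instead of the raw -1/-2/-3 return codes, so the translation into test-failure codes reads directly. A self-contained sketch of that mapping (describe() is an illustrative stand-in for the real TS logging, not part of the change):

    // Sketch of the result-code mapping, using the same constants as above.
    #include <cstdio>

    #define CMP_EPS_OK                 0
    #define CMP_EPS_BIG_DIFF          -1
    #define CMP_EPS_INVALID_TEST_DATA -2   // NaN or Inf in the tested output
    #define CMP_EPS_INVALID_REF_DATA  -3   // NaN or Inf in the reference output

    static const char* describe(int code)
    {
        switch (code)
        {
        case CMP_EPS_OK:                return "OK";
        case CMP_EPS_BIG_DIFF:          return "Too big difference";
        case CMP_EPS_INVALID_TEST_DATA: return "Invalid output";
        case CMP_EPS_INVALID_REF_DATA:  return "Invalid reference output";
        default:                        return "Unknown comparison result";
        }
    }

    int main()
    {
        for (int code = 0; code >= -3; --code)
            printf("%d -> %s\n", code, describe(code));
        return 0;
    }
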

@ -13,7 +13,7 @@ pair<string, string> impair(const char* im1, const char* im2)
PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1, testing::Values(impair("cv/optflow/RubberWhale1.png", "cv/optflow/RubberWhale2.png")))
{
declare.time(40);
declare.time(260);
Mat frame1 = imread(getDataPath(GetParam().first), IMREAD_GRAYSCALE);
Mat frame2 = imread(getDataPath(GetParam().second), IMREAD_GRAYSCALE);
