Merge pull request #1874 from SpecLad:merge-2.4

12 years ago · 9ca80961fd
parent 41ae5d5f66 2fed668390
commit 9ca80961fd
111 changed files with 1017 additions and 472 deletions
--- a/.gitignore
+++ b/.gitignore
@ -4,7 +4,6 @@
 .*.swp
 .DS_Store
 .sw[a-z]
-/modules/refman.rst
 Thumbs.db
 tags
 tegra/
--- a/3rdparty/ffmpeg/make.bat
+++ b/3rdparty/ffmpeg/make.bat
@ -1,2 +1,2 @@
-set path=c:\dev\msys32\bin;%path% & gcc -Wall -shared -o opencv_ffmpeg.dll -O2 -x c++ -I../include -I../include/ffmpeg_ -I../../modules/highgui/src ffopencv.c -L../lib -lavformat -lavcodec -lavdevice -lswscale -lavutil -liconv -lws2_32
+set path=c:\dev\msys32\bin;%path% & gcc -Wall -shared -o opencv_ffmpeg.dll -O2 -x c++ -I../include -I../include/ffmpeg_ -I../../modules/highgui/src ffopencv.c -L../lib -lavformat -lavcodec -lavdevice -lswscale -lavutil -lws2_32
 set path=c:\dev\msys64\bin;%path% & gcc -m64 -Wall -shared -o opencv_ffmpeg_64.dll -O2 -x c++ -I../include -I../include/ffmpeg_ -I../../modules/highgui/src ffopencv.c -L../lib -lavformat64 -lavcodec64 -lavdevice64 -lswscale64 -lavutil64 -lws2_32
--- a/3rdparty/ffmpeg/opencv_ffmpeg.dll
+++ b/3rdparty/ffmpeg/opencv_ffmpeg.dll
--- a/3rdparty/include/opencl/1.2/CL/cl_platform.h
+++ b/3rdparty/include/opencl/1.2/CL/cl_platform.h
@ -92,7 +92,7 @@ extern "C" {
            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED __attribute__((deprecated))
            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED    
        #endif
-    #elif _WIN32
+    #elif defined(_WIN32)
        #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED    
            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED    
--- a/3rdparty/lib/armeabi-v7a/libnative_camera_r2.2.0.so
+++ b/3rdparty/lib/armeabi-v7a/libnative_camera_r2.2.0.so
--- a/3rdparty/lib/armeabi-v7a/libnative_camera_r2.3.3.so
+++ b/3rdparty/lib/armeabi-v7a/libnative_camera_r2.3.3.so
--- a/3rdparty/lib/armeabi-v7a/libnative_camera_r3.0.1.so
+++ b/3rdparty/lib/armeabi-v7a/libnative_camera_r3.0.1.so
--- a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.0.0.so
+++ b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.0.0.so
--- a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.0.3.so
+++ b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.0.3.so
--- a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.1.1.so
+++ b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.1.1.so
--- a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.2.0.so
+++ b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.2.0.so
--- a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.3.0.so
+++ b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.3.0.so
--- a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.4.0.so
+++ b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.4.0.so
--- a/3rdparty/lib/armeabi/libnative_camera_r2.2.0.so
+++ b/3rdparty/lib/armeabi/libnative_camera_r2.2.0.so
--- a/3rdparty/lib/armeabi/libnative_camera_r2.3.3.so
+++ b/3rdparty/lib/armeabi/libnative_camera_r2.3.3.so
--- a/3rdparty/lib/armeabi/libnative_camera_r3.0.1.so
+++ b/3rdparty/lib/armeabi/libnative_camera_r3.0.1.so
--- a/3rdparty/lib/armeabi/libnative_camera_r4.0.0.so
+++ b/3rdparty/lib/armeabi/libnative_camera_r4.0.0.so
--- a/3rdparty/lib/armeabi/libnative_camera_r4.0.3.so
+++ b/3rdparty/lib/armeabi/libnative_camera_r4.0.3.so
--- a/3rdparty/lib/armeabi/libnative_camera_r4.1.1.so
+++ b/3rdparty/lib/armeabi/libnative_camera_r4.1.1.so
--- a/3rdparty/lib/armeabi/libnative_camera_r4.2.0.so
+++ b/3rdparty/lib/armeabi/libnative_camera_r4.2.0.so
--- a/3rdparty/lib/armeabi/libnative_camera_r4.3.0.so
+++ b/3rdparty/lib/armeabi/libnative_camera_r4.3.0.so
--- a/3rdparty/lib/armeabi/libnative_camera_r4.4.0.so
+++ b/3rdparty/lib/armeabi/libnative_camera_r4.4.0.so
--- a/3rdparty/lib/libavcodec.a
+++ b/3rdparty/lib/libavcodec.a
--- a/3rdparty/lib/libavdevice.a
+++ b/3rdparty/lib/libavdevice.a
--- a/3rdparty/lib/libavformat.a
+++ b/3rdparty/lib/libavformat.a
--- a/3rdparty/lib/libavutil.a
+++ b/3rdparty/lib/libavutil.a
--- a/3rdparty/lib/libswscale.a
+++ b/3rdparty/lib/libswscale.a
--- a/3rdparty/lib/mips/libnative_camera_r4.0.3.so
+++ b/3rdparty/lib/mips/libnative_camera_r4.0.3.so
--- a/3rdparty/lib/mips/libnative_camera_r4.1.1.so
+++ b/3rdparty/lib/mips/libnative_camera_r4.1.1.so
--- a/3rdparty/lib/mips/libnative_camera_r4.2.0.so
+++ b/3rdparty/lib/mips/libnative_camera_r4.2.0.so
--- a/3rdparty/lib/mips/libnative_camera_r4.3.0.so
+++ b/3rdparty/lib/mips/libnative_camera_r4.3.0.so
--- a/3rdparty/lib/mips/libnative_camera_r4.4.0.so
+++ b/3rdparty/lib/mips/libnative_camera_r4.4.0.so
--- a/3rdparty/lib/x86/libnative_camera_r2.3.3.so
+++ b/3rdparty/lib/x86/libnative_camera_r2.3.3.so
--- a/3rdparty/lib/x86/libnative_camera_r3.0.1.so
+++ b/3rdparty/lib/x86/libnative_camera_r3.0.1.so
--- a/3rdparty/lib/x86/libnative_camera_r4.0.3.so
+++ b/3rdparty/lib/x86/libnative_camera_r4.0.3.so
--- a/3rdparty/lib/x86/libnative_camera_r4.1.1.so
+++ b/3rdparty/lib/x86/libnative_camera_r4.1.1.so
--- a/3rdparty/lib/x86/libnative_camera_r4.2.0.so
+++ b/3rdparty/lib/x86/libnative_camera_r4.2.0.so
--- a/3rdparty/lib/x86/libnative_camera_r4.3.0.so
+++ b/3rdparty/lib/x86/libnative_camera_r4.3.0.so
--- a/3rdparty/lib/x86/libnative_camera_r4.4.0.so
+++ b/3rdparty/lib/x86/libnative_camera_r4.4.0.so
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -151,7 +151,7 @@ OCV_OPTION(WITH_MSMF           "Build HighGUI with Media Foundation support" OFF
 OCV_OPTION(WITH_XIMEA          "Include XIMEA cameras support"               OFF  IF (NOT ANDROID AND NOT APPLE) )
 OCV_OPTION(WITH_XINE           "Include Xine support (GPL)"                  OFF  IF (UNIX AND NOT APPLE AND NOT ANDROID) )
 OCV_OPTION(WITH_CLP            "Include Clp support (EPL)"                   OFF)
-OCV_OPTION(WITH_OPENCL         "Include OpenCL Runtime support"              ON  IF (NOT ANDROID AND NOT IOS) )
+OCV_OPTION(WITH_OPENCL         "Include OpenCL Runtime support"              ON   IF (NOT IOS) )
 OCV_OPTION(WITH_OPENCLAMDFFT   "Include AMD OpenCL FFT library support"      ON   IF (NOT ANDROID AND NOT IOS) )
 OCV_OPTION(WITH_OPENCLAMDBLAS  "Include AMD OpenCL BLAS library support"     ON   IF (NOT ANDROID AND NOT IOS) )

--- a/cmake/OpenCVConfig.cmake
+++ b/cmake/OpenCVConfig.cmake
@ -77,6 +77,8 @@ if(MSVC)
    set(OpenCV_RUNTIME vc10)
  elseif(MSVC_VERSION EQUAL 1700)
    set(OpenCV_RUNTIME vc11)
+  elseif(MSVC_VERSION EQUAL 1800)
+    set(OpenCV_RUNTIME vc12)
  endif()
 elseif(MINGW)
  set(OpenCV_RUNTIME mingw)
--- a/cmake/OpenCVDetectCXXCompiler.cmake
+++ b/cmake/OpenCVDetectCXXCompiler.cmake
@ -136,6 +136,8 @@ if(MSVC)
    set(OpenCV_RUNTIME vc10)
  elseif(MSVC_VERSION EQUAL 1700)
    set(OpenCV_RUNTIME vc11)
+  elseif(MSVC_VERSION EQUAL 1800)
+    set(OpenCV_RUNTIME vc12)
  endif()
 elseif(MINGW)
  set(OpenCV_RUNTIME mingw)
--- a/cmake/OpenCVFindXimea.cmake
+++ b/cmake/OpenCVFindXimea.cmake
@ -23,7 +23,7 @@ if(WIN32)
  if(EXISTS ${XIMEA_PATH})
    set(XIMEA_FOUND 1)
    # set LIB folders
-    if(CMAKE_CL_64)
+    if(X86_64)
      set(XIMEA_LIBRARY_DIR "${XIMEA_PATH}/x64")
    else()
      set(XIMEA_LIBRARY_DIR "${XIMEA_PATH}/x86")
--- a/cmake/OpenCVGenConfig.cmake
+++ b/cmake/OpenCVGenConfig.cmake
@ -134,11 +134,11 @@ if(WIN32)
  configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/OpenCVConfig.cmake.in" "${CMAKE_BINARY_DIR}/win-install/OpenCVConfig.cmake" IMMEDIATE @ONLY)
  configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/OpenCVConfig-version.cmake.in" "${CMAKE_BINARY_DIR}/win-install/OpenCVConfig-version.cmake" IMMEDIATE @ONLY)
  if(BUILD_SHARED_LIBS)
-    install(FILES "${CMAKE_BINARY_DIR}/win-install/OpenCVConfig.cmake" DESTINATION "${OpenCV_INSTALL_BINARIES_PREFIX}/lib")
-    install(EXPORT OpenCVModules DESTINATION "${OpenCV_INSTALL_BINARIES_PREFIX}/lib" FILE OpenCVModules${modules_file_suffix}.cmake)
+    install(FILES "${CMAKE_BINARY_DIR}/win-install/OpenCVConfig.cmake" DESTINATION "${OpenCV_INSTALL_BINARIES_PREFIX}lib")
+    install(EXPORT OpenCVModules DESTINATION "${OpenCV_INSTALL_BINARIES_PREFIX}lib" FILE OpenCVModules${modules_file_suffix}.cmake)
  else()
-    install(FILES "${CMAKE_BINARY_DIR}/win-install/OpenCVConfig.cmake" DESTINATION "${OpenCV_INSTALL_BINARIES_PREFIX}/staticlib")
-    install(EXPORT OpenCVModules DESTINATION "${OpenCV_INSTALL_BINARIES_PREFIX}/staticlib" FILE OpenCVModules${modules_file_suffix}.cmake)
+    install(FILES "${CMAKE_BINARY_DIR}/win-install/OpenCVConfig.cmake" DESTINATION "${OpenCV_INSTALL_BINARIES_PREFIX}staticlib")
+    install(EXPORT OpenCVModules DESTINATION "${OpenCV_INSTALL_BINARIES_PREFIX}staticlib" FILE OpenCVModules${modules_file_suffix}.cmake)
  endif()
  install(FILES "${CMAKE_BINARY_DIR}/win-install/OpenCVConfig-version.cmake" DESTINATION "${CMAKE_INSTALL_PREFIX}")
  install(FILES "${OpenCV_SOURCE_DIR}/cmake/OpenCVConfig.cmake" DESTINATION "${CMAKE_INSTALL_PREFIX}/")
--- a/cmake/templates/OpenCVConfig.cmake.in
+++ b/cmake/templates/OpenCVConfig.cmake.in
@ -184,6 +184,10 @@ foreach(__cvcomponent ${OpenCV_FIND_COMPONENTS})
    set(${__cvcomponent}_FOUND "${__cvcomponent}_FOUND-NOTFOUND")
  else()
    list(APPEND OpenCV_FIND_COMPONENTS_ ${__cvcomponent})
+    # Not using list(APPEND) here, because OpenCV_LIBS may not exist yet.
+    # Also not clearing OpenCV_LIBS anywhere, so that multiple calls
+    # to find_package(OpenCV) with different component lists add up.
+    set(OpenCV_LIBS ${OpenCV_LIBS} "${__cvcomponent}")
    #indicate that module is found
    string(TOUPPER "${__cvcomponent}" __cvcomponent)
    set(${__cvcomponent}_FOUND 1)
@ -200,8 +204,6 @@ else()
  set(OpenCV_LIB_SUFFIX "")
 endif()

-SET(OpenCV_LIBS "${OpenCV_LIB_COMPONENTS}")
-
 foreach(__opttype OPT DBG)
  SET(OpenCV_LIBS_${__opttype} "${OpenCV_LIBS}")
  SET(OpenCV_EXTRA_LIBS_${__opttype} "")
--- a/doc/CMakeLists.txt
+++ b/doc/CMakeLists.txt
@ -2,9 +2,6 @@
 #  CMake file for OpenCV docs
 #

-file(GLOB FILES_TEX *.tex *.sty *.bib)
-file(GLOB FILES_TEX_PICS pics/*.png pics/*.jpg)
-
 if(BUILD_DOCS AND HAVE_SPHINX)

  project(opencv_docs)
@ -23,55 +20,80 @@ if(BUILD_DOCS AND HAVE_SPHINX)
  set(OPTIONAL_DOC_LIST "")


-  set(OPENCV2_BASE_MODULES core imgproc highgui video calib3d features2d objdetect ml flann photo stitching nonfree contrib legacy bioinspired)
-
  # build lists of modules to be documented
-  set(OPENCV2_MODULES "")
-  set(OPENCV_MODULES "")
+  set(BASE_MODULES "")
+  set(EXTRA_MODULES "")

  foreach(mod ${OPENCV_MODULES_BUILD} ${OPENCV_MODULES_DISABLED_USER} ${OPENCV_MODULES_DISABLED_AUTO} ${OPENCV_MODULES_DISABLED_FORCE})
    string(REGEX REPLACE "^opencv_" "" mod "${mod}")
    if("${OPENCV_MODULE_opencv_${mod}_LOCATION}" STREQUAL "${OpenCV_SOURCE_DIR}/modules/${mod}")
-      list(APPEND OPENCV2_MODULES ${mod})
+      list(APPEND BASE_MODULES ${mod})
    else()
-      list(APPEND OPENCV_MODULES ${mod})
+      list(APPEND EXTRA_MODULES ${mod})
    endif()
  endforeach()
-  list(REMOVE_ITEM OPENCV2_MODULES ${OPENCV2_BASE_MODULES})
-  ocv_list_sort(OPENCV2_MODULES)
-  ocv_list_sort(OPENCV_MODULES)
+
+  set(FIXED_ORDER_MODULES core imgproc highgui video calib3d features2d objdetect ml flann photo stitching nonfree contrib legacy bioinspired)
+
+  list(REMOVE_ITEM BASE_MODULES ${FIXED_ORDER_MODULES})
+
+  ocv_list_sort(BASE_MODULES)
+  ocv_list_sort(EXTRA_MODULES)
+
+  set(BASE_MODULES ${FIXED_ORDER_MODULES} ${BASE_MODULES})

  # build lists of documentation files and generate table of contents for reference manual
-  set(OPENCV_FILES_REF "")
-  set(OPENCV_FILES_REF_PICT "")
-  set(OPENCV_REFMAN_TOC "")

-  foreach(mod ${OPENCV2_BASE_MODULES} ${OPENCV2_MODULES} ${OPENCV_MODULES})
-    file(GLOB_RECURSE _OPENCV_FILES_REF "${OPENCV_MODULE_opencv_${mod}_LOCATION}/doc/*.rst")
-    file(GLOB_RECURSE _OPENCV_FILES_REF_PICT "${OPENCV_MODULE_opencv_${mod}_LOCATION}/doc/*.png" "${OPENCV_MODULE_opencv_${mod}_LOCATION}/doc/*.jpg")
-    list(APPEND OPENCV_FILES_REF ${_OPENCV_FILES_REF})
-    list(APPEND OPENCV_FILES_REF_PICT ${_OPENCV_FILES_REF_PICT})
+  set(DOC_FAKE_ROOT "${CMAKE_CURRENT_BINARY_DIR}/fake-root")
+  set(DOC_FAKE_ROOT_FILES "")
+
+  function(ocv_doc_add_file_to_fake_root source destination)
+    add_custom_command(
+      OUTPUT "${DOC_FAKE_ROOT}/${destination}"
+      COMMAND "${CMAKE_COMMAND}" -E copy "${source}" "${DOC_FAKE_ROOT}/${destination}"
+      DEPENDS "${source}"
+      COMMENT "Copying ${destination} to fake root..."
+      VERBATIM
+    )
+    list(APPEND DOC_FAKE_ROOT_FILES "${DOC_FAKE_ROOT}/${destination}")
+    set(DOC_FAKE_ROOT_FILES "${DOC_FAKE_ROOT_FILES}" PARENT_SCOPE)
+  endfunction()

-    set(toc_file "${OPENCV_MODULE_opencv_${mod}_LOCATION}/doc/${mod}.rst")
-    if(EXISTS "${toc_file}")
-      file(RELATIVE_PATH toc_file "${OpenCV_SOURCE_DIR}/modules" "${toc_file}")
-      set(OPENCV_REFMAN_TOC "${OPENCV_REFMAN_TOC}   ${toc_file}\n")
+  function(ocv_doc_add_to_fake_root source)
+    if(ARGC GREATER 1)
+      set(destination "${ARGV1}")
+    else()
+      file(RELATIVE_PATH destination "${OpenCV_SOURCE_DIR}" "${source}")
    endif()
+
+    if(IS_DIRECTORY "${source}")
+      file(GLOB_RECURSE files RELATIVE "${source}" "${source}/*")
+
+      foreach(file ${files})
+        ocv_doc_add_file_to_fake_root("${source}/${file}" "${destination}/${file}")
      endforeach()
+    else()
+      ocv_doc_add_file_to_fake_root("${source}" "${destination}")
+    endif()

-  file(GLOB_RECURSE _OPENCV_FILES_REF "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.rst")
-  file(GLOB_RECURSE _OPENCV_FILES_REF_PICT "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.png" "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.jpg")
-  list(APPEND OPENCV_FILES_REF ${_OPENCV_FILES_REF})
-  list(APPEND OPENCV_FILES_REF_PICT ${_OPENCV_FILES_REF_PICT})
+    set(DOC_FAKE_ROOT_FILES "${DOC_FAKE_ROOT_FILES}" PARENT_SCOPE)
+  endfunction()

-  configure_file("${OpenCV_SOURCE_DIR}/modules/refman.rst.in" "${OpenCV_SOURCE_DIR}/modules/refman.rst" IMMEDIATE @ONLY)
+  set(OPENCV_REFMAN_TOC "")
+
+  foreach(mod ${BASE_MODULES} ${EXTRA_MODULES})
+    if(EXISTS "${OPENCV_MODULE_opencv_${mod}_LOCATION}/doc/${mod}.rst")
+      ocv_doc_add_to_fake_root("${OPENCV_MODULE_opencv_${mod}_LOCATION}/doc" modules/${mod}/doc)
+      set(OPENCV_REFMAN_TOC "${OPENCV_REFMAN_TOC}   ${mod}/doc/${mod}.rst\n")
+    endif()
+  endforeach()

-  file(GLOB_RECURSE OPENCV_FILES_UG  user_guide/*.rst)
-  file(GLOB_RECURSE OPENCV_FILES_TUT tutorials/*.rst)
-  file(GLOB_RECURSE OPENCV_FILES_TUT_PICT tutorials/*.png tutorials/*.jpg)
+  configure_file("${OpenCV_SOURCE_DIR}/modules/refman.rst.in" "${DOC_FAKE_ROOT}/modules/refman.rst" @ONLY)

-  set(OPENCV_DOC_DEPS conf.py ${OPENCV_FILES_REF} ${OPENCV_FILES_REF_PICT}
-           ${OPENCV_FILES_UG} ${OPENCV_FILES_TUT} ${OPENCV_FILES_TUT_PICT})
+  ocv_doc_add_to_fake_root("${OpenCV_SOURCE_DIR}/index.rst")
+  ocv_doc_add_to_fake_root("${OpenCV_SOURCE_DIR}/doc")
+  ocv_doc_add_to_fake_root("${OpenCV_SOURCE_DIR}/platforms/android")
+  ocv_doc_add_to_fake_root("${OpenCV_SOURCE_DIR}/samples")

  set(BUILD_PLANTUML "")
  if(PLANTUML)
@ -80,7 +102,7 @@ if(BUILD_DOCS AND HAVE_SPHINX)

  if(PDFLATEX_COMPILER)
    add_custom_target(docs
-      COMMAND ${SPHINX_BUILD} ${BUILD_PLANTUML} -b latex -c ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. .
+      COMMAND ${SPHINX_BUILD} ${BUILD_PLANTUML} -b latex -c "${CMAKE_CURRENT_SOURCE_DIR}" "${DOC_FAKE_ROOT}" .
      COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/pics ${CMAKE_CURRENT_BINARY_DIR}/doc/opencv1/pics
      COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/mymath.sty ${CMAKE_CURRENT_BINARY_DIR}
      COMMAND ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/patch_refman_latex.py" opencv2refman.tex
@ -100,7 +122,7 @@ if(BUILD_DOCS AND HAVE_SPHINX)
      COMMAND ${CMAKE_COMMAND} -E echo "Generating opencv_cheatsheet.pdf"
      COMMAND ${PDFLATEX_COMPILER} -interaction=batchmode "${CMAKE_CURRENT_SOURCE_DIR}/opencv_cheatsheet.tex"
      COMMAND ${PDFLATEX_COMPILER} -interaction=batchmode "${CMAKE_CURRENT_SOURCE_DIR}/opencv_cheatsheet.tex"
-      DEPENDS ${OPENCV_DOC_DEPS}
+      DEPENDS ${DOC_FAKE_ROOT_FILES}
      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
      COMMENT "Generating the PDF Manuals"
      )
@ -114,9 +136,9 @@ if(BUILD_DOCS AND HAVE_SPHINX)
  endif()

  add_custom_target(html_docs
-    COMMAND ${SPHINX_BUILD} ${BUILD_PLANTUML} -b html -c ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. ./_html
+    COMMAND "${SPHINX_BUILD}" ${BUILD_PLANTUML} -b html -c "${CMAKE_CURRENT_SOURCE_DIR}" "${DOC_FAKE_ROOT}" ./_html
    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/mymath.sty ${CMAKE_CURRENT_BINARY_DIR}
-    DEPENDS ${OPENCV_DOC_DEPS}
+    DEPENDS ${DOC_FAKE_ROOT_FILES}
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    COMMENT "Generating Online Documentation"
    )
--- a/doc/tutorials/introduction/display_image/display_image.rst
+++ b/doc/tutorials/introduction/display_image/display_image.rst
@ -36,7 +36,7 @@ You'll almost always end up using the:
   + *core* section, as here are defined the basic building blocks of the library
   + *highgui* module, as this contains the functions for input and output operations

-.. literalinclude:: ../../../../samples/cpp/tutorial_code/introduction/windows_visual_studio_Opencv/Test.cpp
+.. literalinclude:: ../../../../samples/cpp/tutorial_code/introduction/display_image/display_image.cpp
   :language: cpp
   :tab-width: 4
   :lines:  1-3
--- a/doc/tutorials/introduction/windows_visual_studio_Opencv/windows_visual_studio_Opencv.rst
+++ b/doc/tutorials/introduction/windows_visual_studio_Opencv/windows_visual_studio_Opencv.rst
@ -142,9 +142,9 @@ The process is the same as described in case of the local approach. Just add the
 Test it!
 ========

-Now to try this out download our little test :download:`source code <../../../../samples/cpp/tutorial_code/introduction/windows_visual_studio_Opencv/Test.cpp>` or get it from the sample code folder of the OpenCV sources. Add this to your project and build it. Here's its content:
+Now to try this out download our little test :download:`source code <../../../../samples/cpp/tutorial_code/introduction/windows_visual_studio_Opencv/introduction_windows_vs.cpp>` or get it from the sample code folder of the OpenCV sources. Add this to your project and build it. Here's its content:

-.. literalinclude:: ../../../../samples/cpp/tutorial_code/introduction/windows_visual_studio_Opencv/Test.cpp
+.. literalinclude:: ../../../../samples/cpp/tutorial_code/introduction/windows_visual_studio_Opencv/introduction_windows_vs.cpp
   :language: cpp
   :tab-width: 4
   :linenos:
--- a/modules/androidcamera/camera_wrapper/camera_wrapper.cpp
+++ b/modules/androidcamera/camera_wrapper/camera_wrapper.cpp
@ -1,6 +1,6 @@
 #if !defined(ANDROID_r2_2_0) && !defined(ANDROID_r2_3_3) && !defined(ANDROID_r3_0_1) && \
 !defined(ANDROID_r4_0_0) && !defined(ANDROID_r4_0_3) && !defined(ANDROID_r4_1_1) && \
- !defined(ANDROID_r4_2_0) && !defined(ANDROID_r4_3_0)
+ !defined(ANDROID_r4_2_0) && !defined(ANDROID_r4_3_0) && !defined(ANDROID_r4_4_0)
 # error Building camera wrapper for your version of Android is not supported by OpenCV.\
 You need to modify OpenCV sources in order to compile camera wrapper for your version of Android.
 #endif
@ -22,7 +22,7 @@
 #elif defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0)
 # include <gui/ISurface.h>
 # include <gui/BufferQueue.h>
-#elif defined(ANDROID_r4_3_0)
+#elif defined(ANDROID_r4_3_0) || defined(ANDROID_r4_4_0)
 # include <gui/IGraphicBufferProducer.h>
 # include <gui/BufferQueue.h>
 #else
@ -74,6 +74,20 @@ public:
    {
    }
 };
+#elif defined(ANDROID_r4_4_0)
+class ConsumerListenerStub: public android::BnConsumerListener
+{
+public:
+    virtual void onFrameAvailable()
+    {
+    }
+    virtual void onBuffersReleased()
+    {
+    }
+    virtual ~ConsumerListenerStub()
+    {
+    }
+};
 #endif

 std::string getProcessName()
@ -306,7 +320,8 @@ public:
    }

    virtual void postData(int32_t msgType, const sp<IMemory>& dataPtr
-    #if defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3) || defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0) || defined(ANDROID_r4_3_0)
+#if defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3) || defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0) \
+ || defined(ANDROID_r4_3_0) || defined(ANDROID_r4_4_0)
                          ,camera_frame_metadata_t*
 #endif
                          )
@ -623,6 +638,13 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
    bufferStatus = camera->setPreviewTexture(bufferQueue);
    if (bufferStatus != 0)
        LOGE("initCameraConnect: failed setPreviewTexture call; camera might not work correctly");
+# elif defined(ANDROID_r4_4_0)
+    sp<BufferQueue> bufferQueue = new BufferQueue();
+    sp<IConsumerListener> queueListener = new ConsumerListenerStub();
+    bufferQueue->consumerConnect(queueListener, true);
+    bufferStatus = handler->camera->setPreviewTarget(bufferQueue);
+    if (bufferStatus != 0)
+        LOGE("applyProperties: failed setPreviewTexture call; camera might not work correctly");
 # endif

 #if (defined(ANDROID_r2_2_0) || defined(ANDROID_r2_3_3) || defined(ANDROID_r3_0_1))
@ -663,7 +685,8 @@ void CameraHandler::closeCameraConnect()
    }

    camera->stopPreview();
-#if defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3) || defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0) || defined(ANDROID_r4_3_0)
+#if defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3) || defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0) \
+ || defined(ANDROID_r4_3_0) || defined(ANDROID_r4_3_0)
    camera->setPreviewCallbackFlags(CAMERA_FRAME_CALLBACK_FLAG_NOOP);
 #endif
    camera->disconnect();
@ -914,7 +937,8 @@ void CameraHandler::applyProperties(CameraHandler** ppcameraHandler)

    CameraParameters curCameraParameters((*ppcameraHandler)->params.flatten());

-#if defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3) || defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0) || defined(ANDROID_r4_3_0)
+#if defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3) || defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0) \
+ || defined(ANDROID_r4_3_0) || defined(ANDROID_r4_4_0)
    CameraHandler* handler=*ppcameraHandler;

    handler->camera->stopPreview();
@ -943,6 +967,13 @@ void CameraHandler::applyProperties(CameraHandler** ppcameraHandler)
    bufferStatus = handler->camera->setPreviewTexture(bufferQueue);
    if (bufferStatus != 0)
        LOGE("applyProperties: failed setPreviewTexture call; camera might not work correctly");
+# elif defined(ANDROID_r4_4_0)
+    sp<BufferQueue> bufferQueue = new BufferQueue();
+    sp<IConsumerListener> queueListener = new ConsumerListenerStub();
+    bufferQueue->consumerConnect(queueListener, true);
+    bufferStatus = handler->camera->setPreviewTarget(bufferQueue);
+    if (bufferStatus != 0)
+        LOGE("applyProperties: failed setPreviewTexture call; camera might not work correctly");
 # endif

    handler->camera->setPreviewCallbackFlags( CAMERA_FRAME_CALLBACK_FLAG_ENABLE_MASK | CAMERA_FRAME_CALLBACK_FLAG_COPY_OUT_MASK);//with copy
--- a/modules/calib3d/src/stereosgbm.cpp
+++ b/modules/calib3d/src/stereosgbm.cpp
@ -125,7 +125,7 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
                            int tabOfs, int )
 {
    int x, c, width = img1.cols, cn = img1.channels();
-    int minX1 = std::max(maxD, 0), maxX1 = width + std::min(minD, 0);
+    int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0);
    int minX2 = std::max(minX1 - maxD, 0), maxX2 = std::min(maxX1 - minD, width);
    int D = maxD - minD, width1 = maxX1 - minX1, width2 = maxX2 - minX2;
    const PixType *row1 = img1.ptr<PixType>(y), *row2 = img2.ptr<PixType>(y);
@ -340,7 +340,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
    int disp12MaxDiff = params.disp12MaxDiff > 0 ? params.disp12MaxDiff : 1;
    int P1 = params.P1 > 0 ? params.P1 : 2, P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1);
    int k, width = disp1.cols, height = disp1.rows;
-    int minX1 = std::max(maxD, 0), maxX1 = width + std::min(minD, 0);
+    int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0);
    int D = maxD - minD, width1 = maxX1 - minX1;
    int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
    int SW2 = SADWindowSize.width/2, SH2 = SADWindowSize.height/2;
--- a/modules/contrib/src/chamfermatching.cpp
+++ b/modules/contrib/src/chamfermatching.cpp
@ -474,12 +474,19 @@ public:
        chamfer_ = new Matching(true);
    }

+    ~ChamferMatcher()
+    {
+        delete chamfer_;
+    }
+
    void showMatch(Mat& img, int index = 0);
    void showMatch(Mat& img, Match match_);

    const Matches& matching(Template&, Mat&);

 private:
+    ChamferMatcher(const ChamferMatcher&);
+    ChamferMatcher& operator=(const ChamferMatcher&);
    void addMatch(float cost, Point offset, const Template* tpl);


--- a/modules/contrib/src/fuzzymeanshifttracker.cpp
+++ b/modules/contrib/src/fuzzymeanshifttracker.cpp
@ -80,9 +80,7 @@ void CvFuzzyCurve::clear()

 void CvFuzzyCurve::addPoint(double x, double y)
 {
-    CvFuzzyPoint *point;
-    point = new CvFuzzyPoint(x, y);
-    points.push_back(*point);
+    points.push_back(CvFuzzyPoint(x, y));
 };

 double CvFuzzyCurve::calcValue(double param)
--- a/modules/core/include/opencv2/core/mat.inl.hpp
+++ b/modules/core/include/opencv2/core/mat.inl.hpp
@ -1531,7 +1531,9 @@ template<typename _Tp> template<int m, int n> inline
 Mat_<_Tp>::operator Matx<typename DataType<_Tp>::channel_type, m, n>() const
 {
    CV_Assert(n % DataType<_Tp>::channels == 0);
-    return this->Mat::operator Matx<typename DataType<_Tp>::channel_type, m, n>();
+
+    Matx<typename DataType<_Tp>::channel_type, m, n> res = this->Mat::operator Matx<typename DataType<_Tp>::channel_type, m, n>();
+    return res;
 }

 template<typename _Tp> inline
--- a/modules/core/src/opengl.cpp
+++ b/modules/core/src/opengl.cpp
@ -1032,6 +1032,7 @@ cv::ogl::Texture2D::Texture2D(InputArray arr, bool autoRelease) : rows_(0), cols
            #else
                GpuMat dmat = arr.getGpuMat();
                ogl::Buffer buf(dmat, ogl::Buffer::PIXEL_UNPACK_BUFFER);
+                buf.setAutoRelease(true);
                buf.bind(ogl::Buffer::PIXEL_UNPACK_BUFFER);
                impl_.reset(new Impl(internalFormats[cn], asize.width, asize.height, srcFormats[cn], gl_types[depth], 0, autoRelease));
                ogl::Buffer::unbind(ogl::Buffer::PIXEL_UNPACK_BUFFER);
@ -1145,6 +1146,7 @@ void cv::ogl::Texture2D::copyFrom(InputArray arr, bool autoRelease)
            #else
                GpuMat dmat = arr.getGpuMat();
                ogl::Buffer buf(dmat, ogl::Buffer::PIXEL_UNPACK_BUFFER);
+                buf.setAutoRelease(true);
                buf.bind(ogl::Buffer::PIXEL_UNPACK_BUFFER);
                impl_->copyFrom(asize.width, asize.height, srcFormats[cn], gl_types[depth], 0);
                ogl::Buffer::unbind(ogl::Buffer::PIXEL_UNPACK_BUFFER);
@ -1195,6 +1197,7 @@ void cv::ogl::Texture2D::copyTo(OutputArray arr, int ddepth, bool autoRelease) c
                throw_no_cuda();
            #else
                ogl::Buffer buf(rows_, cols_, CV_MAKE_TYPE(ddepth, cn), ogl::Buffer::PIXEL_PACK_BUFFER);
+                buf.setAutoRelease(true);
                buf.bind(ogl::Buffer::PIXEL_PACK_BUFFER);
                impl_->copyTo(dstFormat, gl_types[ddepth], 0);
                ogl::Buffer::unbind(ogl::Buffer::PIXEL_PACK_BUFFER);
--- a/modules/highgui/CMakeLists.txt
+++ b/modules/highgui/CMakeLists.txt
@ -175,9 +175,9 @@ if(HAVE_XIMEA)
  list(APPEND highgui_srcs src/cap_ximea.cpp)
  ocv_include_directories(${XIMEA_PATH})
  if(XIMEA_LIBRARY_DIR)
-    link_directories(${XIMEA_LIBRARY_DIR})
+    link_directories("${XIMEA_LIBRARY_DIR}")
  endif()
-  if(CMAKE_CL_64)
+  if(X86_64)
    list(APPEND HIGHGUI_LIBRARIES m3apiX64)
  else()
    list(APPEND HIGHGUI_LIBRARIES m3api)
--- a/modules/highgui/src/cap_openni.cpp
+++ b/modules/highgui/src/cap_openni.cpp
@ -1047,7 +1047,7 @@ double CvCapture_OpenNI::getImageGeneratorProperty( int propIdx )
        propValue = (double)imageGenerator.GetTimestamp();
        break;
    case CV_CAP_PROP_POS_FRAMES :
-        propValue = imageGenerator.GetFrameID();
+        propValue = (double)imageGenerator.GetFrameID();
        break;
    default :
        CV_Error( CV_StsBadArg, cv::format("Image generator does not support such parameter (propIdx=%d) for getting.\n", propIdx) );
--- a/modules/highgui/src/cap_qtkit.mm
+++ b/modules/highgui/src/cap_qtkit.mm
@ -177,6 +177,7 @@ private:
    int changedPos;

    int started;
+    QTTime endOfMovie;
 };


@ -671,6 +672,8 @@ CvCaptureFile::CvCaptureFile(const char* filename) {
        return;
    }

+    [mCaptureSession gotoEnd];
+    endOfMovie = [mCaptureSession currentTime];

    [mCaptureSession gotoBeginning];

@ -707,6 +710,11 @@ int CvCaptureFile::didStart() {
 bool CvCaptureFile::grabFrame() {
    NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];
    double t1 = getProperty(CV_CAP_PROP_POS_MSEC);
+
+    QTTime curTime;
+    curTime = [mCaptureSession currentTime];
+    bool isEnd=(QTTimeCompare(curTime,endOfMovie) == NSOrderedSame);
+
    [mCaptureSession stepForward];
    double t2 = getProperty(CV_CAP_PROP_POS_MSEC);
    if (t2>t1 && !changedPos) {
@ -716,7 +724,7 @@ bool CvCaptureFile::grabFrame() {
    }
    changedPos = 0;
    [localpool drain];
-    return 1;
+    return !isEnd;
 }


--- a/modules/highgui/src/cap_ximea.cpp
+++ b/modules/highgui/src/cap_ximea.cpp
@ -75,19 +75,20 @@ bool CvCaptureCAM_XIMEA::open( int wIndex )
        return false;
    }

+    int width   = 0;
+    int height  = 0;
+    int isColor = 0;
+
    // always use auto exposure/gain
    mvret = xiSetParamInt( hmv, XI_PRM_AEAG, 1);
    HandleXiResult(mvret);

-    int width = 0;
    mvret = xiGetParamInt( hmv, XI_PRM_WIDTH, &width);
    HandleXiResult(mvret);

-    int height = 0;
    mvret = xiGetParamInt( hmv, XI_PRM_HEIGHT, &height);
    HandleXiResult(mvret);

-    int isColor = 0;
    mvret = xiGetParamInt(hmv, XI_PRM_IMAGE_IS_COLOR, &isColor);
    HandleXiResult(mvret);

@ -97,7 +98,7 @@ bool CvCaptureCAM_XIMEA::open( int wIndex )
        mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, XI_RGB24);
        HandleXiResult(mvret);

-        // always use auto white ballance for color cameras
+        // always use auto white balance for color cameras
        mvret = xiSetParamInt( hmv, XI_PRM_AUTO_WB, 1);
        HandleXiResult(mvret);

--- a/modules/imgproc/src/precomp.hpp
+++ b/modules/imgproc/src/precomp.hpp
@ -66,7 +66,7 @@

 /* helper tables */
 extern const uchar icvSaturate8u_cv[];
-#define CV_FAST_CAST_8U(t)  (assert(-256 <= (t) || (t) <= 512), icvSaturate8u_cv[(t)+256])
+#define CV_FAST_CAST_8U(t)  (assert(-256 <= (t) && (t) <= 512), icvSaturate8u_cv[(t)+256])
 #define CV_CALC_MIN_8U(a,b) (a) -= CV_FAST_CAST_8U((a) - (b))
 #define CV_CALC_MAX_8U(a,b) (a) += CV_FAST_CAST_8U((b) - (a))

--- a/modules/ml/include/opencv2/ml.hpp
+++ b/modules/ml/include/opencv2/ml.hpp
@ -554,6 +554,10 @@ protected:

    CvSVMSolver* solver;
    CvSVMKernel* kernel;
+
+private:
+    CvSVM(const CvSVM&);
+    CvSVM& operator = (const CvSVM&);
 };

 /****************************************************************************************\
--- a/modules/ml/src/data.cpp
+++ b/modules/ml/src/data.cpp
@ -232,6 +232,7 @@ int CvMLData::read_csv(const char* filename)
            if (!token)
            {
                fclose(file);
+                delete [] el_ptr;
                return -1;
            }
        }
--- a/modules/objdetect/src/hog.cpp
+++ b/modules/objdetect/src/hog.cpp
@ -2928,8 +2928,10 @@ void HOGDescriptor::readALTModel(String modelfile)
        double *linearwt = new double[totwords+1];
        int length = totwords;
        nread = fread(linearwt, sizeof(double), totwords + 1, modelfl);
-        if(nread != static_cast<size_t>(length) + 1)
+        if(nread != static_cast<size_t>(length) + 1) {
+            delete [] linearwt;
            throw Exception();
+        }

        for(int i = 0; i < length; i++)
            detector.push_back((float)linearwt[i]);
--- a/modules/objdetect/src/latentsvmdetector.cpp
+++ b/modules/objdetect/src/latentsvmdetector.cpp
@ -144,6 +144,7 @@ CvSeq* cvLatentSvmDetectObjects(IplImage* image,
    free(points);
    free(oppPoints);
    free(score);
+    free(scoreOut);

    return result_seq;
 }
--- a/modules/objdetect/src/lsvmparser.cpp
+++ b/modules/objdetect/src/lsvmparser.cpp
@ -741,8 +741,11 @@ int LSVMparser(const char * filename, CvLSVMFilterObject *** model, int *last, i
    //printf("parse : %s\n", filename);

    xmlf = fopen(filename, "rb");
-    if(xmlf == NULL)
+    if(xmlf == NULL) {
+        free(*model);
+        *model = NULL;
        return LSVM_PARSER_FILE_NOT_FOUND;
+    }

    //i   = 0;
    j   = 0;
@ -787,7 +790,7 @@ int loadModel(
              float *scoreThreshold){
    int last;
    int max;
-    int *comp;
+    int *comp = NULL;
    int count;
    int i;
    int err;
@ -808,6 +811,7 @@ int loadModel(
        (*kPartFilters)[i] = (comp[i] - comp[i - 1]) - 1;
    }
    (*kPartFilters)[0] = comp[0];
+    free(comp);

    return 0;
 }
--- a/modules/ocl/CMakeLists.txt
+++ b/modules/ocl/CMakeLists.txt
@ -5,4 +5,4 @@ endif()

 set(the_description "OpenCL-accelerated Computer Vision")
 ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video opencv_calib3d opencv_ml "${OPENCL_LIBRARIES}")
-ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow -Wundef)
+ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow)
--- a/modules/ocl/perf/perf_color.cpp
+++ b/modules/ocl/perf/perf_color.cpp
@ -46,30 +46,62 @@
 #include "perf_precomp.hpp"

 using namespace perf;
+using std::tr1::tuple;
+using std::tr1::get;
+using std::tr1::make_tuple;

 ///////////// cvtColor////////////////////////

-typedef TestBaseWithParam<Size> cvtColorFixture;
+CV_ENUM(ConversionTypes, COLOR_RGB2GRAY, COLOR_RGB2BGR, COLOR_RGB2YUV, COLOR_YUV2RGB, COLOR_RGB2YCrCb,
+        COLOR_YCrCb2RGB, COLOR_RGB2XYZ, COLOR_XYZ2RGB, COLOR_RGB2HSV, COLOR_HSV2RGB, COLOR_RGB2HLS,
+        COLOR_HLS2RGB, COLOR_BGR5652BGR, COLOR_BGR2BGR565, COLOR_RGBA2mRGBA, COLOR_mRGBA2RGBA, COLOR_YUV2RGB_NV12)

-PERF_TEST_P(cvtColorFixture, cvtColor, OCL_TYPICAL_MAT_SIZES)
+typedef tuple<Size, tuple<ConversionTypes, int, int> > cvtColorParams;
+typedef TestBaseWithParam<cvtColorParams> cvtColorFixture;
+
+PERF_TEST_P(cvtColorFixture, cvtColor, testing::Combine(
+                testing::Values(Size(1000, 1002), Size(2000, 2004), Size(4000, 4008)),
+                testing::Values(
+                    make_tuple(ConversionTypes(COLOR_RGB2GRAY), 3, 1),
+                    make_tuple(ConversionTypes(COLOR_RGB2BGR), 3, 3),
+                    make_tuple(ConversionTypes(COLOR_RGB2YUV), 3, 3),
+                    make_tuple(ConversionTypes(COLOR_YUV2RGB), 3, 3),
+                    make_tuple(ConversionTypes(COLOR_RGB2YCrCb), 3, 3),
+                    make_tuple(ConversionTypes(COLOR_YCrCb2RGB), 3, 3),
+                    make_tuple(ConversionTypes(COLOR_RGB2XYZ), 3, 3),
+                    make_tuple(ConversionTypes(COLOR_XYZ2RGB), 3, 3),
+                    make_tuple(ConversionTypes(COLOR_RGB2HSV), 3, 3),
+                    make_tuple(ConversionTypes(COLOR_HSV2RGB), 3, 3),
+                    make_tuple(ConversionTypes(COLOR_RGB2HLS), 3, 3),
+                    make_tuple(ConversionTypes(COLOR_HLS2RGB), 3, 3),
+                    make_tuple(ConversionTypes(COLOR_BGR5652BGR), 2, 3),
+                    make_tuple(ConversionTypes(COLOR_BGR2BGR565), 3, 2),
+                    make_tuple(ConversionTypes(COLOR_RGBA2mRGBA), 4, 4),
+                    make_tuple(ConversionTypes(COLOR_mRGBA2RGBA), 4, 4),
+                    make_tuple(ConversionTypes(COLOR_YUV2RGB_NV12), 1, 3)
+                    )))
 {
-    const Size srcSize = GetParam();
+    cvtColorParams params = GetParam();
+    const Size srcSize = get<0>(params);
+    const tuple<int, int, int> conversionParams = get<1>(params);
+    const int code = get<0>(conversionParams), scn = get<1>(conversionParams),
+            dcn = get<2>(conversionParams);

-    Mat src(srcSize, CV_8UC4), dst(srcSize, CV_8UC4);
+    Mat src(srcSize, CV_8UC(scn)), dst(srcSize, CV_8UC(scn));
    declare.in(src, WARMUP_RNG).out(dst);

    if (RUN_OCL_IMPL)
    {
-        ocl::oclMat oclSrc(src), oclDst(src.size(), CV_8UC4);
+        ocl::oclMat oclSrc(src), oclDst(src.size(), dst.type());

-        OCL_TEST_CYCLE() ocl::cvtColor(oclSrc, oclDst, COLOR_RGBA2GRAY, 4);
+        OCL_TEST_CYCLE() ocl::cvtColor(oclSrc, oclDst, code, dcn);
        oclDst.download(dst);

-        SANITY_CHECK(dst);
+        SANITY_CHECK(dst, 1);
    }
    else if (RUN_PLAIN_IMPL)
    {
-        TEST_CYCLE() cv::cvtColor(src, dst, COLOR_RGBA2GRAY, 4);
+        TEST_CYCLE() cv::cvtColor(src, dst, code, dcn);

        SANITY_CHECK(dst);
    }
--- a/modules/ocl/perf/perf_imgwarp.cpp
+++ b/modules/ocl/perf/perf_imgwarp.cpp
@ -185,6 +185,46 @@ PERF_TEST_P(resizeFixture, resize,
        OCL_PERF_ELSE
 }

+typedef tuple<Size, MatType, double> resizeAreaParams;
+typedef TestBaseWithParam<resizeAreaParams> resizeAreaFixture;
+
+PERF_TEST_P(resizeAreaFixture, resize,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+                               ::testing::Values(0.3, 0.5, 0.6)))
+{
+    const resizeAreaParams params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
+    double scale = get<2>(params);
+    const Size dstSize(cvRound(srcSize.width * scale), cvRound(srcSize.height * scale));
+
+    checkDeviceMaxMemoryAllocSize(srcSize, type);
+
+    Mat src(srcSize, type), dst;
+    dst.create(dstSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(dstSize, type);
+
+        OCL_TEST_CYCLE() cv::ocl::resize(oclSrc, oclDst, Size(), scale, scale, cv::INTER_AREA);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst, 1 + DBL_EPSILON);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::resize(src, dst, Size(), scale, scale, cv::INTER_AREA);
+
+        SANITY_CHECK(dst, 1 + DBL_EPSILON);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
 ///////////// remap////////////////////////

 CV_ENUM(RemapInterType, INTER_NEAREST, INTER_LINEAR)
--- a/modules/ocl/src/arithm.cpp
+++ b/modules/ocl/src/arithm.cpp
@ -103,7 +103,11 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const
    int dststep1 = dst.step / dst.elemSize(), dstoffset1 = dst.offset / dst.elemSize();
    std::vector<uchar> m;

+#ifdef ANDROID
+    size_t localThreads[3]  = { 16, 10, 1 };
+#else
    size_t localThreads[3]  = { 16, 16, 1 };
+#endif
    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };

    std::string kernelName = "arithm_binary_op";
@ -337,10 +341,15 @@ static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int groupn
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst ));
    size_t globalThreads[3] = { groupnum * 256, 1, 1 };
-    size_t localThreads[3] = { 256, 1, 1 };

+#ifdef ANDROID
+    openCLExecuteKernel(src.clCxt, &arithm_sum, "arithm_op_sum", globalThreads, NULL,
+                        args, -1, -1, buildOptions.c_str());
+#else
+    size_t localThreads[3] = { 256, 1, 1 };
    openCLExecuteKernel(src.clCxt, &arithm_sum, "arithm_op_sum", globalThreads, localThreads,
                        args, -1, -1, buildOptions.c_str());
+#endif
 }

 template <typename T>
@ -394,12 +403,16 @@ Scalar cv::ocl::sum(const oclMat &src)

 Scalar cv::ocl::absSum(const oclMat &src)
 {
-    if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
+    int sdepth = src.depth();
+    if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && sdepth == CV_64F)
    {
        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
        return cv::Scalar::all(0);
    }

+    if (sdepth == CV_8U || sdepth == CV_16U)
+        return sum(src);
+
    static sumFunc functab[3] =
    {
        arithmetic_sum<int>,
@ -407,7 +420,7 @@ Scalar cv::ocl::absSum(const oclMat &src)
        arithmetic_sum<double>
    };

-    int ddepth = std::max(src.depth(), CV_32S);
+    int ddepth = std::max(sdepth, CV_32S);
    sumFunc func = functab[ddepth - CV_32S];
    return func(src, ABS_SUM, ddepth);
 }
@ -511,6 +524,7 @@ static void arithmetic_minMax_run(const oclMat &src, const oclMat & mask, cl_mem
    size_t globalThreads[3] = {groupnum * 256, 1, 1};
    size_t localThreads[3] = {256, 1, 1};

+    // kernel use fixed grid size, replace lt on NULL is imposible without kernel changes
    openCLExecuteKernel(src.clCxt, &arithm_minMax, kernelName, globalThreads, localThreads,
                        args, -1, -1, buildOptions.c_str());
 }
@ -599,6 +613,12 @@ static void arithm_absdiff_nonsaturate_run(const oclMat & src1, const oclMat & s
    }
    CV_Assert(src1.step % src1.elemSize() == 0 && (src2.empty() || src2.step % src2.elemSize() == 0));

+    if (src2.empty() && (src1.depth() == CV_8U || src1.depth() == CV_16U))
+    {
+        src1.convertTo(diff, CV_32S);
+        return;
+    }
+
    int ddepth = std::max(src1.depth(), CV_32S);
    if (ntype == NORM_L2)
        ddepth = std::max<int>(CV_32F, ddepth);
@ -612,7 +632,11 @@ static void arithm_absdiff_nonsaturate_run(const oclMat & src1, const oclMat & s
    int diffstep1 = diff.step / diff.elemSize(), diffoffset1 = diff.offset / diff.elemSize();

    String kernelName = "arithm_absdiff_nonsaturate";
+#ifdef ANDROID
+    size_t localThreads[3]  = { 16, 10, 1 };
+#else
    size_t localThreads[3]  = { 16, 16, 1 };
+#endif
    size_t globalThreads[3] = { diff.cols, diff.rows, 1 };

    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
@ -635,6 +659,7 @@ static void arithm_absdiff_nonsaturate_run(const oclMat & src1, const oclMat & s
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2offset1 ));

        kernelName += "_binary";
+        buildOptions += " -D BINARY";
    }

    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&diff.data ));
@ -831,7 +856,11 @@ static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, String kernel
    int srcoffset1 = src.offset / src.elemSize1(), dstoffset1 = dst.offset / dst.elemSize1();
    int srcstep1 = src.step1(), dststep1 = dst.step1();

+#ifdef ANDROID
+    size_t localThreads[3]  = { 64, 2, 1 };
+#else
    size_t localThreads[3]  = { 64, 4, 1 };
+#endif
    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };

    std::string buildOptions = format("-D srcT=%s",
@ -869,7 +898,11 @@ static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src
 {
    int depth = dst.depth();

+#ifdef ANDROID
+    size_t localThreads[3]  = { 64, 2, 1 };
+#else
    size_t localThreads[3]  = { 64, 4, 1 };
+#endif
    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };

    int src1_step = src1.step / src1.elemSize(), src1_offset = src1.offset / src1.elemSize();
@ -917,7 +950,11 @@ static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat
    int src2step1 = src2.step / src2.elemSize1(), src2offset1 = src2.offset / src2.elemSize1();
    int dststep1 = dst.step / dst.elemSize1(), dstoffset1 = dst.offset / dst.elemSize1();

+#ifdef ANDROID
+    size_t localThreads[3]  = { 64, 2, 1 };
+#else
    size_t localThreads[3]  = { 64, 4, 1 };
+#endif
    size_t globalThreads[3] = { cols1, dst.rows, 1 };

    std::vector<std::pair<size_t , const void *> > args;
@ -963,7 +1000,11 @@ static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, o

    int cols = src1.cols * channels;

+#ifdef ANDROID
+    size_t localThreads[3]  = { 64, 2, 1 };
+#else
    size_t localThreads[3]  = { 64, 4, 1 };
+#endif
    size_t globalThreads[3] = { cols, src1.rows, 1 };

    int src1_step = src1.step / src1.elemSize1(), src1_offset = src1.offset / src1.elemSize1();
@ -1017,7 +1058,11 @@ static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &d
    int channels = src2.oclchannels(), depth = src2.depth();
    int cols = src2.cols * channels, rows = src2.rows;

+#ifdef ANDROID
+    size_t localThreads[3]  = { 64, 2, 1 };
+#else
    size_t localThreads[3]  = { 64, 4, 1 };
+#endif
    size_t globalThreads[3] = { cols, rows, 1 };

    int src1_step = src1.step / src1.elemSize1(), src1_offset = src1.offset / src1.elemSize1();
@ -1093,6 +1138,8 @@ static void arithmetic_minMaxLoc_run(const oclMat &src, cl_mem &dst, int vlen ,
    char build_options[50];
    sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d", src.depth(), repeat_s, repeat_e);
    size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1};
+
+    // kernel use fixed grid size, replace lt on NULL is imposible without kernel changes
    openCLExecuteKernel(src.clCxt, &arithm_minMaxLoc, "arithm_op_minMaxLoc", gt, lt, args, -1, -1, build_options);
 }

@ -1122,6 +1169,7 @@ static void arithmetic_minMaxLoc_mask_run(const oclMat &src, const oclMat &mask,
        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&mask.data ));
        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst ));

+        // kernel use fixed grid size, replace lt on NULL is imposible without kernel changes
        openCLExecuteKernel(src.clCxt, &arithm_minMaxLoc_mask, "arithm_op_minMaxLoc_mask", gt, lt, args, -1, -1, build_options);
    }
 }
@ -1239,10 +1287,15 @@ static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int grou
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst ));

    size_t globalThreads[3] = { groupnum * 256, 1, 1 };
-    size_t localThreads[3] = { 256, 1, 1 };

+#ifdef ANDROID
+    openCLExecuteKernel(src.clCxt, &arithm_nonzero, kernelName, globalThreads, NULL,
+                        args, -1, -1, buildOptions.c_str());
+#else
+    size_t localThreads[3] = { 256, 1, 1 };
    openCLExecuteKernel(src.clCxt, &arithm_nonzero, kernelName, globalThreads, localThreads,
                        args, -1, -1, buildOptions.c_str());
+#endif
 }

 int cv::ocl::countNonZero(const oclMat &src)
@ -1300,7 +1353,11 @@ static void bitwise_unary_run(const oclMat &src1, oclMat &dst, String kernelName
    int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
    int cols = divUp(dst.cols * channels + offset_cols, vector_length);

+#ifdef ANDROID
+    size_t localThreads[3]  = { 64, 2, 1 };
+#else
    size_t localThreads[3]  = { 64, 4, 1 };
+#endif
    size_t globalThreads[3] = { cols, dst.rows, 1 };

    int dst_step1 = dst.cols * dst.elemSize();
@ -1340,7 +1397,11 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca
                                      operationMap[operationType], vlenstr.c_str(), vlenstr.c_str(),
                                      (int)src1.elemSize(), vlen, vlenstr.c_str());

+#ifdef ANDROID
+    size_t localThreads[3]  = { 16, 10, 1 };
+#else
    size_t localThreads[3]  = { 16, 16, 1 };
+#endif
    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };

    std::vector<std::pair<size_t , const void *> > args;
@ -1588,7 +1649,6 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
                                      typeMap[depth], hasDouble ? "double" : "float", typeMap[depth],
                                      depth >= CV_32F ? "" : "_sat_rte");

-    size_t localThreads[3]  = { 256, 1, 1 };
    size_t globalThreads[3] = { cols1, dst.rows, 1};

    float alpha_f = static_cast<float>(alpha),
@ -1622,8 +1682,14 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols1 ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.rows ));

+#ifdef ANDROID
+    openCLExecuteKernel(clCxt, &arithm_addWeighted, "addWeighted", globalThreads, NULL,
+                        args, -1, -1, buildOptions.c_str());
+#else
+    size_t localThreads[3] = { 256, 1, 1};
    openCLExecuteKernel(clCxt, &arithm_addWeighted, "addWeighted", globalThreads, localThreads,
                        args, -1, -1, buildOptions.c_str());
+#endif
 }

 //////////////////////////////////////////////////////////////////////////////
--- a/modules/ocl/src/brute_force_matcher.cpp
+++ b/modules/ocl/src/brute_force_matcher.cpp
@ -48,6 +48,7 @@
 #include <functional>
 #include <iterator>
 #include <vector>
+#include <algorithm>
 #include "opencl_kernels.hpp"

 using namespace cv;
@ -967,14 +968,14 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, std::vec
                std::vector<DMatch> &localMatch = curMatches[queryIdx];
                std::vector<DMatch> &globalMatch = matches[queryIdx];

-                for_each(localMatch.begin(), localMatch.end(), ImgIdxSetter(static_cast<int>(imgIdx)));
+                std::for_each(localMatch.begin(), localMatch.end(), ImgIdxSetter(static_cast<int>(imgIdx)));

                temp.clear();
-                merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), back_inserter(temp));
+                std::merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), back_inserter(temp));

                globalMatch.clear();
                const size_t count = std::min((size_t)k, temp.size());
-                copy(temp.begin(), temp.begin() + count, back_inserter(globalMatch));
+                std::copy(temp.begin(), temp.begin() + count, back_inserter(globalMatch));
            }
        }

@ -1072,7 +1073,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &trainIdx
            curMatches[i] = m;
        }

-        sort(curMatches.begin(), curMatches.end());
+        std::sort(curMatches.begin(), curMatches.end());
    }
 }

@ -1199,7 +1200,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &trainIdx
            curMatches.push_back(m);
        }

-        sort(curMatches.begin(), curMatches.end());
+        std::sort(curMatches.begin(), curMatches.end());
    }
 }

--- a/modules/ocl/src/build_warps.cpp
+++ b/modules/ocl/src/build_warps.cpp
@ -92,8 +92,11 @@ void cv::ocl::buildWarpPlaneMaps(Size /*src_size*/, Rect dst_roi, const Mat &K,
    args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));

    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
+#ifdef ANDROID
+    size_t localThreads[3]  = {32, 4, 1};
+#else
    size_t localThreads[3]  = {32, 8, 1};
-
+#endif
    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpPlaneMaps", globalThreads, localThreads, args, -1, -1);
 }

@ -135,8 +138,11 @@ void cv::ocl::buildWarpCylindricalMaps(Size /*src_size*/, Rect dst_roi, const Ma
    args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));

    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
+#ifdef ANDROID
+    size_t localThreads[3]  = {32, 1, 1};
+#else
    size_t localThreads[3]  = {32, 8, 1};
-
+#endif
    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpCylindricalMaps", globalThreads, localThreads, args, -1, -1);
 }

@ -178,7 +184,11 @@ void cv::ocl::buildWarpSphericalMaps(Size /*src_size*/, Rect dst_roi, const Mat
    args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));

    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
+#ifdef ANDROID
+    size_t localThreads[3]  = {32, 4, 1};
+#else
    size_t localThreads[3]  = {32, 8, 1};
+#endif
    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpSphericalMaps", globalThreads, localThreads, args, -1, -1);
 }

@ -222,7 +232,11 @@ void cv::ocl::buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));

    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
+#ifdef ANDROID
+    size_t localThreads[3]  = {32, 4, 1};
+#else
    size_t localThreads[3]  = {32, 8, 1};
+#endif
    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpAffineMaps", globalThreads, localThreads, args, -1, -1);
 }

--- a/modules/ocl/src/cl_context.cpp
+++ b/modules/ocl/src/cl_context.cpp
@ -46,6 +46,8 @@
 //M*/

 #include "precomp.hpp"
+#include <stdlib.h>
+#include <ctype.h>
 #include <iomanip>
 #include <fstream>
 #include "cl_programcache.hpp"
--- a/modules/ocl/src/color.cpp
+++ b/modules/ocl/src/color.cpp
@ -77,7 +77,12 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
    if (!data2.empty())
        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data2.data ));

-    size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
+    size_t gt[3] = { dst.cols, dst.rows, 1 };
+#ifdef ANDROID
+    size_t lt[3] = { 16, 10, 1 };
+#else
+    size_t lt[3] = { 16, 16, 1 };
+#endif
    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
 }

@ -105,7 +110,12 @@ static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::st
    if (!data.empty())
        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));

-    size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
+    size_t gt[3] = {src.cols, src.rows, 1};
+#ifdef ANDROID
+    size_t lt[3] = {16, 10, 1};
+#else
+    size_t lt[3] = {16, 16, 1};
+#endif
    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
 }

@ -126,7 +136,12 @@ static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));

-    size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
+    size_t gt[3] = { dst.cols, dst.rows, 1 };
+#ifdef ANDROID
+    size_t lt[3] = { 16, 10, 1 };
+#else
+    size_t lt[3] = { 16, 16, 1 };
+#endif
    openCLExecuteKernel(src.clCxt, &cvt_color, "RGB", gt, lt, args, -1, -1, build_options.c_str());
 }

@ -148,7 +163,12 @@ static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int gree
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));

-    size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
+    size_t gt[3] = { dst.cols, dst.rows, 1 };
+#ifdef ANDROID
+    size_t lt[3] = { 16, 10, 1 };
+#else
+    size_t lt[3] = { 16, 16, 1 };
+#endif
    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
 }

@ -170,7 +190,12 @@ static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenb
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));

-    size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
+    size_t gt[3] = { dst.cols, dst.rows, 1 };
+#ifdef ANDROID
+    size_t lt[3] = { 16, 10, 1 };
+#else
+    size_t lt[3] = { 16, 16, 1 };
+#endif
    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
 }

--- a/modules/ocl/src/filtering.cpp
+++ b/modules/ocl/src/filtering.cpp
@ -184,7 +184,11 @@ static void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
    int srcOffset_y = srcOffset / srcStep;
    Context *clCxt = src.clCxt;
    String kernelName;
+#ifdef ANDROID
+    size_t localThreads[3] = {16, 8, 1};
+#else
    size_t localThreads[3] = {16, 16, 1};
+#endif
    size_t globalThreads[3] = {(src.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0], (src.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1], 1};

    if (src.type() == CV_8UC1)
@ -265,7 +269,11 @@ static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
    int srcOffset_y = srcOffset / srcStep;
    Context *clCxt = src.clCxt;
    String kernelName;
+#ifdef ANDROID
+    size_t localThreads[3] = {16, 10, 1};
+#else
    size_t localThreads[3] = {16, 16, 1};
+#endif
    size_t globalThreads[3] = {(src.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0],
                               (src.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1], 1};

@ -1001,7 +1009,11 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel
    CV_Assert(ksize == (anchor << 1) + 1);
    int channels = src.oclchannels();

+#ifdef ANDROID
+    size_t localThreads[3] = { 16, 10, 1 };
+#else
    size_t localThreads[3] = { 16, 16, 1 };
+#endif
    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };

    const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101" };
@ -1098,7 +1110,11 @@ void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_ker
    Context *clCxt = src.clCxt;
    int channels = src.oclchannels();

+#ifdef ANDROID
+    size_t localThreads[3] = {16, 10, 1};
+#else
    size_t localThreads[3] = {16, 16, 1};
+#endif
    String kernelName = "col_filter";

    char btype[30];
--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@ -230,7 +230,6 @@ namespace cv
                CV_Error(Error::StsBadArg, "Unsupported map types");

            int ocn = dst.oclchannels();
-            size_t localThreads[3] = { 256, 1, 1 };
            size_t globalThreads[3] = { dst.cols, dst.rows, 1 };

            Mat scalar(1, 1, CV_MAKE_TYPE(dst.depth(), ocn), borderValue);
@ -276,29 +275,102 @@ namespace cv
            args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows));
            args.push_back( std::make_pair(scalar.elemSize(), (void *)scalar.data));

+#ifdef ANDROID
+            openCLExecuteKernel(clCxt, &imgproc_remap, kernelName, globalThreads, NULL, args, -1, -1, buildOptions.c_str());
+#else
+            size_t localThreads[3] = { 256, 1, 1 };
            openCLExecuteKernel(clCxt, &imgproc_remap, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str());
+#endif
        }

        ////////////////////////////////////////////////////////////////////////////////////////////
        // resize

-        static void resize_gpu( const oclMat &src, oclMat &dst, double fx, double fy, int interpolation)
+        static void computeResizeAreaTabs(int ssize, int dsize, double scale, int * const map_tab,
+                                          float * const alpha_tab, int * const ofs_tab)
+        {
+            int k = 0, dx = 0;
+            for ( ; dx < dsize; dx++)
+            {
+                ofs_tab[dx] = k;
+
+                double fsx1 = dx * scale;
+                double fsx2 = fsx1 + scale;
+                double cellWidth = std::min(scale, ssize - fsx1);
+
+                int sx1 = cvCeil(fsx1), sx2 = cvFloor(fsx2);
+
+                sx2 = std::min(sx2, ssize - 1);
+                sx1 = std::min(sx1, sx2);
+
+                if (sx1 - fsx1 > 1e-3)
+                {
+                    map_tab[k] = sx1 - 1;
+                    alpha_tab[k++] = (float)((sx1 - fsx1) / cellWidth);
+                }
+
+                for (int sx = sx1; sx < sx2; sx++)
+                {
+                    map_tab[k] = sx;
+                    alpha_tab[k++] = float(1.0 / cellWidth);
+                }
+
+                if (fsx2 - sx2 > 1e-3)
+                {
+                    map_tab[k] = sx2;
+                    alpha_tab[k++] = (float)(std::min(std::min(fsx2 - sx2, 1.), cellWidth) / cellWidth);
+                }
+            }
+            ofs_tab[dx] = k;
+        }
+
+        static void computeResizeAreaFastTabs(int * dmap_tab, int * smap_tab, int scale, int dcols, int scol)
+        {
+            for (int i = 0; i < dcols; ++i)
+                dmap_tab[i] = scale * i;
+
+            for (int i = 0, size = dcols * scale; i < size; ++i)
+                smap_tab[i] = std::min(scol - 1, i);
+        }
+
+        static void resize_gpu( const oclMat &src, oclMat &dst, double ifx, double ify, int interpolation)
        {
-            float ifx = 1.f / fx, ify = 1.f / fy;
+            float ifxf = (float)ifx, ifyf = (float)ify;
            int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
            int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
-            int ocn = interpolation == INTER_LINEAR ? dst.oclchannels() : -1;
-            int depth = interpolation == INTER_LINEAR ? dst.depth() : -1;
+            int ocn = dst.oclchannels(), depth = dst.depth();

            const char * const interMap[] = { "NN", "LN", "CUBIC", "AREA", "LAN4" };
            std::string kernelName = std::string("resize") + interMap[interpolation];

-            const char * const typeMap[] = { "uchar", "uchar", "ushort", "ushort", "int", "int", "double" };
+            const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
            const char * const channelMap[] = { "" , "", "2", "4", "4" };
-            std::string buildOption = format("-D %s -D T=%s%s", interMap[interpolation], typeMap[dst.depth()], channelMap[dst.oclchannels()]);
+            std::string buildOption = format("-D %s -D T=%s%s", interMap[interpolation], typeMap[depth], channelMap[ocn]);

-            //TODO: improve this kernel
+            int wdepth = std::max(src.depth(), CV_32F);
+
+            // check if fx, fy is integer and then we have inter area fast mode
+            int iscale_x = saturate_cast<int>(ifx);
+            int iscale_y = saturate_cast<int>(ify);
+
+            bool is_area_fast = std::abs(ifx - iscale_x) < DBL_EPSILON &&
+                std::abs(ify - iscale_y) < DBL_EPSILON;
+            if (is_area_fast)
+                wdepth = std::max(src.depth(), CV_32S);
+
+            if (interpolation != INTER_NEAREST)
+            {
+                buildOption += format(" -D WT=%s -D WTV=%s%s -D convertToWTV=convert_%s%s -D convertToT=convert_%s%s%s",
+                                      typeMap[wdepth], typeMap[wdepth], channelMap[ocn],
+                                      typeMap[wdepth], channelMap[ocn],
+                                      typeMap[src.depth()], channelMap[ocn], src.depth() <= CV_32S ? "_sat_rte" : "");
+            }
+
+#ifdef ANDROID
+            size_t blkSizeX = 16, blkSizeY = 8;
+#else
            size_t blkSizeX = 16, blkSizeY = 16;
+#endif
            size_t glbSizeX;
            if (src.type() == CV_8UC1 && interpolation == INTER_LINEAR)
            {
@ -308,6 +380,50 @@ namespace cv
            else
                glbSizeX = dst.cols;

+            oclMat alphaOcl, mapOcl, tabofsOcl;
+            if (interpolation == INTER_AREA)
+            {
+                if (is_area_fast)
+                {
+                    kernelName += "_FAST";
+                    int wdepth2 = std::max(CV_32F, src.depth());
+                    buildOption += format(" -D WT2V=%s%s -D convertToWT2V=convert_%s%s -D AREA_FAST -D XSCALE=%d -D YSCALE=%d -D SCALE=%f",
+                                          typeMap[wdepth2], channelMap[ocn], typeMap[wdepth2], channelMap[ocn],
+                                          iscale_x, iscale_y, 1.0f / (iscale_x * iscale_y));
+
+                    int smap_tab_size = dst.cols * iscale_x + dst.rows * iscale_y;
+                    AutoBuffer<int> dmap_tab(dst.cols + dst.rows), smap_tab(smap_tab_size);
+                    int * dxmap_tab = dmap_tab, * dymap_tab = dxmap_tab + dst.cols;
+                    int * sxmap_tab = smap_tab, * symap_tab = smap_tab + dst.cols * iscale_y;
+
+                    computeResizeAreaFastTabs(dxmap_tab, sxmap_tab, iscale_x, dst.cols, src.cols);
+                    computeResizeAreaFastTabs(dymap_tab, symap_tab, iscale_y, dst.rows, src.rows);
+
+                    tabofsOcl = oclMat(1, dst.cols + dst.rows, CV_32SC1, (void *)dmap_tab);
+                    mapOcl = oclMat(1, smap_tab_size, CV_32SC1, (void *)smap_tab);
+                }
+                else
+                {
+                    Size ssize = src.size(), dsize = dst.size();
+                    int xytab_size = (ssize.width + ssize.height) << 1;
+                    int tabofs_size = dsize.height + dsize.width + 2;
+
+                    AutoBuffer<int> _xymap_tab(xytab_size), _xyofs_tab(tabofs_size);
+                    AutoBuffer<float> _xyalpha_tab(xytab_size);
+                    int * xmap_tab = _xymap_tab, * ymap_tab = _xymap_tab + (ssize.width << 1);
+                    float * xalpha_tab = _xyalpha_tab, * yalpha_tab = _xyalpha_tab + (ssize.width << 1);
+                    int * xofs_tab = _xyofs_tab, * yofs_tab = _xyofs_tab + dsize.width + 1;
+
+                    computeResizeAreaTabs(ssize.width, dsize.width, ifx, xmap_tab, xalpha_tab, xofs_tab);
+                    computeResizeAreaTabs(ssize.height, dsize.height, ify, ymap_tab, yalpha_tab, yofs_tab);
+
+                    // loading precomputed arrays to GPU
+                    alphaOcl = oclMat(1, xytab_size, CV_32FC1, (void *)_xyalpha_tab);
+                    mapOcl = oclMat(1, xytab_size, CV_32SC1, (void *)_xymap_tab);
+                    tabofsOcl = oclMat(1, tabofs_size, CV_32SC1, (void *)_xyofs_tab);
+                }
+            }
+
            size_t globalThreads[3] = { glbSizeX, dst.rows, 1 };
            size_t localThreads[3] = { blkSizeX, blkSizeY, 1 };

@ -322,8 +438,30 @@ namespace cv
            args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.rows));
            args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.cols));
            args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows));
-            args.push_back( std::make_pair(sizeof(cl_float), (void *)&ifx));
-            args.push_back( std::make_pair(sizeof(cl_float), (void *)&ify));
+
+            if (wdepth == CV_64F)
+            {
+                args.push_back( std::make_pair(sizeof(cl_double), (void *)&ifx));
+                args.push_back( std::make_pair(sizeof(cl_double), (void *)&ify));
+            }
+            else
+            {
+                args.push_back( std::make_pair(sizeof(cl_float), (void *)&ifxf));
+                args.push_back( std::make_pair(sizeof(cl_float), (void *)&ifyf));
+            }
+
+            // precomputed tabs
+            if (!tabofsOcl.empty())
+                args.push_back( std::make_pair(sizeof(cl_mem), (void *)&tabofsOcl.data));
+
+            if (!mapOcl.empty())
+                args.push_back( std::make_pair(sizeof(cl_mem), (void *)&mapOcl.data));
+
+            if (!alphaOcl.empty())
+                args.push_back( std::make_pair(sizeof(cl_mem), (void *)&alphaOcl.data));
+
+            ocn = interpolation == INTER_LINEAR ? ocn : -1;
+            depth = interpolation == INTER_LINEAR ? depth : -1;

            openCLExecuteKernel(src.clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args,
                                ocn, depth, buildOption.c_str());
@ -331,9 +469,14 @@ namespace cv

        void resize(const oclMat &src, oclMat &dst, Size dsize, double fx, double fy, int interpolation)
        {
+            if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
+            {
+                CV_Error(CV_OpenCLDoubleNotSupported, "Selected device does not support double");
+                return;
+            }
+
            CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3 || src.type() == CV_8UC4
                      || src.type() == CV_32FC1 || src.type() == CV_32FC3 || src.type() == CV_32FC4);
-            CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST);
            CV_Assert(dsize.area() > 0 || (fx > 0 && fy > 0));

            if (dsize.area() == 0)
@ -347,9 +490,13 @@ namespace cv
                fy = (double)dsize.height / src.rows;
            }

+            double inv_fy = 1 / fy, inv_fx = 1 / fx;
+            CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST ||
+                      (interpolation == INTER_AREA && inv_fx >= 1 && inv_fy >= 1));
+
            dst.create(dsize, src.type());

-            resize_gpu( src, dst, fx, fy, interpolation);
+            resize_gpu( src, dst, inv_fx, inv_fy, interpolation);
        }

        ////////////////////////////////////////////////////////////////////////
@ -575,8 +722,13 @@ namespace cv
                                                        1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0));

                }
+
                //TODO: improve this kernel
+#ifdef ANDROID
+                size_t blkSizeX = 16, blkSizeY = 4;
+#else
                size_t blkSizeX = 16, blkSizeY = 16;
+#endif
                size_t glbSizeX;
                size_t cols;

@ -648,7 +800,11 @@ namespace cv
                }

                //TODO: improve this kernel
+#ifdef ANDROID
+                size_t blkSizeX = 16, blkSizeY = 8;
+#else
                size_t blkSizeX = 16, blkSizeY = 16;
+#endif
                size_t glbSizeX;
                size_t cols;
                if (src.type() == CV_8UC1 && interpolation == 0)
@ -1564,7 +1720,11 @@ namespace cv
            oclMat oclspace_ofs(1, d * d, CV_32SC1, space_ofs);

            String kernelName = "bilateral";
+#ifdef ANDROID
+            size_t localThreads[3]  = { 16, 8, 1 };
+#else
            size_t localThreads[3]  = { 16, 16, 1 };
+#endif
            size_t globalThreads[3] = { dst.cols, dst.rows, 1 };

            if ((dst.type() == CV_8UC1) && ((dst.offset & 3) == 0) && ((dst.cols & 3) == 0))
--- a/modules/ocl/src/matrix_operations.cpp
+++ b/modules/ocl/src/matrix_operations.cpp
@ -85,10 +85,15 @@ static void convert_C3C4(const cl_mem &src, oclMat &dst)
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&pixel_end));

    size_t globalThreads[3] = { divUp(dst.wholecols * dst.wholerows, 4), 1, 1 };
-    size_t localThreads[3] = { 256, 1, 1 };

+#ifdef ANDROID
+    openCLExecuteKernel(clCxt, &convertC3C4, "convertC3C4", globalThreads, NULL,
+                        args, -1, -1, buildOptions.c_str());
+#else
+    size_t localThreads[3] = { 256, 1, 1 };
    openCLExecuteKernel(clCxt, &convertC3C4, "convertC3C4", globalThreads, localThreads,
                        args, -1, -1, buildOptions.c_str());
+#endif
 }

 ////////////////////////////////////////////////////////////////////////
@ -112,9 +117,13 @@ static void convert_C4C3(const oclMat &src, cl_mem &dst)
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&pixel_end));

    size_t globalThreads[3] = { divUp(src.wholecols * src.wholerows, 4), 1, 1};
-    size_t localThreads[3] = { 256, 1, 1 };

+#ifdef ANDROID
+    openCLExecuteKernel(clCxt, &convertC3C4, "convertC4C3", globalThreads, NULL, args, -1, -1, buildOptions.c_str());
+#else
+    size_t localThreads[3] = { 256, 1, 1};
    openCLExecuteKernel(clCxt, &convertC3C4, "convertC4C3", globalThreads, localThreads, args, -1, -1, buildOptions.c_str());
+#endif
 }

 void cv::ocl::oclMat::upload(const Mat &m)
--- a/modules/ocl/src/mssegmentation.cpp
+++ b/modules/ocl/src/mssegmentation.cpp
@ -348,7 +348,7 @@ namespace cv
            }

            // Sort all graph's edges connecting differnet components (in asceding order)
-            sort(edges.begin(), edges.end());
+            std::sort(edges.begin(), edges.end());

            // Exclude small components (starting from the nearest couple)
            for (size_t i = 0; i < edges.size(); ++i)
--- a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
+++ b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
@ -52,6 +52,8 @@
 #endif
 #endif

+#ifdef BINARY
+
 __kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_step, int src1_offset,
                         __global srcT *src2, int src2_step, int src2_offset,
                         __global dstT *dst, int dst_step, int dst_offset,
@ -78,6 +80,8 @@ __kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_st
    }
 }

+#else
+
 __kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int src1_offset,
                         __global dstT *dst, int dst_step, int dst_offset,
                         int cols, int rows)
@ -99,3 +103,5 @@ __kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int
        }
    }
 }
+
+#endif
--- a/modules/ocl/src/opencl/brute_force_match.cl
+++ b/modules/ocl/src/opencl/brute_force_match.cl
@ -82,7 +82,7 @@ typedef float result_type;
 #define DIST_RES(x) sqrt(x)
 #elif (DIST_TYPE == 2) // Hamming
 //http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
-static int bit1Count(int v)
+inline int bit1Count(int v)
 {
    v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
    v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
@ -94,7 +94,7 @@ typedef int result_type;
 #define DIST_RES(x) (x)
 #endif

-static result_type reduce_block(
+inline result_type reduce_block(
    __local value_type *s_query,
    __local value_type *s_train,
    int lidx,
@ -112,7 +112,7 @@ static result_type reduce_block(
    return DIST_RES(result);
 }

-static result_type reduce_block_match(
+inline result_type reduce_block_match(
    __local value_type *s_query,
    __local value_type *s_train,
    int lidx,
@ -130,7 +130,7 @@ static result_type reduce_block_match(
    return (result);
 }

-static result_type reduce_multi_block(
+inline result_type reduce_multi_block(
    __local value_type *s_query,
    __local value_type *s_train,
    int block_index,
--- a/modules/ocl/src/opencl/imgproc_clahe.cl
+++ b/modules/ocl/src/opencl/imgproc_clahe.cl
@ -47,7 +47,7 @@
 #define WAVE_SIZE 1
 #endif

-static int calc_lut(__local int* smem, int val, int tid)
+inline int calc_lut(__local int* smem, int val, int tid)
 {
    smem[tid] = val;
    barrier(CLK_LOCAL_MEM_FENCE);
@ -61,7 +61,7 @@ static int calc_lut(__local int* smem, int val, int tid)
 }

 #ifdef CPU
-static void reduce(volatile __local int* smem, int val, int tid)
+inline void reduce(volatile __local int* smem, int val, int tid)
 {
    smem[tid] = val;
    barrier(CLK_LOCAL_MEM_FENCE);
@ -101,7 +101,7 @@ static void reduce(volatile __local int* smem, int val, int tid)

 #else

-static void reduce(__local volatile int* smem, int val, int tid)
+inline void reduce(__local volatile int* smem, int val, int tid)
 {
    smem[tid] = val;
    barrier(CLK_LOCAL_MEM_FENCE);
--- a/modules/ocl/src/opencl/imgproc_resize.cl
+++ b/modules/ocl/src/opencl/imgproc_resize.cl
@ -315,4 +315,91 @@ __kernel void resizeNN(__global T * dst, __global T * src,
    }
 }

+#elif defined AREA
+
+#ifdef AREA_FAST
+
+__kernel void resizeAREA_FAST(__global T * dst, __global T * src,
+                         int dst_offset, int src_offset, int dst_step, int src_step,
+                         int src_cols, int src_rows, int dst_cols, int dst_rows, WT ifx, WT ify,
+                         __global const int * dmap_tab, __global const int * smap_tab)
+{
+    int dx = get_global_id(0);
+    int dy = get_global_id(1);
+
+    if (dx < dst_cols && dy < dst_rows)
+    {
+        int dst_index = mad24(dy, dst_step, dst_offset + dx);
+
+        __global const int * xmap_tab = dmap_tab;
+        __global const int * ymap_tab = dmap_tab + dst_cols;
+        __global const int * sxmap_tab = smap_tab;
+        __global const int * symap_tab = smap_tab + XSCALE * dst_cols;
+
+        int sx = xmap_tab[dx], sy = ymap_tab[dy];
+        WTV sum = (WTV)(0);
+
+        #pragma unroll
+        for (int y = 0; y < YSCALE; ++y)
+        {
+            int src_index = mad24(symap_tab[y + sy], src_step, src_offset);
+            #pragma unroll
+            for (int x = 0; x < XSCALE; ++x)
+                sum += convertToWTV(src[src_index + sxmap_tab[sx + x]]);
+        }
+
+        dst[dst_index] = convertToT(convertToWT2V(sum) * (WT2V)(SCALE));
+    }
+}
+
+#else
+
+__kernel void resizeAREA(__global T * dst, __global T * src,
+                         int dst_offset, int src_offset, int dst_step, int src_step,
+                         int src_cols, int src_rows, int dst_cols, int dst_rows, WT ifx, WT ify,
+                         __global const int * ofs_tab, __global const int * map_tab,
+                         __global const float * alpha_tab)
+{
+    int dx = get_global_id(0);
+    int dy = get_global_id(1);
+
+    if (dx < dst_cols && dy < dst_rows)
+    {
+        int dst_index = mad24(dy, dst_step, dst_offset + dx);
+
+        __global const int * xmap_tab = map_tab;
+        __global const int * ymap_tab = (__global const int *)(map_tab + (src_cols << 1));
+        __global const float * xalpha_tab = alpha_tab;
+        __global const float * yalpha_tab = (__global const float *)(alpha_tab + (src_cols << 1));
+        __global const int * xofs_tab = ofs_tab;
+        __global const int * yofs_tab = (__global const int *)(ofs_tab + dst_cols + 1);
+
+        int xk0 = xofs_tab[dx], xk1 = xofs_tab[dx + 1];
+        int yk0 = yofs_tab[dy], yk1 = yofs_tab[dy + 1];
+
+        int sy0 = ymap_tab[yk0], sy1 = ymap_tab[yk1 - 1];
+        int sx0 = xmap_tab[xk0], sx1 = xmap_tab[xk1 - 1];
+
+        WTV sum = (WTV)(0), buf;
+        int src_index = mad24(sy0, src_step, src_offset);
+
+        for (int sy = sy0, yk = yk0; sy <= sy1; ++sy, src_index += src_step, ++yk)
+        {
+            WTV beta = (WTV)(yalpha_tab[yk]);
+            buf = (WTV)(0);
+
+            for (int sx = sx0, xk = xk0; sx <= sx1; ++sx, ++xk)
+            {
+                WTV alpha = (WTV)(xalpha_tab[xk]);
+                buf += convertToWTV(src[src_index + sx]) * alpha;
+            }
+            sum += buf * beta;
+        }
+
+        dst[dst_index] = convertToT(sum);
+    }
+}
+
+#endif
+
 #endif
--- a/modules/ocl/src/opencl/kernel_stablesort_by_key.cl
+++ b/modules/ocl/src/opencl/kernel_stablesort_by_key.cl
@ -65,7 +65,7 @@
 //  by a base pointer and left and right index for a particular candidate value.  The comparison operator is
 //  passed as a functor parameter my_comp
 //  This function returns an index that is the first index whos value would be equal to the searched value
-static uint lowerBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
+inline uint lowerBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
 {
    //  The values firstIndex and lastIndex get modified within the loop, narrowing down the potential sequence
    uint firstIndex = left;
@ -101,7 +101,7 @@ static uint lowerBoundBinary( global K_T* data, uint left, uint right, K_T searc
 //  passed as a functor parameter my_comp
 //  This function returns an index that is the first index whos value would be greater than the searched value
 //  If the search value is not found in the sequence, upperbound returns the same result as lowerbound
-static uint upperBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
+inline uint upperBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
 {
    uint upperBound = lowerBoundBinary( data, left, right, searchVal );

--- a/modules/ocl/src/opencl/stereobm.cl
+++ b/modules/ocl/src/opencl/stereobm.cl
@ -56,7 +56,7 @@
 #define radius 64
 #endif

-static unsigned int CalcSSD(__local unsigned int *col_ssd)
+inline unsigned int CalcSSD(__local unsigned int *col_ssd)
 {
    unsigned int cache = col_ssd[0];

@ -67,7 +67,7 @@ static unsigned int CalcSSD(__local unsigned int *col_ssd)
    return cache;
 }

-static uint2 MinSSD(__local unsigned int *col_ssd)
+inline uint2 MinSSD(__local unsigned int *col_ssd)
 {
    unsigned int ssd[N_DISPARITIES];
    const int win_size = (radius << 1);
@ -95,7 +95,7 @@ static uint2 MinSSD(__local unsigned int *col_ssd)
    return (uint2)(mssd, bestIdx);
 }

-static void StepDown(int idx1, int idx2, __global unsigned char* imageL,
+inline void StepDown(int idx1, int idx2, __global unsigned char* imageL,
              __global unsigned char* imageR, int d,   __local unsigned int *col_ssd)
 {
    uint8 imgR1 = convert_uint8(vload8(0, imageR + (idx1 - d - 7)));
@ -114,7 +114,7 @@ static void StepDown(int idx1, int idx2, __global unsigned char* imageL,
    col_ssd[7 * (BLOCK_W + win_size)] += res.s0;
 }

-static void InitColSSD(int x_tex, int y_tex, int im_pitch, __global unsigned char* imageL,
+inline void InitColSSD(int x_tex, int y_tex, int im_pitch, __global unsigned char* imageL,
                __global unsigned char* imageR, int d,
                 __local unsigned int *col_ssd)
 {
@ -241,7 +241,7 @@ __kernel void prefilter_xsobel(__global unsigned char *input, __global unsigned
 /////////////////////////////////// Textureness filtering ////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////////////////////////////

-static float sobel(__global unsigned char *input, int x, int y, int rows, int cols)
+inline float sobel(__global unsigned char *input, int x, int y, int rows, int cols)
 {
    float conv = 0;
    int y1 = y==0? 0 : y-1;
@ -256,7 +256,7 @@ static float sobel(__global unsigned char *input, int x, int y, int rows, int co
    return fabs(conv);
 }

-static float CalcSums(__local float *cols, __local float *cols_cache, int winsz)
+inline float CalcSums(__local float *cols, __local float *cols_cache, int winsz)
 {
    unsigned int cache = cols[0];

--- a/modules/ocl/src/opencl/stereocsbp.cl
+++ b/modules/ocl/src/opencl/stereocsbp.cl
@ -1000,7 +1000,7 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
 //////////////////////// init message /////////////////////////
 ///////////////////////////////////////////////////////////////

-static void get_first_k_element_increase_0(__global short* u_new, __global short *d_new, __global short *l_new,
+inline void get_first_k_element_increase_0(__global short* u_new, __global short *d_new, __global short *l_new,
    __global short *r_new, __global const short *u_cur, __global const short *d_cur,
    __global const short *l_cur, __global const short *r_cur,
    __global short *data_cost_selected, __global short *disparity_selected_new,
@ -1165,7 +1165,7 @@ __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __g
 ////////////////////  calc all iterations /////////////////////
 ///////////////////////////////////////////////////////////////

-static void message_per_pixel_0(__global const short *data, __global short *msg_dst, __global const short *msg1,
+inline void message_per_pixel_0(__global const short *data, __global short *msg_dst, __global const short *msg1,
    __global const short *msg2, __global const short *msg3,
    __global const short *dst_disp, __global const short *src_disp,
    int nr_plane, __global short *temp,
@ -1202,7 +1202,7 @@ static void message_per_pixel_0(__global const short *data, __global short *msg_
        msg_dst[d * cdisp_step1] = convert_short_sat_rte(temp[d * cdisp_step1] - sum);
 }

-static void message_per_pixel_1(__global const float *data, __global float *msg_dst, __global const float *msg1,
+inline void message_per_pixel_1(__global const float *data, __global float *msg_dst, __global const float *msg1,
    __global const float *msg2, __global const float *msg3,
    __global const float *dst_disp, __global const float *src_disp,
    int nr_plane, __global float *temp,
--- a/modules/ocl/src/opencl/svm.cl
+++ b/modules/ocl/src/opencl/svm.cl
@ -56,6 +56,8 @@
 #endif
 #define MAX_VAL   (FLT_MAX*1e-3)

+#define BLOCK_SIZE 16
+
 __kernel void svm_linear(__global float* src, int src_step, __global float* src2, int src2_step, __global TYPE* dst, int dst_step, int src_rows, int src2_cols,
                         int width, TYPE alpha, TYPE beta)
 {
@ -66,7 +68,7 @@ __kernel void svm_linear(__global float* src, int src_step, __global float* src2
    {
        int t = 0;
        TYPE temp = 0.0;
-        for(t = 0; t < width - 16; t += 16)
+        for(t = 0; t < width - BLOCK_SIZE; t += BLOCK_SIZE)
        {
            float16 t0 = vload16(0, src + row * src_step + t);
            float16 t1 = vload16(0, src2 + col * src2_step + t);
@ -103,7 +105,7 @@ __kernel void svm_sigmod(__global float* src, int src_step, __global float* src2
    {
        int t = 0;
        TYPE temp = 0.0;
-        for(t = 0; t < width - 16; t += 16)
+        for(t = 0; t < width - BLOCK_SIZE; t += BLOCK_SIZE)
        {
            float16 t0 = vload16(0, src + row * src_step + t);
            float16 t1 = vload16(0, src2 + col * src2_step + t);
@ -148,7 +150,7 @@ __kernel void svm_poly(__global float* src, int src_step, __global float* src2,
    {
        int t = 0;
        TYPE temp = 0.0;
-        for(t = 0; t < width - 16; t += 16)
+        for(t = 0; t < width - BLOCK_SIZE; t += BLOCK_SIZE)
        {
            float16 t0 = vload16(0, src + row * src_step + t);
            float16 t1 = vload16(0, src2 + col * src2_step + t);
@ -183,7 +185,7 @@ __kernel void svm_rbf(__global float* src, int src_step, __global float* src2, i
    {
        int t = 0;
        TYPE temp = 0.0;
-        for(t = 0; t < width - 16; t += 16)
+        for(t = 0; t < width - BLOCK_SIZE; t += BLOCK_SIZE)
        {
            float16 t0 = vload16(0, src + row * src_step + t);
            float16 t1 = vload16(0, src2 + col * src2_step + t);
--- a/modules/ocl/src/optical_flow_farneback.cpp
+++ b/modules/ocl/src/optical_flow_farneback.cpp
@ -73,7 +73,11 @@ inline void setGaussianBlurKernel(const float *c_gKer, int ksizeHalf)
 static void gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst)
 {
    String kernelName("gaussianBlur");
+#ifdef ANDROID
+    size_t localThreads[3] = { 128, 1, 1 };
+#else
    size_t localThreads[3] = { 256, 1, 1 };
+#endif
    size_t globalThreads[3] = { src.cols, src.rows, 1 };
    int smem_size = (localThreads[0] + 2*ksizeHalf) * sizeof(float);

@ -96,7 +100,12 @@ static void gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst)
 static void polynomialExpansionOcl(const oclMat &src, int polyN, oclMat &dst)
 {
    String kernelName("polynomialExpansion");
+
+#ifdef ANDROID
+    size_t localThreads[3] = { 128, 1, 1 };
+#else
    size_t localThreads[3] = { 256, 1, 1 };
+#endif
    size_t globalThreads[3] = { divUp(src.cols, localThreads[0] - 2*polyN) * localThreads[0], src.rows, 1 };
    int smem_size = 3 * localThreads[0] * sizeof(float);

@ -123,7 +132,11 @@ static void polynomialExpansionOcl(const oclMat &src, int polyN, oclMat &dst)
 static void updateMatricesOcl(const oclMat &flowx, const oclMat &flowy, const oclMat &R0, const oclMat &R1, oclMat &M)
 {
    String kernelName("updateMatrices");
+#ifdef ANDROID
+    size_t localThreads[3] = { 32, 4, 1 };
+#else
    size_t localThreads[3] = { 32, 8, 1 };
+#endif
    size_t globalThreads[3] = { flowx.cols, flowx.rows, 1 };

    std::vector< std::pair<size_t, const void *> > args;
@ -148,7 +161,11 @@ static void boxFilter5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
 {
    String kernelName("boxFilter5");
    int height = src.rows / 5;
+#ifdef ANDROID
+    size_t localThreads[3] = { 128, 1, 1 };
+#else
    size_t localThreads[3] = { 256, 1, 1 };
+#endif
    size_t globalThreads[3] = { src.cols, height, 1 };
    int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float);

@ -170,7 +187,11 @@ static void updateFlowOcl(const oclMat &M, oclMat &flowx, oclMat &flowy)
 {
    String kernelName("updateFlow");
    int cols = divUp(flowx.cols, 4);
+#ifdef ANDROID
+    size_t localThreads[3] = { 32, 4, 1 };
+#else
    size_t localThreads[3] = { 32, 8, 1 };
+#endif
    size_t globalThreads[3] = { cols, flowx.rows, 1 };

    std::vector< std::pair<size_t, const void *> > args;
@ -191,7 +212,11 @@ static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
 {
    String kernelName("gaussianBlur5");
    int height = src.rows / 5;
+#ifdef ANDROID
+    size_t localThreads[3] = { 128, 1, 1 };
+#else
    size_t localThreads[3] = { 256, 1, 1 };
+#endif
    size_t globalThreads[3] = { src.cols, height, 1 };
    int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float);

--- a/modules/ocl/src/sort_by_key.cpp
+++ b/modules/ocl/src/sort_by_key.cpp
@ -55,8 +55,10 @@ namespace ocl
 {
 void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, int method, bool isGreaterThan);

+#ifndef ANDROID
 //TODO(pengx17): change this value depending on device other than a constant
 const static unsigned int GROUP_SIZE = 256;
+#endif

 const char * depth_strings[] =
 {
@ -91,7 +93,6 @@ static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreater

    Context * cxt = Context::getContext();
    size_t globalThreads[3] = {vecSize / 2, 1, 1};
-    size_t localThreads[3]  = {GROUP_SIZE, 1, 1};

    // 2^numStages should be equal to vecSize or the output is invalid
    int numStages = 0;
@ -115,7 +116,12 @@ static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreater
        for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
        {
            args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
+#ifdef ANDROID
+            openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, NULL, args, -1, -1, build_opt_buf);
+#else
+            size_t localThreads[3]  = {GROUP_SIZE, 1, 1};
            openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf);
+#endif
        }
    }
 }
@ -131,7 +137,6 @@ static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreater
    Context * cxt = Context::getContext();

    size_t globalThreads[3] = {vecSize, 1, 1};
-    size_t localThreads[3]  = {GROUP_SIZE, 1, 1};

    std::vector< std::pair<size_t, const void *> > args;
    char build_opt_buf [100];
@ -139,18 +144,31 @@ static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreater

    //local
    String kernelname = "selectionSortLocal";
+#ifdef ANDROID
+    int lds_size = cxt->getDeviceInfo().maxWorkGroupSize * keys.elemSize();
+#else
    int lds_size = GROUP_SIZE * keys.elemSize();
+#endif
    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&keys.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&vals.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void *)&vecSize));
    args.push_back(std::make_pair(lds_size,       (void*)NULL));

+#ifdef ANDROID
+    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, NULL, args, -1, -1, build_opt_buf);
+#else
+    size_t localThreads[3] = {GROUP_SIZE, 1, 1};
    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf);
+#endif

    //final
    kernelname = "selectionSortFinal";
    args.pop_back();
+#ifdef ANDROID
+    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, NULL, args, -1, -1, build_opt_buf);
+#else
    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf);
+#endif
 }

 }  /* selection_sort */
@ -340,6 +358,8 @@ static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreater
 {
    Context * cxt = Context::getContext();

+    const size_t GROUP_SIZE = cxt->getDeviceInfo().maxWorkGroupSize >= 256 ? 256: 128;
+
    size_t globalThreads[3] = {vecSize, 1, 1};
    size_t localThreads[3]  = {GROUP_SIZE, 1, 1};

--- a/modules/ocl/test/test_brute_force_matcher.cpp
+++ b/modules/ocl/test/test_brute_force_matcher.cpp
@ -106,7 +106,11 @@ namespace
        }
    };

+#ifdef ANDROID
+    OCL_TEST_P(BruteForceMatcher, DISABLED_Match_Single)
+#else
    OCL_TEST_P(BruteForceMatcher, Match_Single)
+#endif
    {
        cv::ocl::BruteForceMatcher_OCL_base matcher(distType);

@ -126,7 +130,11 @@ namespace
        ASSERT_EQ(0, badCount);
    }

+#ifdef ANDROID
+    OCL_TEST_P(BruteForceMatcher, DISABLED_KnnMatch_2_Single)
+#else
    OCL_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
+#endif
    {
        const int knn = 2;

@ -158,7 +166,11 @@ namespace
        ASSERT_EQ(0, badCount);
    }

+#ifdef ANDROID
+    OCL_TEST_P(BruteForceMatcher, DISABLED_RadiusMatch_Single)
+#else
    OCL_TEST_P(BruteForceMatcher, RadiusMatch_Single)
+#endif
    {
        float radius = 1.f / countFactor;

--- a/modules/ocl/test/test_filters.cpp
+++ b/modules/ocl/test/test_filters.cpp
@ -132,7 +132,11 @@ PARAM_TEST_CASE(FilterTestBase, MatType,

 typedef FilterTestBase Blur;

+#ifdef ANDROID
+OCL_TEST_P(Blur, DISABLED_Mat)
+#else
 OCL_TEST_P(Blur, Mat)
+#endif
 {
    Size kernelSize(ksize, ksize);

@ -272,7 +276,7 @@ OCL_TEST_P(GaussianBlurTest, Mat)
        GaussianBlur(src_roi, dst_roi, Size(ksize, ksize), sigma1, sigma2, borderType);
        ocl::GaussianBlur(gsrc_roi, gdst_roi, Size(ksize, ksize), sigma1, sigma2, borderType);

-        Near(CV_MAT_DEPTH(type) == CV_8U ? 3 : 1e-6, false);
+        Near(CV_MAT_DEPTH(type) == CV_8U ? 3 : 5e-5, false);
    }
 }

--- a/modules/ocl/test/test_split_merge.cpp
+++ b/modules/ocl/test/test_split_merge.cpp
@ -189,7 +189,13 @@ PARAM_TEST_CASE(SplitTestBase, MatType, int, bool)

 struct Split : SplitTestBase {};

+#ifdef ANDROID
+// NOTE: The test fail on Android is the top of the iceberg only
+// The real fail reason is memory access vialation somewhere else
+OCL_TEST_P(Split, DISABLED_Accuracy)
+#else
 OCL_TEST_P(Split, Accuracy)
+#endif
 {
    for(int j = 0; j < LOOP_TIMES; j++)
    {
--- a/modules/ocl/test/test_warp.cpp
+++ b/modules/ocl/test/test_warp.cpp
@ -398,10 +398,7 @@ PARAM_TEST_CASE(Resize, MatType, double, double, Interpolation, bool)
        dstRoiSize.height = cvRound(srcRoiSize.height * fy);

        if (dstRoiSize.area() == 0)
-        {
-            random_roi();
-            return;
-        }
+            return random_roi();

        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
        randomSubMat(src, src_roi, srcRoiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
@ -480,11 +477,18 @@ INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_NEAREST, Combine(
                                   (Border)BORDER_REFLECT_101),
                            Bool()));

-INSTANTIATE_TEST_CASE_P(ImgprocWarp, Resize, Combine(
-                            Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                            Values(0.5, 1.5, 2.0),
-                            Values(0.5, 1.5, 2.0),
+INSTANTIATE_TEST_CASE_P(ImgprocWarpResize, Resize, Combine(
+                            Values((MatType)CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
+                            Values(0.7, 0.4, 2.0),
+                            Values(0.3, 0.6, 2.0),
                            Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR),
                            Bool()));

+INSTANTIATE_TEST_CASE_P(ImgprocWarpResizeArea, Resize, Combine(
+                            Values((MatType)CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
+                            Values(0.7, 0.4, 0.5),
+                            Values(0.3, 0.6, 0.5),
+                            Values((Interpolation)INTER_AREA),
+                            Bool()));
+
 #endif // HAVE_OPENCL
--- a/modules/ocl/test/utility.cpp
+++ b/modules/ocl/test/utility.cpp
@ -230,7 +230,7 @@ double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& o
    return final_test_result;
 }

-void showDiff(const Mat& gold, const Mat& actual, double eps, bool alwaysShow)
+void showDiff(const Mat& src, const Mat& gold, const Mat& actual, double eps, bool alwaysShow)
 {
    Mat diff, diff_thresh;
    absdiff(gold, actual, diff);
@ -239,10 +239,18 @@ void showDiff(const Mat& gold, const Mat& actual, double eps, bool alwaysShow)

    if (alwaysShow || cv::countNonZero(diff_thresh.reshape(1)) > 0)
    {
+#if 0
+        std::cout << "Src: " << std::endl << src << std::endl;
+        std::cout << "Reference: " << std::endl << gold << std::endl;
+        std::cout << "OpenCL: " << std::endl << actual << std::endl;
+#endif
+
+        namedWindow("src", WINDOW_NORMAL);
        namedWindow("gold", WINDOW_NORMAL);
        namedWindow("actual", WINDOW_NORMAL);
        namedWindow("diff", WINDOW_NORMAL);

+        imshow("src", src);
        imshow("gold", gold);
        imshow("actual", actual);
        imshow("diff", diff);
--- a/modules/ocl/test/utility.hpp
+++ b/modules/ocl/test/utility.hpp
@ -54,7 +54,7 @@ extern int LOOP_TIMES;

 namespace cvtest {

-void showDiff(const Mat& gold, const Mat& actual, double eps, bool alwaysShow = false);
+void showDiff(const Mat& src, const Mat& gold, const Mat& actual, double eps, bool alwaysShow = false);

 cv::ocl::oclMat createMat_ocl(cv::RNG& rng, Size size, int type, bool useRoi);
 cv::ocl::oclMat loadMat_ocl(cv::RNG& rng, const Mat& m, bool useRoi);
@ -264,7 +264,7 @@ CV_ENUM(NormCode, NORM_INF, NORM_L1, NORM_L2, NORM_TYPE_MASK, NORM_RELATIVE, NOR
 CV_ENUM(ReduceOp, REDUCE_SUM, REDUCE_AVG, REDUCE_MAX, REDUCE_MIN)
 CV_ENUM(MorphOp, MORPH_OPEN, MORPH_CLOSE, MORPH_GRADIENT, MORPH_TOPHAT, MORPH_BLACKHAT)
 CV_ENUM(ThreshOp, THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV)
-CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC)
+CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA)
 CV_ENUM(Border, BORDER_REFLECT101, BORDER_REPLICATE, BORDER_CONSTANT, BORDER_REFLECT, BORDER_WRAP)
 CV_ENUM(TemplateMethod, TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED)

--- a/modules/ts/misc/run.py
+++ b/modules/ts/misc/run.py
@ -562,7 +562,10 @@ class TestSuite(object):
            else:
                hw = ""
            tstamp = timestamp.strftime("%Y%m%d-%H%M%S")
-            return "%s_%s_%s_%s%s%s.xml" % (app, self.targetos, self.targetarch, hw, rev, tstamp)
+            lname = "%s_%s_%s_%s%s%s.xml" % (app, self.targetos, self.targetarch, hw, rev, tstamp)
+            lname = str.replace(lname, '(', '_')
+            lname = str.replace(lname, ')', '_')
+            return lname

    def getTest(self, name):
        # full path
--- a/modules/ts/misc/summary.py
+++ b/modules/ts/misc/summary.py
@ -39,6 +39,7 @@ if __name__ == "__main__":
    parser.add_option("", "--no-relatives", action="store_false", dest="calc_relatives", default=True, help="do not output relative values")
    parser.add_option("", "--with-cycles-reduction", action="store_true", dest="calc_cr", default=False, help="output cycle reduction percentages")
    parser.add_option("", "--with-score", action="store_true", dest="calc_score", default=False, help="output automatic classification of speedups")
+    parser.add_option("", "--progress", action="store_true", dest="progress_mode", default=False, help="enable progress mode")
    parser.add_option("", "--show-all", action="store_true", dest="showall", default=False, help="also include empty and \"notrun\" lines")
    parser.add_option("", "--match", dest="match", default=None)
    parser.add_option("", "--match-replace", dest="match_replace", default="")
@ -108,11 +109,9 @@ if __name__ == "__main__":

    # build table
    getter = metrix_table[options.metric][1]
-    getter_score = metrix_table["score"][1]
-    if options.calc_relatives:
-        getter_p = metrix_table[options.metric + "%"][1]
-    if options.calc_cr:
-        getter_cr = metrix_table[options.metric + "$"][1]
+    getter_score = metrix_table["score"][1] if options.calc_score else None
+    getter_p = metrix_table[options.metric + "%"][1] if options.calc_relatives else None
+    getter_cr = metrix_table[options.metric + "$"][1] if options.calc_cr else None
    tbl = table(metrix_table[options.metric][0])

    # header
@ -125,17 +124,20 @@ if __name__ == "__main__":
    if options.calc_cr:
        i = 1
        for set in metric_sets:
-            tbl.newColumn(str(i) + "$", getSetName(set, i, options.columns) + "\nvs\n" + getSetName(test_sets[0], 0, options.columns) + "\n(cycles reduction)", align = "center", cssclass = "col_cr")
+            reference = getSetName(test_sets[0], 0, options.columns) if not options.progress_mode else 'previous'
+            tbl.newColumn(str(i) + "$", getSetName(set, i, options.columns) + "\nvs\n" + reference + "\n(cycles reduction)", align = "center", cssclass = "col_cr")
            i += 1
    if options.calc_relatives:
        i = 1
        for set in metric_sets:
-            tbl.newColumn(str(i) + "%", getSetName(set, i, options.columns) + "\nvs\n" + getSetName(test_sets[0], 0, options.columns) + "\n(x-factor)", align = "center", cssclass = "col_rel")
+            reference = getSetName(test_sets[0], 0, options.columns) if not options.progress_mode else 'previous'
+            tbl.newColumn(str(i) + "%", getSetName(set, i, options.columns) + "\nvs\n" + reference + "\n(x-factor)", align = "center", cssclass = "col_rel")
            i += 1
    if options.calc_score:
        i = 1
        for set in metric_sets:
-            tbl.newColumn(str(i) + "S", getSetName(set, i, options.columns) + "\nvs\n" + getSetName(test_sets[0], 0, options.columns) + "\n(score)", align = "center", cssclass = "col_name")
+            reference = getSetName(test_sets[0], 0, options.columns) if not options.progress_mode else 'previous'
+            tbl.newColumn(str(i) + "S", getSetName(set, i, options.columns) + "\nvs\n" + reference + "\n(score)", align = "center", cssclass = "col_name")
            i += 1

    # rows
@ -181,18 +183,16 @@ if __name__ == "__main__":
                        tbl.newCell(str(i) + "S", "-", color = "red")
                else:
                    val = getter(case, cases[0], options.units)
-                    if options.calc_relatives and i > 0 and val:
-                        valp = getter_p(case, cases[0], options.units)
-                    else:
-                        valp = None
-                    if options.calc_cr and i > 0 and val:
-                        valcr = getter_cr(case, cases[0], options.units)
-                    else:
-                        valcr = None
-                    if options.calc_score and i > 0 and val:
-                        val_score = getter_score(case, cases[0], options.units)
-                    else:
-                        val_score = None
+                    def getter_fn(fn):
+                        if fn and i > 0 and val:
+                            for j in reversed(range(i)) if options.progress_mode else [0]:
+                                r = cases[j]
+                                if r is not None and r.get("status") == 'run':
+                                    return fn(case, r, options.units)
+                        return None
+                    valp = getter_fn(getter_p) if options.calc_relatives or options.progress_mode else None
+                    valcr = getter_fn(getter_cr) if options.calc_cr else None
+                    val_score = getter_fn(getter_score) if options.calc_score else None
                    if not valp or i == 0:
                        color = None
                    elif valp > 1.05:
--- a/platforms/android/android.toolchain.cmake
+++ b/platforms/android/android.toolchain.cmake
@ -318,7 +318,7 @@ set( CMAKE_SYSTEM_VERSION 1 )
 # rpath makes low sence for Android
 set( CMAKE_SKIP_RPATH TRUE CACHE BOOL "If set, runtime paths are not added when using shared libraries." )

-set( ANDROID_SUPPORTED_NDK_VERSIONS ${ANDROID_EXTRA_NDK_VERSIONS} -r9 -r8e -r8d -r8c -r8b -r8 -r7c -r7b -r7 -r6b -r6 -r5c -r5b -r5 "" )
+set( ANDROID_SUPPORTED_NDK_VERSIONS ${ANDROID_EXTRA_NDK_VERSIONS} -r9b -r9 -r8e -r8d -r8c -r8b -r8 -r7c -r7b -r7 -r6b -r6 -r5c -r5b -r5 "" )
 if(NOT DEFINED ANDROID_NDK_SEARCH_PATHS)
 if( CMAKE_HOST_WIN32 )
  file( TO_CMAKE_PATH "$ENV{PROGRAMFILES}" ANDROID_NDK_SEARCH_PATHS )
@ -634,6 +634,8 @@ endif()

 macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar __availableToolchainsLst __toolchain_subpath )
 foreach( __toolchain ${${__availableToolchainsLst}} )
+  # Skip renderscript folder. It's not C++ toolchain
+  if (NOT ${__toolchain} STREQUAL "renderscript")
   if( "${__toolchain}" MATCHES "-clang3[.][0-9]$" AND NOT EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/${__toolchain}${__toolchain_subpath}" )
     string( REGEX REPLACE "-clang3[.][0-9]$" "-4.6" __gcc_toolchain "${__toolchain}" )
   else()
@ -655,6 +657,7 @@ macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar __availableToolchainsLst _
    list( APPEND ${__availableToolchainsVar} "${__toolchain}" )
   endif()
   unset( __gcc_toolchain )
+  endif()
 endforeach()
 endmacro()

--- a/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp
+++ b/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp
@ -91,6 +91,10 @@ int GetCpuID()
        {
            result |= FEATURES_HAS_NEON2;
        }
+        if (features.end() != features.find(CPU_INFO_VFPV4_STR))
+        {
+            result |= FEATURES_HAS_VFPv4;
+        }
        if (features.end() != features.find(CPU_INFO_VFPV3_STR))
        {
            if (features.end () != features.find(CPU_INFO_VFPV3D16_STR))
--- a/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.h
+++ b/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.h
@ -14,8 +14,10 @@

 #define FEATURES_HAS_VFPv3d16 1L
 #define FEATURES_HAS_VFPv3 2L
-#define FEATURES_HAS_NEON 4L
-#define FEATURES_HAS_NEON2 8L
+#define FEATURES_HAS_VFPv4 4L
+#define FEATURES_HAS_NEON 8L
+#define FEATURES_HAS_NEON2 16L
+
 #define FEATURES_HAS_SSE 1L
 #define FEATURES_HAS_SSE2 2L
 #define FEATURES_HAS_SSSE3 4L
@ -27,7 +29,9 @@
 #define PLATFORM_TEGRA   1L
 #define PLATFORM_TEGRA2  2L
 #define PLATFORM_TEGRA3  3L
-#define PLATFORM_TEGRA4 4L
+#define PLATFORM_TEGRA4i 4L
+#define PLATFORM_TEGRA4  5L
+#define PLATFORM_TEGRA5  6L

 int DetectKnownPlatforms();
 int GetProcessorCount();
--- a/platforms/android/service/engine/jni/BinderComponent/ProcReader.h
+++ b/platforms/android/service/engine/jni/BinderComponent/ProcReader.h
@ -7,8 +7,9 @@

 #define CPU_INFO_NEON_STR "neon"
 #define CPU_INFO_NEON2_STR "neon2"
-#define CPU_INFO_VFPV3_STR "vfpv3"
 #define CPU_INFO_VFPV3D16_STR "vfpv3d16"
+#define CPU_INFO_VFPV3_STR "vfpv3"
+#define CPU_INFO_VFPV4_STR "vfpv4"

 #define CPU_INFO_SSE_STR "sse"
 #define CPU_INFO_SSE2_STR "sse2"
--- a/platforms/android/service/engine/jni/NativeService/CommonPackageManager.cpp
+++ b/platforms/android/service/engine/jni/NativeService/CommonPackageManager.cpp
@ -187,17 +187,26 @@ std::vector<std::pair<int, int> > CommonPackageManager::InitArmRating()
    result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_ARMv6 | FEATURES_HAS_VFPv3d16));
    result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_ARMv6 | FEATURES_HAS_VFPv3));
    result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_ARMv6 | FEATURES_HAS_VFPv3 | FEATURES_HAS_VFPv3d16));
+
    result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_ARMv7));
    result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_ARMv7 | FEATURES_HAS_VFPv3d16));
    result.push_back(std::pair<int, int>(PLATFORM_TEGRA2,  ARCH_ARMv7 | FEATURES_HAS_VFPv3d16));
    result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_ARMv7 | FEATURES_HAS_VFPv3));
    result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_ARMv7 | FEATURES_HAS_VFPv3d16 | FEATURES_HAS_VFPv3));
+
    result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_ARMv7 | FEATURES_HAS_NEON));
    result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_ARMv7 | FEATURES_HAS_VFPv3d16 | FEATURES_HAS_NEON));
    result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_ARMv7 | FEATURES_HAS_VFPv3 | FEATURES_HAS_NEON));
    result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_ARMv7 | FEATURES_HAS_VFPv3 | FEATURES_HAS_VFPv3d16 | FEATURES_HAS_NEON));
+
+    result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_ARMv7 | FEATURES_HAS_VFPv4 | FEATURES_HAS_NEON));
+    result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_ARMv7 | FEATURES_HAS_VFPv4 | FEATURES_HAS_VFPv3 | FEATURES_HAS_NEON));
+    result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_ARMv7 | FEATURES_HAS_VFPv4 | FEATURES_HAS_VFPv3 | FEATURES_HAS_VFPv3d16 | FEATURES_HAS_NEON));
+
    result.push_back(std::pair<int, int>(PLATFORM_TEGRA3,  ARCH_ARMv7 | FEATURES_HAS_VFPv3 | FEATURES_HAS_NEON));
-    result.push_back(std::pair<int, int>(PLATFORM_TEGRA4,  ARCH_ARMv7 | FEATURES_HAS_VFPv3 | FEATURES_HAS_NEON));
+    result.push_back(std::pair<int, int>(PLATFORM_TEGRA4i, ARCH_ARMv7 | FEATURES_HAS_VFPv3 | FEATURES_HAS_VFPv4 | FEATURES_HAS_NEON));
+    result.push_back(std::pair<int, int>(PLATFORM_TEGRA4,  ARCH_ARMv7 | FEATURES_HAS_VFPv3 | FEATURES_HAS_VFPv4 | FEATURES_HAS_NEON));
+    result.push_back(std::pair<int, int>(PLATFORM_TEGRA5,  ARCH_ARMv7 | FEATURES_HAS_VFPv3 | FEATURES_HAS_VFPv4 | FEATURES_HAS_NEON));

    return result;
 }
--- a/platforms/android/service/engine/jni/NativeService/PackageInfo.cpp
+++ b/platforms/android/service/engine/jni/NativeService/PackageInfo.cpp
@ -19,6 +19,8 @@ map<int, string> PackageInfo::InitPlatformNameMap()
    result[PLATFORM_TEGRA2] = PLATFORM_TEGRA2_NAME;
    result[PLATFORM_TEGRA3] = PLATFORM_TEGRA3_NAME;
    result[PLATFORM_TEGRA4] = PLATFORM_TEGRA4_NAME;
+    result[PLATFORM_TEGRA4i] = PLATFORM_TEGRA4_NAME;
+    result[PLATFORM_TEGRA5] = PLATFORM_TEGRA5_NAME;

    return result;
 }
--- a/Show More
+++ b/Show More