Merge https://github.com/Itseez/opencv

12 years ago · d36dbe6ad6
parent d194bb6143 2349f7ea89
commit d36dbe6ad6
99 changed files with 2217 additions and 373 deletions
--- a/3rdparty/lib/armeabi-v7a/libnative_camera_r2.2.0.so
+++ b/3rdparty/lib/armeabi-v7a/libnative_camera_r2.2.0.so
--- a/3rdparty/lib/armeabi-v7a/libnative_camera_r2.3.3.so
+++ b/3rdparty/lib/armeabi-v7a/libnative_camera_r2.3.3.so
--- a/3rdparty/lib/armeabi-v7a/libnative_camera_r3.0.1.so
+++ b/3rdparty/lib/armeabi-v7a/libnative_camera_r3.0.1.so
--- a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.0.0.so
+++ b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.0.0.so
--- a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.0.3.so
+++ b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.0.3.so
--- a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.1.1.so
+++ b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.1.1.so
--- a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.2.0.so
+++ b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.2.0.so
--- a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.3.0.so
+++ b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.3.0.so
--- a/3rdparty/lib/armeabi/libnative_camera_r2.2.0.so
+++ b/3rdparty/lib/armeabi/libnative_camera_r2.2.0.so
--- a/3rdparty/lib/armeabi/libnative_camera_r2.3.3.so
+++ b/3rdparty/lib/armeabi/libnative_camera_r2.3.3.so
--- a/3rdparty/lib/armeabi/libnative_camera_r3.0.1.so
+++ b/3rdparty/lib/armeabi/libnative_camera_r3.0.1.so
--- a/3rdparty/lib/armeabi/libnative_camera_r4.0.0.so
+++ b/3rdparty/lib/armeabi/libnative_camera_r4.0.0.so
--- a/3rdparty/lib/armeabi/libnative_camera_r4.0.3.so
+++ b/3rdparty/lib/armeabi/libnative_camera_r4.0.3.so
--- a/3rdparty/lib/armeabi/libnative_camera_r4.1.1.so
+++ b/3rdparty/lib/armeabi/libnative_camera_r4.1.1.so
--- a/3rdparty/lib/armeabi/libnative_camera_r4.2.0.so
+++ b/3rdparty/lib/armeabi/libnative_camera_r4.2.0.so
--- a/3rdparty/lib/armeabi/libnative_camera_r4.3.0.so
+++ b/3rdparty/lib/armeabi/libnative_camera_r4.3.0.so
--- a/3rdparty/lib/mips/libnative_camera_r4.0.3.so
+++ b/3rdparty/lib/mips/libnative_camera_r4.0.3.so
--- a/3rdparty/lib/mips/libnative_camera_r4.1.1.so
+++ b/3rdparty/lib/mips/libnative_camera_r4.1.1.so
--- a/3rdparty/lib/mips/libnative_camera_r4.2.0.so
+++ b/3rdparty/lib/mips/libnative_camera_r4.2.0.so
--- a/3rdparty/lib/mips/libnative_camera_r4.3.0.so
+++ b/3rdparty/lib/mips/libnative_camera_r4.3.0.so
--- a/3rdparty/lib/x86/libnative_camera_r2.3.3.so
+++ b/3rdparty/lib/x86/libnative_camera_r2.3.3.so
--- a/3rdparty/lib/x86/libnative_camera_r3.0.1.so
+++ b/3rdparty/lib/x86/libnative_camera_r3.0.1.so
--- a/3rdparty/lib/x86/libnative_camera_r4.0.3.so
+++ b/3rdparty/lib/x86/libnative_camera_r4.0.3.so
--- a/3rdparty/lib/x86/libnative_camera_r4.1.1.so
+++ b/3rdparty/lib/x86/libnative_camera_r4.1.1.so
--- a/3rdparty/lib/x86/libnative_camera_r4.2.0.so
+++ b/3rdparty/lib/x86/libnative_camera_r4.2.0.so
--- a/3rdparty/lib/x86/libnative_camera_r4.3.0.so
+++ b/3rdparty/lib/x86/libnative_camera_r4.3.0.so
--- a/cmake/OpenCVDetectPython.cmake
+++ b/cmake/OpenCVDetectPython.cmake
@ -106,7 +106,7 @@ if(PYTHON_EXECUTABLE)
                        OUTPUT_QUIET
                        ERROR_VARIABLE SPHINX_OUTPUT
                        OUTPUT_STRIP_TRAILING_WHITESPACE)
-        if(SPHINX_OUTPUT MATCHES "^Sphinx v([0-9][^ \n]*)")
+        if(SPHINX_OUTPUT MATCHES "Sphinx v([0-9][^ \n]*)")
          set(SPHINX_VERSION "${CMAKE_MATCH_1}")
          set(HAVE_SPHINX 1)
          message(STATUS "Found Sphinx ${SPHINX_VERSION}: ${SPHINX_BUILD}")
--- a/cmake/OpenCVFindIPP.cmake
+++ b/cmake/OpenCVFindIPP.cmake
@ -138,9 +138,14 @@ endfunction()
 # This is auxiliary function called from set_ipp_variables()
 # to set IPP_LIBRARIES variable in IPP 7.x style
 # ------------------------------------------------------------------------
-function(set_ipp_new_libraries)
+function(set_ipp_new_libraries _LATEST_VERSION)
    set(IPP_PREFIX "ipp")
+    
+    if(${_LATEST_VERSION} VERSION_LESS "8.0")
        set(IPP_SUFFIX "_l")       # static not threaded libs suffix
+    else()
+        set(IPP_SUFFIX "")       # static not threaded libs suffix
+    endif()
    set(IPP_THRD   "_t")       # static threaded libs suffix
    set(IPPCORE    "core")     # core functionality
    set(IPPSP      "s")        # signal processing
@ -199,7 +204,9 @@ function(set_ipp_variables _LATEST_VERSION)
        # set INCLUDE and LIB folders
        set(IPP_INCLUDE_DIRS ${IPP_ROOT_DIR}/include PARENT_SCOPE)

-        if (IPP_X64)
+        if (APPLE)
+            set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib PARENT_SCOPE)
+        elseif (IPP_X64)
            if(NOT EXISTS ${IPP_ROOT_DIR}/lib/intel64)
                message(SEND_ERROR "IPP EM64T libraries not found")
            endif()
@ -212,7 +219,7 @@ function(set_ipp_variables _LATEST_VERSION)
        endif()

        # set IPP_LIBRARIES variable (7.x lib names)
-        set_ipp_new_libraries()
+        set_ipp_new_libraries(${_LATEST_VERSION})
        set(IPP_LIBRARIES ${IPP_LIBRARIES} PARENT_SCOPE)
        message(STATUS "IPP libs: ${IPP_LIBRARIES}")

--- a/modules/androidcamera/camera_wrapper/camera_wrapper.cpp
+++ b/modules/androidcamera/camera_wrapper/camera_wrapper.cpp
@ -1,5 +1,8 @@
-#if !defined(ANDROID_r2_2_0) && !defined(ANDROID_r2_3_3) && !defined(ANDROID_r3_0_1) && !defined(ANDROID_r4_0_0) && !defined(ANDROID_r4_0_3) && !defined(ANDROID_r4_1_1) && !defined(ANDROID_r4_2_0)
-# error Building camera wrapper for your version of Android is not supported by OpenCV. You need to modify OpenCV sources in order to compile camera wrapper for your version of Android.
+#if !defined(ANDROID_r2_2_0) && !defined(ANDROID_r2_3_3) && !defined(ANDROID_r3_0_1) && \
+ !defined(ANDROID_r4_0_0) && !defined(ANDROID_r4_0_3) && !defined(ANDROID_r4_1_1) && \
+ !defined(ANDROID_r4_2_0) && !defined(ANDROID_r4_3_0)
+# error Building camera wrapper for your version of Android is not supported by OpenCV.\
+ You need to modify OpenCV sources in order to compile camera wrapper for your version of Android.
 #endif

 #include <camera/Camera.h>
@ -16,17 +19,18 @@
 //Include SurfaceTexture.h file with the SurfaceTexture class
 # include <gui/SurfaceTexture.h>
 # define MAGIC_OPENCV_TEXTURE_ID (0x10)
-#else // defined(ANDROID_r3_0_1) || defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3)
-//TODO: This is either 2.2 or 2.3. Include the headers for ISurface.h access
-#if defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0)
+#elif defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0)
 # include <gui/ISurface.h>
 # include <gui/BufferQueue.h>
+#elif defined(ANDROID_r4_3_0)
+# include <gui/IGraphicBufferProducer.h>
+# include <gui/BufferQueue.h>
 #else
 # include <surfaceflinger/ISurface.h>
-#endif  // defined(ANDROID_r4_1_1)
-#endif  // defined(ANDROID_r3_0_1) || defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3)
+#endif

 #include <string>
+#include <fstream>

 //undef logging macro from /system/core/libcutils/loghack.h
 #ifdef LOGD
@ -45,7 +49,6 @@
 # undef LOGE
 #endif

-
 // LOGGING
 #include <android/log.h>
 #define CAMERA_LOG_TAG "OpenCV_NativeCamera"
@ -60,7 +63,7 @@ using namespace android;

 void debugShowFPS();

-#if defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0)
+#if defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0) || defined(ANDROID_r4_3_0)
 class ConsumerListenerStub: public BufferQueue::ConsumerListener
 {
 public:
@ -73,6 +76,29 @@ public:
 };
 #endif

+// Returns the base name of the current process: the first NUL-terminated
+// entry of /proc/self/cmdline with everything up to and including the last
+// '/' removed. Returns an empty string if the file cannot be opened or the
+// entry is empty.
+std::string getProcessName()
+{
+    std::string result;
+    // ifstream closes itself on destruction, so no explicit close() is needed
+    std::ifstream f("/proc/self/cmdline");
+    if (f.is_open())
+    {
+        std::string fullPath;
+        std::getline(f, fullPath, '\0');
+
+        // rfind() yields npos when there is no '/'; npos + 1 wraps to 0,
+        // so the whole string is kept in that case
+        const std::string::size_type slash = fullPath.rfind('/');
+        result = fullPath.substr(slash + 1);
+    }
+
+    return result;
+}
+
 void debugShowFPS()
 {
    static int mFrameCount = 0;
@ -280,7 +306,7 @@ public:
    }

    virtual void postData(int32_t msgType, const sp<IMemory>& dataPtr
-    #if defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3) || defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0)
+    #if defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3) || defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0) || defined(ANDROID_r4_3_0)
                          ,camera_frame_metadata_t*
 #endif
                          )
@ -361,7 +387,9 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
    typedef sp<Camera> (*Android22ConnectFuncType)();
    typedef sp<Camera> (*Android23ConnectFuncType)(int);
    typedef sp<Camera> (*Android3DConnectFuncType)(int, int);
+    typedef sp<Camera> (*Android43ConnectFuncType)(int, const String16&, int);

+    const int ANY_CAMERA_INDEX = -1;
    const int BACK_CAMERA_INDEX = 99;
    const int FRONT_CAMERA_INDEX = 98;

@ -372,14 +400,24 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
    CAMERA_SUPPORT_MODE_ZSL = 0x08 /* Camera Sensor supports ZSL mode. */
    };

+    // used for Android 4.3
+    enum {
+        USE_CALLING_UID = -1
+    };
+
    const char Android22ConnectName[] = "_ZN7android6Camera7connectEv";
    const char Android23ConnectName[] = "_ZN7android6Camera7connectEi";
    const char Android3DConnectName[] = "_ZN7android6Camera7connectEii";
+    const char Android43ConnectName[] = "_ZN7android6Camera7connectEiRKNS_8String16Ei";

    int localCameraIndex = cameraId;

+    if (cameraId == ANY_CAMERA_INDEX)
+    {
+        localCameraIndex = 0;
+    }
 #if !defined(ANDROID_r2_2_0)
-    if (cameraId == BACK_CAMERA_INDEX)
+    else if (cameraId == BACK_CAMERA_INDEX)
    {
        LOGD("Back camera selected");
        for (int i = 0; i < Camera::getNumberOfCameras(); i++)
@ -450,6 +488,12 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
        LOGD("Connecting to CameraService v 3D");
        camera = Android3DConnect(localCameraIndex, CAMERA_SUPPORT_MODE_2D);
    }
+    else if (Android43ConnectFuncType Android43Connect = (Android43ConnectFuncType)dlsym(CameraHALHandle, Android43ConnectName))
+    {
+        std::string currentProcName = getProcessName();
+        LOGD("Current process name for camera init: %s", currentProcName.c_str());
+        camera = Android43Connect(localCameraIndex, String16(currentProcName.c_str()), USE_CALLING_UID);
+    }
    else
    {
        dlclose(CameraHALHandle);
@ -471,7 +515,7 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
    handler->camera = camera;
    handler->cameraId = localCameraIndex;

-    if (prevCameraParameters != 0)
+    if (prevCameraParameters != NULL)
    {
        LOGI("initCameraConnect: Setting paramers from previous camera handler");
        camera->setParameters(prevCameraParameters->flatten());
@ -560,26 +604,25 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
        }
    }

-    status_t pdstatus;
+    status_t bufferStatus;
 #if defined(ANDROID_r2_2_0)
-    pdstatus = camera->setPreviewDisplay(sp<ISurface>(0 /*new DummySurface*/));
-    if (pdstatus != 0)
-        LOGE("initCameraConnect: failed setPreviewDisplay(0) call; camera migth not work correctly on some devices");
+    bufferStatus = camera->setPreviewDisplay(sp<ISurface>(0 /*new DummySurface*/));
+    if (bufferStatus != 0)
+        LOGE("initCameraConnect: failed setPreviewDisplay(0) call (status %d); camera might not work correctly on some devices", bufferStatus);
 #elif defined(ANDROID_r2_3_3)
    /* Do nothing in case of 2.3 for now */
-
 #elif defined(ANDROID_r3_0_1) || defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3)
    sp<SurfaceTexture> surfaceTexture = new SurfaceTexture(MAGIC_OPENCV_TEXTURE_ID);
-    pdstatus = camera->setPreviewTexture(surfaceTexture);
-    if (pdstatus != 0)
-        LOGE("initCameraConnect: failed setPreviewTexture call; camera migth not work correctly");
-#elif defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0)
+    bufferStatus = camera->setPreviewTexture(surfaceTexture);
+    if (bufferStatus != 0)
+        LOGE("initCameraConnect: failed setPreviewTexture call (status %d); camera might not work correctly", bufferStatus);
+#elif defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0) || defined(ANDROID_r4_3_0)
    sp<BufferQueue> bufferQueue = new BufferQueue();
    sp<BufferQueue::ConsumerListener> queueListener = new ConsumerListenerStub();
    bufferQueue->consumerConnect(queueListener);
-    pdstatus = camera->setPreviewTexture(bufferQueue);
-    if (pdstatus != 0)
-    LOGE("initCameraConnect: failed setPreviewTexture call; camera migth not work correctly");
+    bufferStatus = camera->setPreviewTexture(bufferQueue);
+    if (bufferStatus != 0)
+        LOGE("initCameraConnect: failed setPreviewTexture call; camera might not work correctly");
 #endif

 #if (defined(ANDROID_r2_2_0) || defined(ANDROID_r2_3_3) || defined(ANDROID_r3_0_1))
@ -595,9 +638,9 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
 #endif //!(defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3))

    LOGD("Starting preview");
-    status_t resStart = camera->startPreview();
+    status_t previewStatus = camera->startPreview();

-    if (resStart != 0)
+    if (previewStatus != 0)
    {
        LOGE("initCameraConnect: startPreview() fails. Closing camera connection...");
        handler->closeCameraConnect();
@ -620,9 +663,11 @@ void CameraHandler::closeCameraConnect()
    }

    camera->stopPreview();
+#if defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3) || defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0) || defined(ANDROID_r4_3_0)
+    camera->setPreviewCallbackFlags(CAMERA_FRAME_CALLBACK_FLAG_NOOP);
+#endif
    camera->disconnect();
    camera.clear();
-
    camera=NULL;
    // ATTENTION!!!!!!!!!!!!!!!!!!!!!!!!!!
    // When we set
@ -863,14 +908,60 @@ void CameraHandler::applyProperties(CameraHandler** ppcameraHandler)

    if (*ppcameraHandler == 0)
    {
-        LOGE("applyProperties: Passed null *ppcameraHandler");
+        LOGE("applyProperties: Passed NULL *ppcameraHandler");
        return;
    }

-    LOGD("CameraHandler::applyProperties()");
-    CameraHandler* previousCameraHandler=*ppcameraHandler;
-    CameraParameters curCameraParameters(previousCameraHandler->params.flatten());
+    CameraParameters curCameraParameters((*ppcameraHandler)->params.flatten());
+
+#if defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3) || defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0) || defined(ANDROID_r4_3_0)
+    CameraHandler* handler=*ppcameraHandler;

+    handler->camera->stopPreview();
+    handler->camera->setPreviewCallbackFlags(CAMERA_FRAME_CALLBACK_FLAG_NOOP);
+
+    status_t reconnectStatus = handler->camera->reconnect();
+    if (reconnectStatus != 0)
+    {
+        LOGE("applyProperties: failed to reconnect camera (status %d)", reconnectStatus);
+        return;
+    }
+
+    handler->camera->setParameters(curCameraParameters.flatten());
+    handler->params.unflatten(curCameraParameters.flatten());
+
+    status_t bufferStatus;
+# if defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3)
+    sp<SurfaceTexture> surfaceTexture = new SurfaceTexture(MAGIC_OPENCV_TEXTURE_ID);
+    bufferStatus = handler->camera->setPreviewTexture(surfaceTexture);
+    if (bufferStatus != 0)
+        LOGE("applyProperties: failed setPreviewTexture call (status %d); camera might not work correctly", bufferStatus);
+# elif defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0) || defined(ANDROID_r4_3_0)
+    sp<BufferQueue> bufferQueue = new BufferQueue();
+    sp<BufferQueue::ConsumerListener> queueListener = new ConsumerListenerStub();
+    bufferQueue->consumerConnect(queueListener);
+    bufferStatus = handler->camera->setPreviewTexture(bufferQueue);
+    if (bufferStatus != 0)
+        LOGE("applyProperties: failed setPreviewTexture call; camera might not work correctly");
+# endif
+
+    handler->camera->setPreviewCallbackFlags( CAMERA_FRAME_CALLBACK_FLAG_ENABLE_MASK | CAMERA_FRAME_CALLBACK_FLAG_COPY_OUT_MASK);//with copy
+
+    LOGD("Starting preview");
+    status_t previewStatus = handler->camera->startPreview();
+
+    if (previewStatus != 0)
+    {
+        LOGE("initCameraConnect: startPreview() fails. Closing camera connection...");
+        handler->closeCameraConnect();
+        handler = NULL;
+    }
+    else
+    {
+        LOGD("Preview started successfully");
+    }
+#else
+    CameraHandler* previousCameraHandler=*ppcameraHandler;
    CameraCallback cameraCallback=previousCameraHandler->cameraCallback;
    void* userData=previousCameraHandler->userData;
    int cameraId=previousCameraHandler->cameraId;
@ -879,7 +970,6 @@ void CameraHandler::applyProperties(CameraHandler** ppcameraHandler)
    previousCameraHandler->closeCameraConnect();
    LOGD("CameraHandler::applyProperties(): after previousCameraHandler->closeCameraConnect");

-
    LOGD("CameraHandler::applyProperties(): before initCameraConnect");
    CameraHandler* handler=initCameraConnect(cameraCallback, cameraId, userData, &curCameraParameters);
    LOGD("CameraHandler::applyProperties(): after initCameraConnect, handler=0x%x", (int)handler);
@ -892,6 +982,7 @@ void CameraHandler::applyProperties(CameraHandler** ppcameraHandler)
        }
    }
    (*ppcameraHandler)=handler;
+#endif
 }


--- a/modules/bioinspired/doc/retina/index.rst
+++ b/modules/bioinspired/doc/retina/index.rst
@ -110,9 +110,9 @@ Here is an overview of the abstract Retina interface, allocate one instance with

 .. Sample code::

-   * : An example on retina tone mapping can be found at opencv_source_code/samples/cpp/OpenEXRimages_HighDynamicRange_Retina_toneMapping.cpp
-   * : An example on retina tone mapping on video input can be found at opencv_source_code/samples/cpp/OpenEXRimages_HighDynamicRange_Retina_toneMapping.cpp
-   * : A complete example illustrating the retina interface can be found at opencv_source_code/samples/cpp/retinaDemo.cpp
+   * An example on retina tone mapping can be found at opencv_source_code/samples/cpp/OpenEXRimages_HighDynamicRange_Retina_toneMapping.cpp
+   * An example on retina tone mapping on video input can be found at opencv_source_code/samples/cpp/OpenEXRimages_HighDynamicRange_Retina_toneMapping.cpp
+   * A complete example illustrating the retina interface can be found at opencv_source_code/samples/cpp/retinaDemo.cpp

 Description
 +++++++++++
--- a/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.rst
+++ b/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.rst
@ -109,16 +109,16 @@ The functions below use the above model to do the following:

 * Estimate the relative position and orientation of the stereo camera "heads" and compute the *rectification* transformation that makes the camera optical axes parallel.

-.. Sample code::
+.. note::

-   * : A calibration sample for 3 cameras in horizontal position can be found at opencv_source_code/samples/cpp/3calibration.cpp
-   * : A calibration sample based on a sequence of images can be found at opencv_source_code/samples/cpp/calibration.cpp
-   * : A calibration sample in order to do 3D reconstruction can be found at opencv_source_code/samples/cpp/build3dmodel.cpp
-   * : A calibration sample of an artificially generated camera and chessboard patterns can be found at opencv_source_code/samples/cpp/calibration_artificial.cpp
-   * : A calibration example on stereo calibration can be found at opencv_source_code/samples/cpp/stereo_calib.cpp
-   * : A calibration example on stereo matching can be found at opencv_source_code/samples/cpp/stereo_match.cpp
+   * A calibration sample for 3 cameras in horizontal position can be found at opencv_source_code/samples/cpp/3calibration.cpp
+   * A calibration sample based on a sequence of images can be found at opencv_source_code/samples/cpp/calibration.cpp
+   * A calibration sample in order to do 3D reconstruction can be found at opencv_source_code/samples/cpp/build3dmodel.cpp
+   * A calibration sample of an artificially generated camera and chessboard patterns can be found at opencv_source_code/samples/cpp/calibration_artificial.cpp
+   * A calibration example on stereo calibration can be found at opencv_source_code/samples/cpp/stereo_calib.cpp
+   * A calibration example on stereo matching can be found at opencv_source_code/samples/cpp/stereo_match.cpp

-   * : PYTHON : A camera calibration sample can be found at opencv_source_code/samples/python2/calibrate.py
+   * (Python) A camera calibration sample can be found at opencv_source_code/samples/python2/calibrate.py

 calibrateCamera
 ---------------
@ -588,9 +588,9 @@ Finds an object pose from 3D-2D point correspondences.

 The function estimates the object pose given a set of object points, their corresponding image projections, as well as the camera matrix and the distortion coefficients.

-.. Sample code::
+.. note::

-   * : An example of how to use solvePNP for planar augmented reality can be found at opencv_source_code/samples/python2/plane_ar.py
+   * An example of how to use solvePNP for planar augmented reality can be found at opencv_source_code/samples/python2/plane_ar.py

 solvePnPRansac
 ------------------
@ -892,9 +892,9 @@ Homography matrix is determined up to a scale. Thus, it is normalized so that
    :ocv:func:`warpPerspective`,
    :ocv:func:`perspectiveTransform`

-.. Sample code::
+.. note::

-   * : A example on calculating a homography for image matching can be found at opencv_source_code/samples/cpp/video_homography.cpp
+   * An example on calculating a homography for image matching can be found at opencv_source_code/samples/cpp/video_homography.cpp

 estimateAffine3D
 --------------------
@ -1186,7 +1186,7 @@ Class for computing stereo correspondence using the block matching algorithm, in

 .. Sample code:

-   * : OCL : An example for using the stereoBM matching algorithm can be found at opencv_source_code/samples/ocl/stereo_match.cpp
+   (Ocl) An example for using the stereoBM matching algorithm can be found at opencv_source_code/samples/ocl/stereo_match.cpp

 createStereoBM
 ------------------
@ -1218,9 +1218,9 @@ The class implements the modified H. Hirschmuller algorithm [HH08]_ that differs

 * Some pre- and post- processing steps from K. Konolige algorithm ``StereoBM``  are included, for example: pre-filtering (``StereoBM::PREFILTER_XSOBEL`` type) and post-filtering (uniqueness check, quadratic interpolation and speckle filtering).

-.. Sample code::
+.. note::

-   * : PYTHON : An example illustrating the use of the StereoSGBM matching algorithm can be found at opencv_source_code/samples/python2/stereo_match.py
+   * (Python) An example illustrating the use of the StereoSGBM matching algorithm can be found at opencv_source_code/samples/python2/stereo_match.py

 createStereoSGBM
 --------------------------
--- a/modules/contrib/doc/facerec/facerec_api.rst
+++ b/modules/contrib/doc/facerec/facerec_api.rst
@ -5,9 +5,9 @@ FaceRecognizer

 .. Sample code::

-   * : An example using the FaceRecognizer class can be found at opencv_source_code/samples/cpp/facerec_demo.cpp
+   * An example using the FaceRecognizer class can be found at opencv_source_code/samples/cpp/facerec_demo.cpp

-   * : PYTHON :  An example using the FaceRecognizer class can be found at opencv_source_code/samples/python2/facerec_demo.py
+   * (Python) An example using the FaceRecognizer class can be found at opencv_source_code/samples/python2/facerec_demo.py

 FaceRecognizer
 --------------
--- a/modules/contrib/doc/openfabmap.rst
+++ b/modules/contrib/doc/openfabmap.rst
@ -9,9 +9,9 @@ FAB-MAP is an approach to appearance-based place recognition. FAB-MAP compares i

 openFABMAP requires training data (e.g. a collection of images from a similar but not identical environment) to construct a visual vocabulary for the visual bag-of-words model, along with a Chow-Liu tree representation of feature likelihood and for use in the Sampled new place method (see below).

-.. Sample code::
+.. note::

-   * : An example using the openFABMAP package can be found at opencv_source_code/samples/cpp/fabmap_sample.cpp
+   * An example using the openFABMAP package can be found at opencv_source_code/samples/cpp/fabmap_sample.cpp

 of2::FabMap
 --------------------
--- a/modules/core/doc/basic_structures.rst
+++ b/modules/core/doc/basic_structures.rst
@ -884,9 +884,9 @@ Finally, there are STL-style iterators that are smart enough to skip gaps betwee

 The matrix iterators are random-access iterators, so they can be passed to any STL algorithm, including ``std::sort()`` .

-.. Sample code::
+.. note::

-   * : An example demonstrating the serial out capabilities of cv::Mat can be found at opencv_source_code/samples/cpp/cout_mat.cpp
+   * An example demonstrating the serial out capabilities of cv::Mat can be found at opencv_source_code/samples/cpp/cout_mat.cpp

 .. _MatrixExpressions:

--- a/modules/core/doc/clustering.rst
+++ b/modules/core/doc/clustering.rst
@ -64,11 +64,11 @@ Basically, you can use only the core of the function, set the number of
 attempts to 1, initialize labels each time using a custom algorithm, pass them with the
 ( ``flags`` = ``KMEANS_USE_INITIAL_LABELS`` ) flag, and then choose the best (most-compact) clustering.

-.. Sample code::
+.. note::

-   * : An example on K-means clustering can be found at opencv_source_code/samples/cpp/kmeans.cpp
+   * An example on K-means clustering can be found at opencv_source_code/samples/cpp/kmeans.cpp

-   * : PYTHON : An example on K-means clustering can be found at opencv_source_code/samples/python2/kmeans.py
+   * (Python) An example on K-means clustering can be found at opencv_source_code/samples/python2/kmeans.py

 partition
 -------------
--- a/modules/core/doc/drawing_functions.rst
+++ b/modules/core/doc/drawing_functions.rst
@ -26,9 +26,9 @@ If a drawn figure is partially or completely outside the image, the drawing func

 .. note:: The functions do not support alpha-transparency when the target image is 4-channel. In this case, the ``color[3]`` is simply copied to the repainted pixels. Thus, if you want to paint semi-transparent shapes, you can paint them in a separate buffer and then blend it with the main image.

-.. Sample code::
+.. note::

-   * : An example on using variate drawing functions like line, rectangle, ... can be found at opencv_source_code/samples/cpp/drawing.cpp
+   * An example on using variate drawing functions like line, rectangle, ... can be found at opencv_source_code/samples/cpp/drawing.cpp

 circle
 ----------
@ -559,12 +559,12 @@ The function draws contour outlines in the image if
        waitKey(0);
    }

-.. Sample code::
+.. note::

-   * : An example using the drawContour functionality can be found at opencv_source_code/samples/cpp/contours2.cpp
-   * : An example using drawContours to clean up a background segmentation result at opencv_source_code/samples/cpp/segment_objects.cpp
+   * An example using the drawContour functionality can be found at opencv_source_code/samples/cpp/contours2.cpp
+   * An example using drawContours to clean up a background segmentation result at opencv_source_code/samples/cpp/segment_objects.cpp

-   * : PYTHON : An example using the drawContour functionality can be found at opencv_source/samples/python2/contours.py
+   * (Python) An example using the drawContour functionality can be found at opencv_source_code/samples/python2/contours.py


 putText
--- a/modules/core/doc/operations_on_arrays.rst
+++ b/modules/core/doc/operations_on_arrays.rst
@ -971,12 +971,12 @@ All of the above improvements have been implemented in :ocv:func:`matchTemplate`

 .. seealso:: :ocv:func:`dct` , :ocv:func:`getOptimalDFTSize` , :ocv:func:`mulSpectrums`, :ocv:func:`filter2D` , :ocv:func:`matchTemplate` , :ocv:func:`flip` , :ocv:func:`cartToPolar` , :ocv:func:`magnitude` , :ocv:func:`phase`

-.. Sample code::
+.. note::

-   * : An example using the discrete fourier transform can be found at opencv_source_code/samples/cpp/dft.cpp
+   * An example using the discrete fourier transform can be found at opencv_source_code/samples/cpp/dft.cpp

-   * : PYTHON : An example using the dft functionality to perform Wiener deconvolution can be found at opencv_source/samples/python2/deconvolution.py
-   * : PYTHON : An example rearranging the quadrants of a Fourier image can be found at opencv_source/samples/python2/dft.py
+   * (Python) An example using the dft functionality to perform Wiener deconvolution can be found at opencv_source_code/samples/python2/deconvolution.py
+   * (Python) An example rearranging the quadrants of a Fourier image can be found at opencv_source_code/samples/python2/dft.py


 divide
@ -2167,9 +2167,9 @@ The sample below is the function that takes two matrices. The first function sto
    :ocv:func:`dft`,
    :ocv:func:`dct`

-.. Sample code::
+.. note::

-   * : An example using PCA for dimensionality reduction while maintaining an amount of variance can be found at opencv_source_code/samples/cpp/pca.cpp
+   * An example using PCA for dimensionality reduction while maintaining an amount of variance can be found at opencv_source_code/samples/cpp/pca.cpp

 PCA::PCA
 --------
--- a/modules/core/doc/xml_yaml_persistence.rst
+++ b/modules/core/doc/xml_yaml_persistence.rst
@ -91,9 +91,9 @@ Several things can be noted by looking at the sample code and the output:
 *
   In YAML (but not XML), mappings and sequences can be written in a compact Python-like inline form. In the sample above matrix elements, as well as each feature, including its lbp value, is stored in such inline form. To store a mapping/sequence in a compact form, put ":" after the opening character, e.g. use **"{:"** instead of **"{"** and **"[:"** instead of **"["**. When the data is written to XML, those extra ":" are ignored.

-.. Sample code::
+.. note::

-   * : A complete example using the FileStorage interface can be found at opencv_source_code/samples/cpp/filestorage.cpp
+   * A complete example using the FileStorage interface can be found at opencv_source_code/samples/cpp/filestorage.cpp


 Reading data from a file storage.
--- a/modules/core/perf/perf_stat.cpp
+++ b/modules/core/perf/perf_stat.cpp
@ -33,7 +33,7 @@ PERF_TEST_P(Size_MatType, mean, TYPICAL_MATS)

    TEST_CYCLE() s = mean(src);

-    SANITY_CHECK(s, 1e-6);
+    SANITY_CHECK(s, 1e-5);
 }

 PERF_TEST_P(Size_MatType, mean_mask, TYPICAL_MATS)
@ -49,7 +49,7 @@ PERF_TEST_P(Size_MatType, mean_mask, TYPICAL_MATS)

    TEST_CYCLE() s = mean(src, mask);

-    SANITY_CHECK(s, 1e-6);
+    SANITY_CHECK(s, 5e-5);
 }

 PERF_TEST_P(Size_MatType, meanStdDev, TYPICAL_MATS)
--- a/modules/core/src/dxt.cpp
+++ b/modules/core/src/dxt.cpp
@ -1458,6 +1458,10 @@ static void CCSIDFT_64f( const double* src, double* dst, int n, int nf, int* fac

 }

+#ifdef HAVE_IPP
+typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, int*, int*, int*);
+typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*);
+#endif

 void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
 {
@ -1483,7 +1487,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
    int factors[34];
    bool inplace_transform = false;
 #ifdef HAVE_IPP
-    void *spec_r = 0, *spec_c = 0;
+    AutoBuffer<uchar> ippbuf;
    int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 2 : 1;
 #endif

@ -1543,52 +1547,51 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )

        spec = 0;
 #ifdef HAVE_IPP
-        if( len*count >= 64 ) // use IPP DFT if available
+        if(
+#if IPP_VERSION_MAJOR >= 7
+           depth == CV_32F && // IPP 7.x and 8.0 have bug somewhere in double-precision DFT
+#endif
+           len*count >= 64 ) // use IPP DFT if available
        {
-            int ipp_sz = 0;
+            int specsize=0, initsize=0, worksize=0;
+            IppDFTGetSizeFunc getSizeFunc = 0;
+            IppDFTInitFunc initFunc = 0;

            if( real_transform && stage == 0 )
            {
                if( depth == CV_32F )
                {
-                    if( spec_r )
-                        IPPI_CALL( ippsDFTFree_R_32f( (IppsDFTSpec_R_32f*)spec_r ));
-                    IPPI_CALL( ippsDFTInitAlloc_R_32f(
-                        (IppsDFTSpec_R_32f**)&spec_r, len, ipp_norm_flag, ippAlgHintNone ));
-                    IPPI_CALL( ippsDFTGetBufSize_R_32f( (IppsDFTSpec_R_32f*)spec_r, &ipp_sz ));
+                    getSizeFunc = ippsDFTGetSize_R_32f;
+                    initFunc = (IppDFTInitFunc)ippsDFTInit_R_32f;
                }
                else
                {
-                    if( spec_r )
-                        IPPI_CALL( ippsDFTFree_R_64f( (IppsDFTSpec_R_64f*)spec_r ));
-                    IPPI_CALL( ippsDFTInitAlloc_R_64f(
-                        (IppsDFTSpec_R_64f**)&spec_r, len, ipp_norm_flag, ippAlgHintNone ));
-                    IPPI_CALL( ippsDFTGetBufSize_R_64f( (IppsDFTSpec_R_64f*)spec_r, &ipp_sz ));
+                    getSizeFunc = ippsDFTGetSize_R_64f;
+                    initFunc = (IppDFTInitFunc)ippsDFTInit_R_64f;
                }
-                spec = spec_r;
            }
            else
            {
                if( depth == CV_32F )
                {
-                    if( spec_c )
-                        IPPI_CALL( ippsDFTFree_C_32fc( (IppsDFTSpec_C_32fc*)spec_c ));
-                    IPPI_CALL( ippsDFTInitAlloc_C_32fc(
-                        (IppsDFTSpec_C_32fc**)&spec_c, len, ipp_norm_flag, ippAlgHintNone ));
-                    IPPI_CALL( ippsDFTGetBufSize_C_32fc( (IppsDFTSpec_C_32fc*)spec_c, &ipp_sz ));
+                    getSizeFunc = ippsDFTGetSize_C_32fc;
+                    initFunc = (IppDFTInitFunc)ippsDFTInit_C_32fc;
                }
                else
                {
-                    if( spec_c )
-                        IPPI_CALL( ippsDFTFree_C_64fc( (IppsDFTSpec_C_64fc*)spec_c ));
-                    IPPI_CALL( ippsDFTInitAlloc_C_64fc(
-                        (IppsDFTSpec_C_64fc**)&spec_c, len, ipp_norm_flag, ippAlgHintNone ));
-                    IPPI_CALL( ippsDFTGetBufSize_C_64fc( (IppsDFTSpec_C_64fc*)spec_c, &ipp_sz ));
+                    getSizeFunc = ippsDFTGetSize_C_64fc;
+                    initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc;
                }
-                spec = spec_c;
            }
-
-            sz += ipp_sz;
+            if( getSizeFunc(len, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 )
+            {
+                ippbuf.allocate(specsize + initsize + 64);
+                spec = alignPtr(&ippbuf[0], 32);
+                uchar* initbuf = alignPtr((uchar*)spec + specsize, 32);
+                if( initFunc(len, ipp_norm_flag, ippAlgHintNone, spec, initbuf) < 0 )
+                    spec = 0;
+                sz += worksize;
+            }
        }
        else
 #endif
@ -1862,24 +1865,6 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
            src = dst;
        }
    }
-
-#ifdef HAVE_IPP
-    if( spec_c )
-    {
-        if( depth == CV_32F )
-            ippsDFTFree_C_32fc( (IppsDFTSpec_C_32fc*)spec_c );
-        else
-            ippsDFTFree_C_64fc( (IppsDFTSpec_C_64fc*)spec_c );
-    }
-
-    if( spec_r )
-    {
-        if( depth == CV_32F )
-            ippsDFTFree_R_32f( (IppsDFTSpec_R_32f*)spec_r );
-        else
-            ippsDFTFree_R_64f( (IppsDFTSpec_R_64f*)spec_r );
-    }
-#endif
 }


--- a/modules/core/src/stat.cpp
+++ b/modules/core/src/stat.cpp
@ -440,6 +440,45 @@ cv::Scalar cv::sum( InputArray _src )
 {
    Mat src = _src.getMat();
    int k, cn = src.channels(), depth = src.depth();
+	
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+	size_t total_size = src.total();
+	int rows = src.size[0], cols = (int)(total_size/rows);
+	if( src.dims == 2 || (src.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
+	{
+		IppiSize sz = { cols, rows };
+		int type = src.type();
+		typedef IppStatus (CV_STDCALL* ippiSumFunc)(const void*, int, IppiSize, double *, int);
+		ippiSumFunc ippFunc = 
+			type == CV_8UC1 ? (ippiSumFunc)ippiSum_8u_C1R :
+			type == CV_8UC3 ? (ippiSumFunc)ippiSum_8u_C3R :
+			type == CV_8UC4 ? (ippiSumFunc)ippiSum_8u_C4R :
+			type == CV_16UC1 ? (ippiSumFunc)ippiSum_16u_C1R :
+			type == CV_16UC3 ? (ippiSumFunc)ippiSum_16u_C3R :
+			type == CV_16UC4 ? (ippiSumFunc)ippiSum_16u_C4R :
+			type == CV_16SC1 ? (ippiSumFunc)ippiSum_16s_C1R :
+			type == CV_16SC3 ? (ippiSumFunc)ippiSum_16s_C3R :
+			type == CV_16SC4 ? (ippiSumFunc)ippiSum_16s_C4R :
+			type == CV_32FC1 ? (ippiSumFunc)ippiSum_32f_C1R :
+			type == CV_32FC3 ? (ippiSumFunc)ippiSum_32f_C3R :
+			type == CV_32FC4 ? (ippiSumFunc)ippiSum_32f_C4R :
+			0;
+		if( ippFunc )
+		{
+			Ipp64f res[4];
+			if( ippFunc(src.data, src.step[0], sz, res, ippAlgHintAccurate) >= 0 )
+			{
+				Scalar sc;
+				for( int i = 0; i < cn; i++ )
+				{
+					sc[i] = res[i];
+				}
+				return sc;
+			}
+		}
+	}
+#endif 
+	
    SumFunc func = sumTab[depth];

    CV_Assert( cn <= 4 && func != 0 );
@ -513,6 +552,81 @@ cv::Scalar cv::mean( InputArray _src, InputArray _mask )
    CV_Assert( mask.empty() || mask.type() == CV_8U );

    int k, cn = src.channels(), depth = src.depth();
+	
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+	size_t total_size = src.total();
+	int rows = src.size[0], cols = (int)(total_size/rows);
+	if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
+	{
+		IppiSize sz = { cols, rows };
+		int type = src.type();
+		if( !mask.empty() )
+		{
+			typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, void *, int, IppiSize, Ipp64f *);
+			ippiMaskMeanFuncC1 ippFuncC1 = 
+			type == CV_8UC1 ? (ippiMaskMeanFuncC1)ippiMean_8u_C1MR :
+			type == CV_16UC1 ? (ippiMaskMeanFuncC1)ippiMean_16u_C1MR :
+			type == CV_32FC1 ? (ippiMaskMeanFuncC1)ippiMean_32f_C1MR :
+			0;
+			if( ippFuncC1 )
+			{
+				Ipp64f res;
+				if( ippFuncC1(src.data, src.step[0], mask.data, mask.step[0], sz, &res) >= 0 )
+				{
+					return Scalar(res);
+				}
+			}
+			typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, void *, int, IppiSize, int, Ipp64f *);
+			ippiMaskMeanFuncC3 ippFuncC3 = 
+			type == CV_8UC3 ? (ippiMaskMeanFuncC3)ippiMean_8u_C3CMR :
+			type == CV_16UC3 ? (ippiMaskMeanFuncC3)ippiMean_16u_C3CMR :
+			type == CV_32FC3 ? (ippiMaskMeanFuncC3)ippiMean_32f_C3CMR :
+			0;
+			if( ippFuncC3 )
+			{
+				Ipp64f res1, res2, res3;
+				if( ippFuncC3(src.data, src.step[0], mask.data, mask.step[0], sz, 1, &res1) >= 0 &&
+					ippFuncC3(src.data, src.step[0], mask.data, mask.step[0], sz, 2, &res2) >= 0 &&
+					ippFuncC3(src.data, src.step[0], mask.data, mask.step[0], sz, 3, &res3) >= 0 )
+				{
+					return Scalar(res1, res2, res3);
+				}
+			}
+		}
+		else
+		{
+			typedef IppStatus (CV_STDCALL* ippiMeanFunc)(const void*, int, IppiSize, double *, int);
+			ippiMeanFunc ippFunc = 
+				type == CV_8UC1 ? (ippiMeanFunc)ippiMean_8u_C1R :
+				type == CV_8UC3 ? (ippiMeanFunc)ippiMean_8u_C3R :
+				type == CV_8UC4 ? (ippiMeanFunc)ippiMean_8u_C4R :
+				type == CV_16UC1 ? (ippiMeanFunc)ippiMean_16u_C1R :
+				type == CV_16UC3 ? (ippiMeanFunc)ippiMean_16u_C3R :
+				type == CV_16UC4 ? (ippiMeanFunc)ippiMean_16u_C4R :
+				type == CV_16SC1 ? (ippiMeanFunc)ippiMean_16s_C1R :
+				type == CV_16SC3 ? (ippiMeanFunc)ippiMean_16s_C3R :
+				type == CV_16SC4 ? (ippiMeanFunc)ippiMean_16s_C4R :
+				type == CV_32FC1 ? (ippiMeanFunc)ippiMean_32f_C1R :
+				type == CV_32FC3 ? (ippiMeanFunc)ippiMean_32f_C3R :
+				type == CV_32FC4 ? (ippiMeanFunc)ippiMean_32f_C4R :
+				0;
+			if( ippFunc )
+			{
+				Ipp64f res[4];
+				if( ippFunc(src.data, src.step[0], sz, res, ippAlgHintAccurate) >= 0 )
+				{
+					Scalar sc;
+					for( int i = 0; i < cn; i++ )
+					{
+						sc[i] = res[i];
+					}
+					return sc;
+				}
+			}
+		}
+	}
+#endif
+	
    SumFunc func = sumTab[depth];

    CV_Assert( cn <= 4 && func != 0 );
--- a/modules/core/test/test_arithm.cpp
+++ b/modules/core/test/test_arithm.cpp
@ -1123,7 +1123,7 @@ struct MeanOp : public BaseElemWiseOp
    }
    double getMaxErr(int)
    {
-        return 1e-6;
+        return 1e-5;
    }
 };

--- a/modules/features2d/doc/common_interfaces_of_descriptor_extractors.rst
+++ b/modules/features2d/doc/common_interfaces_of_descriptor_extractors.rst
@ -9,10 +9,10 @@ represented as vectors in a multidimensional space. All objects that implement t
 descriptor extractors inherit the
 :ocv:class:`DescriptorExtractor` interface.

-.. Sample code::
+.. note::

-   * : An example explaining keypoint extraction can be found at opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp
-   * : An example on descriptor evaluation can be found at opencv_source_code/samples/cpp/detector_descriptor_evaluation.cpp
+   * An example explaining keypoint extraction can be found at opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp
+   * An example on descriptor evaluation can be found at opencv_source_code/samples/cpp/detector_descriptor_evaluation.cpp

 DescriptorExtractor
 -------------------
@ -145,6 +145,6 @@ Strecha C., Fua P. *BRIEF: Binary Robust Independent Elementary Features* ,
        ...
    };

-.. Sample code::
+.. note::

-   * : A complete BRIEF extractor sample can be found at opencv_source_code/samples/cpp/brief_match_test.cpp
+   * A complete BRIEF extractor sample can be found at opencv_source_code/samples/cpp/brief_match_test.cpp
--- a/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst
+++ b/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst
@ -9,11 +9,11 @@ that are represented as vectors in a multidimensional space. All objects that im
 descriptor matchers inherit the
 :ocv:class:`DescriptorMatcher` interface.

-.. Sample code::
+.. note::

-   * : An example explaining keypoint matching can be found at opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp
-   * : An example on descriptor matching evaluation can be found at opencv_source_code/samples/cpp/detector_descriptor_matcher_evaluation.cpp
-   * : An example on one to many image matching can be found at opencv_source_code/samples/cpp/matching_to_many_images.cpp
+   * An example explaining keypoint matching can be found at opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp
+   * An example on descriptor matching evaluation can be found at opencv_source_code/samples/cpp/detector_descriptor_matcher_evaluation.cpp
+   * An example on one to many image matching can be found at opencv_source_code/samples/cpp/matching_to_many_images.cpp

 DescriptorMatcher
 -----------------
--- a/modules/features2d/doc/common_interfaces_of_feature_detectors.rst
+++ b/modules/features2d/doc/common_interfaces_of_feature_detectors.rst
@ -8,9 +8,9 @@ between different algorithms solving the same problem. All objects that implemen
 inherit the
 :ocv:class:`FeatureDetector` interface.

-.. Sample code::
+.. note::

-   * : An example explaining keypoint detection can be found at opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp
+   * An example explaining keypoint detection can be found at opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp

 FeatureDetector
 ---------------
--- a/modules/features2d/doc/common_interfaces_of_generic_descriptor_matchers.rst
+++ b/modules/features2d/doc/common_interfaces_of_generic_descriptor_matchers.rst
@ -11,11 +11,11 @@ Every descriptor with the
 :ocv:class:`VectorDescriptorMatcher` ).
 There are descriptors such as the One-way descriptor and Ferns that have the ``GenericDescriptorMatcher`` interface implemented but do not support ``DescriptorExtractor``.

-.. Sample code::
+.. note::

-   * : An example explaining keypoint description can be found at opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp
-   * : An example on descriptor matching evaluation can be found at opencv_source_code/samples/cpp/detector_descriptor_matcher_evaluation.cpp
-   * : An example on one to many image matching can be found at opencv_source_code/samples/cpp/matching_to_many_images.cpp
+   * An example explaining keypoint description can be found at opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp
+   * An example on descriptor matching evaluation can be found at opencv_source_code/samples/cpp/detector_descriptor_matcher_evaluation.cpp
+   * An example on one to many image matching can be found at opencv_source_code/samples/cpp/matching_to_many_images.cpp

 GenericDescriptorMatcher
 ------------------------
--- a/modules/features2d/doc/feature_detection_and_description.rst
+++ b/modules/features2d/doc/feature_detection_and_description.rst
@ -3,9 +3,9 @@ Feature Detection and Description

 .. highlight:: cpp

-.. Sample code::
+.. note::

-   * : An example explaining keypoint detection and description can be found at opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp
+   * An example explaining keypoint detection and description can be found at opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp

 FAST
 ----
@ -62,9 +62,9 @@ Maximally stable extremal region extractor. ::
 The class encapsulates all the parameters of the MSER extraction algorithm (see
 http://en.wikipedia.org/wiki/Maximally_stable_extremal_regions). Also see http://code.opencv.org/projects/opencv/wiki/MSER for useful comments and parameters description.

-.. Sample code::
+.. note::

-   * : PYTHON : A complete example showing the use of the MSER detector can be found at opencv_source_code/samples/python2/mser.py
+   * (Python) A complete example showing the use of the MSER detector can be found at opencv_source_code/samples/python2/mser.py


 ORB
@ -190,9 +190,9 @@ Class implementing the FREAK (*Fast Retina Keypoint*) keypoint descriptor, descr

 .. [AOV12] A. Alahi, R. Ortiz, and P. Vandergheynst. FREAK: Fast Retina Keypoint. In IEEE Conference on Computer Vision and Pattern Recognition, 2012. CVPR 2012 Open Source Award Winner.

-.. Sample code::
+.. note::

-   * : An example on how to use the FREAK descriptor can be found at opencv_source_code/samples/cpp/freak_demo.cpp
+   * An example on how to use the FREAK descriptor can be found at opencv_source_code/samples/cpp/freak_demo.cpp

 FREAK::FREAK
 ------------
--- a/modules/features2d/doc/object_categorization.rst
+++ b/modules/features2d/doc/object_categorization.rst
@ -5,11 +5,11 @@ Object Categorization

 This section describes approaches based on local 2D features and used to categorize objects.

-.. Sample code::
+.. note::

-   * : A complete Bag-Of-Words sample can be found at opencv_source_code/samples/cpp/bagofwords_classification.cpp
+   * A complete Bag-Of-Words sample can be found at opencv_source_code/samples/cpp/bagofwords_classification.cpp

-   * : PYTHON : An example using the features2D framework to perform object categorization can be found at opencv_source_code/samples/python2/find_obj.py
+   * (Python) An example using the features2D framework to perform object categorization can be found at opencv_source_code/samples/python2/find_obj.py

 BOWTrainer
 ----------
@ -204,4 +204,3 @@ BOWImgDescriptorExtractor::descriptorType
 Returns an image descriptor type.

 .. ocv:function:: int BOWImgDescriptorExtractor::descriptorType() const
-
--- a/modules/gpu/doc/object_detection.rst
+++ b/modules/gpu/doc/object_detection.rst
@ -62,12 +62,12 @@ The class implements Histogram of Oriented Gradients ([Dalal2005]_) object detec

 Interfaces of all methods are kept similar to the ``CPU HOG`` descriptor and detector analogues as much as possible.

-.. Sample code::
+.. note::

-   * : An example applying the HOG descriptor for people detection can be found at opencv_source_code/samples/cpp/peopledetect.cpp
-   * : A GPU example applying the HOG descriptor for people detection can be found at opencv_source_code/samples/gpu/hog.cpp
+   * An example applying the HOG descriptor for people detection can be found at opencv_source_code/samples/cpp/peopledetect.cpp
+   * A GPU example applying the HOG descriptor for people detection can be found at opencv_source_code/samples/gpu/hog.cpp

-   * : PYTHON : An example applying the HOG descriptor for people detection can be found at opencv_source_code/samples/python2/peopledetect.py
+   * (Python) An example applying the HOG descriptor for people detection can be found at opencv_source_code/samples/python2/peopledetect.py

 gpu::HOGDescriptor::HOGDescriptor
 -------------------------------------
@ -234,10 +234,10 @@ Cascade classifier class used for object detection. Supports HAAR and LBP cascad
            Size getClassifierSize() const;
    };

-.. Sample code::
+.. note::

-   * : A cascade classifier example can be found at opencv_source_code/samples/gpu/cascadeclassifier.cpp
-   * : A Nvidea API specific cascade classifier example can be found at opencv_source_code/samples/gpu/cascadeclassifier_nvidia_api.cpp
+   * A cascade classifier example can be found at opencv_source_code/samples/gpu/cascadeclassifier.cpp
+   * An Nvidia API specific cascade classifier example can be found at opencv_source_code/samples/gpu/cascadeclassifier_nvidia_api.cpp

 gpu::CascadeClassifier_GPU::CascadeClassifier_GPU
 -----------------------------------------------------
--- a/modules/gpubgsegm/doc/background_segmentation.rst
+++ b/modules/gpubgsegm/doc/background_segmentation.rst
@ -15,9 +15,9 @@ The class discriminates between foreground and background pixels by building and

 .. seealso:: :ocv:class:`BackgroundSubtractorMOG`

-.. Sample code::
+.. note::

-   * : An example on gaussian mixture based background/foreground segmantation can be found at opencv_source_code/samples/gpu/bgfg_segm.cpp
+   * An example on Gaussian mixture based background/foreground segmentation can be found at opencv_source_code/samples/gpu/bgfg_segm.cpp


 gpu::createBackgroundSubtractorMOG
--- a/modules/gpucodec/doc/videodec.rst
+++ b/modules/gpucodec/doc/videodec.rst
@ -11,9 +11,9 @@ Video reader interface.

 .. ocv:class:: gpucodec::VideoReader

-.. Sample code::
+.. note::

-   * : An example on how to use the videoReader class can be found at opencv_source_code/samples/gpu/video_reader.cpp
+   * An example on how to use the VideoReader class can be found at opencv_source_code/samples/gpu/video_reader.cpp


 gpucodec::VideoReader::nextFrame
--- a/modules/gpucodec/doc/videoenc.rst
+++ b/modules/gpucodec/doc/videoenc.rst
@ -15,9 +15,9 @@ The implementation uses H264 video codec.

 .. note:: Currently only Windows platform is supported.

-.. Sample code::
+.. note::

-   * : An example on how to use the videoWriter class can be found at opencv_source_code/samples/gpu/video_writer.cpp
+   * An example on how to use the VideoWriter class can be found at opencv_source_code/samples/gpu/video_writer.cpp


 gpucodec::VideoWriter::write
--- a/modules/gpufilters/doc/filtering.rst
+++ b/modules/gpufilters/doc/filtering.rst
@ -5,9 +5,9 @@ Image Filtering

 Functions and classes described in this section are used to perform various linear or non-linear filtering operations on 2D images.

-.. Sample code::
+.. note::

-   * : An example containing all basic morphology operators like erode and dilate can be found at opencv_source_code/samples/gpu/morphology.cpp
+   * An example containing all basic morphology operators like erode and dilate can be found at opencv_source_code/samples/gpu/morphology.cpp

 gpu::Filter
 -----------
--- a/modules/gpuimgproc/doc/color.rst
+++ b/modules/gpuimgproc/doc/color.rst
@ -123,9 +123,9 @@ Composites two images using alpha opacity values contained in each image.

    :param stream: Stream for the asynchronous version.

-.. Sample code::
+.. note::

-   * : An example demonstrating the use of alphaComp can be found at opencv_source_code/samples/gpu/alpha_comp.cpp
+   * An example demonstrating the use of alphaComp can be found at opencv_source_code/samples/gpu/alpha_comp.cpp


 .. [MHT2011] Pascal Getreuer, Malvar-He-Cutler Linear Image Demosaicking, Image Processing On Line, 2011
--- a/modules/gpuimgproc/doc/hough.rst
+++ b/modules/gpuimgproc/doc/hough.rst
@ -108,9 +108,9 @@ Base class for line segments detector algorithm. ::
        virtual int getMaxLines() const = 0;
    };

-.. Sample code::
+.. note::

-   * : An example using the Hough segment detector can be found at opencv_source_code/samples/gpu/houghlines.cpp
+   * An example using the Hough segment detector can be found at opencv_source_code/samples/gpu/houghlines.cpp


 gpu::HoughSegmentDetector::detect
--- a/modules/gpuoptflow/doc/optflow.rst
+++ b/modules/gpuoptflow/doc/optflow.rst
@ -3,10 +3,10 @@ Optical Flow

 .. highlight:: cpp

-.. Sample code::
+.. note::

-   * : A general optical flow example can be found at opencv_source_code/samples/gpu/optical_flow.cpp
-   * : A feneral optical flow example using the nvidia API can be found at opencv_source_code/samples/gpu/opticalflow_nvidia_api.cpp
+   * A general optical flow example can be found at opencv_source_code/samples/gpu/optical_flow.cpp
+   * A general optical flow example using the Nvidia API can be found at opencv_source_code/samples/gpu/opticalflow_nvidia_api.cpp


 gpu::BroxOpticalFlow
@ -48,9 +48,9 @@ Class computing the optical flow for two images using Brox et al Optical Flow al
        GpuMat buf;
    };

-.. Sample code::
+.. note::

-   * : An example illustrating the Brox et al optical flow algorithm can be found at opencv_source_code/samples/gpu/brox_optical_flow.cpp
+   * An example illustrating the Brox et al optical flow algorithm can be found at opencv_source_code/samples/gpu/brox_optical_flow.cpp


 gpu::FarnebackOpticalFlow
@ -145,9 +145,9 @@ The class can calculate an optical flow for a sparse feature set or dense optica

 .. seealso:: :ocv:func:`calcOpticalFlowPyrLK`

-.. Sample code::
+.. note::

-   * : An example of the Lucas Kanade optical flow algorithm can be found at opencv_source_code/samples/gpu/pyrlk_optical_flow.cpp
+   * An example of the Lucas Kanade optical flow algorithm can be found at opencv_source_code/samples/gpu/pyrlk_optical_flow.cpp


 gpu::PyrLKOpticalFlow::sparse
--- a/modules/gpustereo/doc/stereo.rst
+++ b/modules/gpustereo/doc/stereo.rst
@ -3,11 +3,11 @@ Stereo Correspondence

 .. highlight:: cpp

-.. Sample code::
+.. note::

-   * : A basic stereo matching example can be found at opencv_source_code/samples/gpu/stereo_match.cpp
-   * : A stereo matching example using several GPU's can be found at opencv_source_code/samples/gpu/stereo_multi.cpp
-   * : A stereo matching example using several GPU's and driver API can be found at opencv_source_code/samples/gpu/driver_api_stereo_multi.cpp
+   * A basic stereo matching example can be found at opencv_source_code/samples/gpu/stereo_match.cpp
+   * A stereo matching example using several GPUs can be found at opencv_source_code/samples/gpu/stereo_multi.cpp
+   * A stereo matching example using several GPUs and driver API can be found at opencv_source_code/samples/gpu/driver_api_stereo_multi.cpp

 gpu::StereoBM
 -------------
--- a/modules/highgui/doc/reading_and_writing_images_and_video.rst
+++ b/modules/highgui/doc/reading_and_writing_images_and_video.rst
@ -223,14 +223,14 @@ The class provides C++ API for capturing video from cameras or for reading video

 .. note:: In C API the black-box structure ``CvCapture`` is used instead of ``VideoCapture``.

-.. Sample code::
+.. note::

-   * : A basic sample on using the VideoCapture interface can be found at opencv_source_code/samples/cpp/starter_video.cpp
-   * : Another basic video processing sample can be found at opencv_source_code/samples/cpp/video_dmtx.cpp
+   * A basic sample on using the VideoCapture interface can be found at opencv_source_code/samples/cpp/starter_video.cpp
+   * Another basic video processing sample can be found at opencv_source_code/samples/cpp/video_dmtx.cpp

-   * : PYTHON : A basic sample on using the VideoCapture interface can be found at opencv_source_code/samples/python2/video.py
-   * : PYTHON : basic video processing sample can be found at opencv_source_code/samples/python2/video_dmtx.py
-   * : PYTHON : A multi threaded video processing sample can be found at opencv_source_code/samples/python2/video_threaded.py
+   * (Python) A basic sample on using the VideoCapture interface can be found at opencv_source_code/samples/python2/video.py
+   * (Python) Another basic video processing sample can be found at opencv_source_code/samples/python2/video_dmtx.py
+   * (Python) A multi-threaded video processing sample can be found at opencv_source_code/samples/python2/video_threaded.py


 VideoCapture::VideoCapture
--- a/modules/highgui/doc/user_interface.rst
+++ b/modules/highgui/doc/user_interface.rst
@ -33,9 +33,9 @@ The function ``createTrackbar`` creates a trackbar (a slider or range control) w

 Clicking the label of each trackbar enables editing the trackbar values manually.

-.. Sample code::
+.. note::

-   * : An example of using the trackbar functionality can be found at opencv_source_code/samples/cpp/connected_components.cpp
+   * An example of using the trackbar functionality can be found at opencv_source_code/samples/cpp/connected_components.cpp

 getTrackbarPos
 ------------------
--- a/modules/highgui/src/cap_dshow.cpp
+++ b/modules/highgui/src/cap_dshow.cpp
@ -3162,18 +3162,18 @@ void CvCaptureCAM_DShow::close()
 // Initialize camera input
 bool CvCaptureCAM_DShow::open( int _index )
 {
-    int try_index = _index;
    int devices = 0;

    close();
    devices = VI.listDevices(true);
    if (devices == 0)
        return false;
-    try_index = try_index < 0 ? 0 : (try_index > devices-1 ? devices-1 : try_index);
-    VI.setupDevice(try_index);
-    if( !VI.isDeviceSetup(try_index) )
+    if (_index < 0 || _index > devices-1)
        return false;
-    index = try_index;
+    VI.setupDevice(_index);
+    if( !VI.isDeviceSetup(_index) )
+        return false;
+    index = _index;
    return true;
 }

--- a/modules/highgui/src/cap_ffmpeg_impl.hpp
+++ b/modules/highgui/src/cap_ffmpeg_impl.hpp
@ -1392,8 +1392,6 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int
 /// close video output stream and free associated memory
 void CvVideoWriter_FFMPEG::close()
 {
-    unsigned i;
-
    // nothing to do if already released
    if ( !picture )
        return;
@ -1449,13 +1447,6 @@ void CvVideoWriter_FFMPEG::close()

    av_free(outbuf);

-    /* free the streams */
-    for(i = 0; i < oc->nb_streams; i++)
-    {
-        av_freep(&oc->streams[i]->codec);
-        av_freep(&oc->streams[i]);
-    }
-
    if (!(fmt->flags & AVFMT_NOFILE))
    {
        /* close the output file */
@ -1473,7 +1464,7 @@ void CvVideoWriter_FFMPEG::close()
    }

    /* free the stream */
-    av_free(oc);
+    avformat_free_context(oc);

    if( temp_image.data )
    {
--- a/modules/imgproc/doc/feature_detection.rst
+++ b/modules/imgproc/doc/feature_detection.rst
@ -30,11 +30,11 @@ Finds edges in an image using the [Canny86]_ algorithm.
 The function finds edges in the input image ``image`` and marks them in the output map ``edges`` using the Canny algorithm. The smallest value between ``threshold1`` and ``threshold2`` is used for edge linking. The largest value is used to find initial segments of strong edges. See
 http://en.wikipedia.org/wiki/Canny_edge_detector

-.. Sample code::
+.. note::

-   * : An example on using the canny edge detector can be found at opencv_source_code/samples/cpp/edge.cpp
+   * An example on using the canny edge detector can be found at opencv_source_code/samples/cpp/edge.cpp

-   * : PYTHON : An example on using the canny edge detector can be found at opencv_source_code/samples/cpp/edge.py
+   * (Python) An example on using the canny edge detector can be found at opencv_source_code/samples/python2/edge.py

 cornerEigenValsAndVecs
 ----------------------
@ -85,9 +85,9 @@ The output of the function can be used for robust edge or corner detection.
    :ocv:func:`cornerHarris`,
    :ocv:func:`preCornerDetect`

-.. Sample code::
+.. note::

-   * : PYTHON : An example on how to use eigenvectors and eigenvalues to estimate image texture flow direction can be found at opencv_source_code/samples/python2/texture_flow.py
+   * (Python) An example on how to use eigenvectors and eigenvalues to estimate image texture flow direction can be found at opencv_source_code/samples/python2/texture_flow.py

 cornerHarris
 ------------
@ -350,9 +350,9 @@ Example: ::
    :ocv:func:`fitEllipse`,
    :ocv:func:`minEnclosingCircle`

-.. Sample code::
+.. note::

-   * : An example using the Hough circle detector can be found at opencv_source_code/samples/cpp/houghcircles.cpp
+   * An example using the Hough circle detector can be found at opencv_source_code/samples/cpp/houghcircles.cpp

 HoughLines
 ----------
@ -407,9 +407,9 @@ Finds lines in a binary image using the standard Hough transform.
 The function implements the standard or standard multi-scale Hough transform algorithm for line detection.  See http://homepages.inf.ed.ac.uk/rbf/HIPR2/hough.htm for a good explanation of Hough transform.
 See also the example in :ocv:func:`HoughLinesP` description.

-.. Sample code::
+.. note::

-   * : An example using the Hough line detector can be found at opencv_source_code/samples/cpp/houghlines.cpp
+   * An example using the Hough line detector can be found at opencv_source_code/samples/cpp/houghlines.cpp

 HoughLinesP
 -----------
--- a/modules/imgproc/doc/filtering.rst
+++ b/modules/imgproc/doc/filtering.rst
@ -22,9 +22,9 @@ OpenCV enables you to specify the extrapolation method. For details, see the fun
    * BORDER_CONSTANT:      iiiiii|abcdefgh|iiiiiii  with some specified 'i'
    */

-.. Sample code::
+.. note::

-   * : PYTHON : A complete example illustrating different morphological operations like erode/dilate, open/close, blackhat/tophat ... can be found at opencv_source_code/samples/python2/morphology.py
+   * (Python) A complete example illustrating different morphological operations like erode/dilate, open/close, blackhat/tophat ... can be found at opencv_source_code/samples/python2/morphology.py

 BaseColumnFilter
 ----------------
@ -785,9 +785,9 @@ The function supports the in-place mode. Dilation can be applied several ( ``ite
    :ocv:func:`getStructuringElement`


-.. Sample code::
+.. note::

-   * : An example using the morphological dilate operation can be found at opencv_source_code/samples/cpp/morphology2.cpp
+   * An example using the morphological dilate operation can be found at opencv_source_code/samples/cpp/morphology2.cpp

    

@ -831,9 +831,9 @@ The function supports the in-place mode. Erosion can be applied several ( ``iter
    :ocv:func:`createMorphologyFilter`,
    :ocv:func:`getStructuringElement`

-.. Sample code::
+.. note::

-   * : An example using the morphological erode operation can be found at opencv_source_code/samples/cpp/morphology2.cpp
+   * An example using the morphological erode operation can be found at opencv_source_code/samples/cpp/morphology2.cpp

 filter2D
 --------
@ -1194,9 +1194,9 @@ Any of the operations can be done in-place. In case of multi-channel images, eac
    :ocv:func:`createMorphologyFilter`,
    :ocv:func:`getStructuringElement`

-.. Sample code::
+.. note::

-   * : An example using the morphologyEx function for the morphological opening and closing operations can be found at opencv_source_code/samples/cpp/morphology2.cpp
+   * An example using the morphologyEx function for the morphological opening and closing operations can be found at opencv_source_code/samples/cpp/morphology2.cpp

 Laplacian
 ---------
@ -1240,9 +1240,9 @@ This is done when ``ksize > 1`` . When ``ksize == 1`` , the Laplacian is compute
    :ocv:func:`Sobel`,
    :ocv:func:`Scharr`

-.. Sample code::
+.. note::

-   * : An example using the Laplace transformation for edge detection can be found at opencv_source_code/samples/cpp/laplace.cpp
+   * An example using the Laplace transformation for edge detection can be found at opencv_source_code/samples/cpp/laplace.cpp

 pyrDown
 -------
@ -1299,9 +1299,9 @@ Upsamples an image and then blurs it.
 The function performs the upsampling step of the Gaussian pyramid construction, though it can actually be used to construct the Laplacian pyramid. First, it upsamples the source image by injecting even zero rows and columns and then convolves the result with the same kernel as in
 :ocv:func:`pyrDown`  multiplied by 4.

-.. Sample code::
+.. note::

-   * : PYTHON : An example of Laplacian Pyramid construction and merging can be found at opencv_source_code/samples/python2/lappyr.py
+   * (Python) An example of Laplacian Pyramid construction and merging can be found at opencv_source_code/samples/python2/lappyr.py


 pyrMeanShiftFiltering
@ -1350,9 +1350,9 @@ After the iterations over, the color components of the initial pixel (that is, t

 When ``maxLevel > 0``, the gaussian pyramid of ``maxLevel+1`` levels is built, and the above procedure is run on the smallest layer first. After that, the results are propagated to the larger layer and the iterations are run again only on those pixels where the layer colors differ by more than ``sr`` from the lower-resolution layer of the pyramid. That makes boundaries of color regions sharper. Note that the results will be actually different from the ones obtained by running the meanshift procedure on the whole original image (i.e. when ``maxLevel==0``).

-.. Sample code::
+.. note::

-   * : An example using mean-shift image segmentation can be found at opencv_source_code/samples/cpp/meanshift_segmentation.cpp
+   * An example using mean-shift image segmentation can be found at opencv_source_code/samples/cpp/meanshift_segmentation.cpp

 sepFilter2D
 -----------
--- a/modules/imgproc/doc/geometric_transformations.rst
+++ b/modules/imgproc/doc/geometric_transformations.rst
@ -298,9 +298,9 @@ where

 The function emulates the human "foveal" vision and can be used for fast scale and rotation-invariant template matching, for object tracking and so forth. The function can not operate in-place.

-.. Sample code::
+.. note::

-   * : An example using the geometric logpolar operation in 4 applications can be found at opencv_source_code/samples/cpp/logpolar_bsm.cpp
+   * An example using the geometric logpolar operation in 4 applications can be found at opencv_source_code/samples/cpp/logpolar_bsm.cpp

 remap
 -----
--- a/modules/imgproc/doc/histograms.rst
+++ b/modules/imgproc/doc/histograms.rst
@ -98,12 +98,12 @@ input arrays at the same location. The sample below shows how to compute a 2D Hu
        waitKey();
    }

-.. Sample code::
+.. note::

-   * : An example for creating histograms of an image can be found at opencv_source_code/samples/cpp/demhist.cpp
+   * An example for creating histograms of an image can be found at opencv_source_code/samples/cpp/demhist.cpp

-   * : PYTHON : An example for creating color histograms can be found at opencv_source/samples/python2/color_histogram.py
-   * : PYTHON : An example illustrating RGB and grayscale histogram plotting can be found at opencv_source/samples/python2/hist.py
+   * (Python) An example for creating color histograms can be found at opencv_source/samples/python2/color_histogram.py
+   * (Python) An example illustrating RGB and grayscale histogram plotting can be found at opencv_source/samples/python2/hist.py


 calcBackProject
--- a/modules/imgproc/doc/miscellaneous_transformations.rst
+++ b/modules/imgproc/doc/miscellaneous_transformations.rst
@ -476,11 +476,11 @@ In this mode, the complexity is still linear.
 That is, the function provides a very fast way to compute the Voronoi diagram for a binary image.
 Currently, the second variant can use only the approximate distance transform algorithm, i.e. ``maskSize=CV_DIST_MASK_PRECISE`` is not supported yet.

-.. Sample code::
+.. note::

-   * : An example on using the distance transform can be found at opencv_source_code/samples/cpp/distrans.cpp
+   * An example on using the distance transform can be found at opencv_source_code/samples/cpp/distrans.cpp

-   * : PYTHON : An example on using the distance transform can be found at opencv_source/samples/python2/distrans.py
+   * (Python) An example on using the distance transform can be found at opencv_source/samples/python2/distrans.py

 floodFill
 ---------
@ -584,11 +584,11 @@ Use these functions to either mark a connected component with the specified colo

 .. seealso:: :ocv:func:`findContours`

-.. Sample code::
+.. note::

-   * : An example using the FloodFill technique can be found at opencv_source_code/samples/cpp/ffilldemo.cpp
+   * An example using the FloodFill technique can be found at opencv_source_code/samples/cpp/ffilldemo.cpp

-   * : PYTHON : An example using the FloodFill technique can be found at opencv_source_code/samples/python2/floodfill.cpp
+   * (Python) An example using the FloodFill technique can be found at opencv_source_code/samples/python2/floodfill.py

 integral
 --------
@ -748,11 +748,11 @@ Visual demonstration and usage example of the function can be found in the OpenC

 .. seealso:: :ocv:func:`findContours`

-.. Sample code::
+.. note::

-   * : An example using the watershed algorithm can be found at opencv_source_code/samples/cpp/watershed.cpp
+   * An example using the watershed algorithm can be found at opencv_source_code/samples/cpp/watershed.cpp

-   * : PYTHON : An example using the watershed algorithm can be found at opencv_source_code/samples/python2/watershed.py
+   * (Python) An example using the watershed algorithm can be found at opencv_source_code/samples/python2/watershed.py

 grabCut
 -------
@ -800,8 +800,8 @@ See the sample ``grabcut.cpp`` to learn how to use the function.
 .. [Meyer92] Meyer, F. *Color Image Segmentation*, ICIP92, 1992


-.. Sample code::
+.. note::

-   * : An example using the GrabCut algorithm can be found at opencv_source_code/samples/cpp/grabcut.cpp
+   * An example using the GrabCut algorithm can be found at opencv_source_code/samples/cpp/grabcut.cpp

-   * : PYTHON : An example using the GrabCut algorithm can be found at opencv_source_code/samples/python2/grabcut.py
+   * (Python) An example using the GrabCut algorithm can be found at opencv_source_code/samples/python2/grabcut.py
--- a/modules/imgproc/doc/object_detection.rst
+++ b/modules/imgproc/doc/object_detection.rst
@ -73,6 +73,6 @@ image patch:
 After the function finishes the comparison, the best matches can be found as global minimums (when ``CV_TM_SQDIFF`` was used) or maximums (when ``CV_TM_CCORR`` or ``CV_TM_CCOEFF`` was used) using the
 :ocv:func:`minMaxLoc` function. In case of a color image, template summation in the numerator and each sum in the denominator is done over all of the channels and separate mean values are used for each channel. That is, the function can take a color template and a color image. The result will still be a single-channel image, which is easier to analyze.

-.. Sample code::
+.. note::

-   * : PYTHON : An example on how to match mouse selected regions in an image can be found at opencv_source_code/samples/python2/mouse_and_match.py
+   * (Python) An example on how to match mouse selected regions in an image can be found at opencv_source_code/samples/python2/mouse_and_match.py
--- a/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst
+++ b/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst
@ -192,13 +192,13 @@ The function retrieves contours from the binary image using the algorithm

 .. note:: If you use the new Python interface then the ``CV_`` prefix has to be omitted in contour retrieval mode and contour approximation method parameters (for example, use ``cv2.RETR_LIST`` and ``cv2.CHAIN_APPROX_NONE`` parameters). If you use the old Python interface then these parameters have the ``CV_`` prefix (for example, use ``cv.CV_RETR_LIST`` and ``cv.CV_CHAIN_APPROX_NONE``).

-.. Sample code::
+.. note::

-   * : An example using the findContour functionality can be found at opencv_source_code/samples/cpp/contours2.cpp
-   * : An example using findContours to clean up a background segmentation result at opencv_source_code/samples/cpp/segment_objects.cpp
+   * An example using the findContour functionality can be found at opencv_source_code/samples/cpp/contours2.cpp
+   * An example using findContours to clean up a background segmentation result can be found at opencv_source_code/samples/cpp/segment_objects.cpp

-   * : PYTHON : An example using the findContour functionality can be found at opencv_source/samples/python2/contours.py
-   * : PYTHON : An example of detecting squares in an image can be found at opencv_source/samples/python2/squares.py
+   * (Python) An example using the findContour functionality can be found at opencv_source/samples/python2/contours.py
+   * (Python) An example of detecting squares in an image can be found at opencv_source/samples/python2/squares.py


 approxPolyDP
@ -361,9 +361,9 @@ The functions find the convex hull of a 2D point set using the Sklansky's algori
 that has
 *O(N logN)* complexity in the current implementation. See the OpenCV sample ``convexhull.cpp`` that demonstrates the usage of different function variants.

-.. Sample code::
+.. note::

-   * : An example using the convexHull functionality can be found at opencv_source_code/samples/cpp/convexhull.cpp
+   * An example using the convexHull functionality can be found at opencv_source_code/samples/cpp/convexhull.cpp


 convexityDefects
@ -418,9 +418,9 @@ Fits an ellipse around a set of 2D points.

 The function calculates the ellipse that fits (in a least-squares sense) a set of 2D points best of all. It returns the rotated rectangle in which the ellipse is inscribed. The algorithm [Fitzgibbon95]_ is used.

-.. Sample code::
+.. note::

-   * : An example using the fitEllipse technique can be found at opencv_source_code/samples/cpp/fitellipse.cpp
+   * An example using the fitEllipse technique can be found at opencv_source_code/samples/cpp/fitellipse.cpp


 fitLine
@ -495,7 +495,7 @@ http://en.wikipedia.org/wiki/M-estimator

 .. Sample code:

-   * : PYTHON : An example of robust line fitting can be found at opencv_source_code/samples/python2/fitline.py
+   * (Python) An example of robust line fitting can be found at opencv_source_code/samples/python2/fitline.py


 isContourConvex
--- a/modules/imgproc/src/morph.cpp
+++ b/modules/imgproc/src/morph.cpp
@ -1136,11 +1136,151 @@ private:
    Scalar borderValue;
 };

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+// Runs one erode/dilate pass over 'src' into 'dst' using the IPP replicated-border
+// morphology routines.
+//
+//   op     - MORPH_DILATE or MORPH_ERODE; any other value makes the function return false.
+//   src    - input image; 8-bit or 32-bit float with 1, 3 or 4 channels is handled.
+//   dst    - output image; its buffer and step are handed to IPP as-is.
+//   kernel - structuring element whose data pointer and size are passed to IPP.
+//   anchor - anchor position passed to IPP unchanged.
+//
+// Returns true when both IPP calls report a non-negative status, false when the
+// type/op combination has no IPP routine or when IPP reports an error.
+static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kernel, const Point &anchor)
+{
+	const int srcType = src.type();
+
+	// When source and destination share a buffer, feed IPP a private copy of the input
+	// so that the call below receives distinct source and destination buffers.
+	Mat srcCopy;
+	const Mat* input = &src;
+	if( src.data == dst.data )
+	{
+		src.copyTo(srcCopy);
+		input = &srcCopy;
+	}
+
+	// ippiMorphologyInitAlloc_* (marked DEPRECATED by the original author) allocates and
+	// initializes the morphology state structure for erosion or dilation.
+	typedef IppStatus (CV_STDCALL* InitAllocFn)(int, const void*, IppiSize, IppiPoint, IppiMorphState **);
+	typedef IppStatus (CV_STDCALL* BorderReplFn)(const void*, int, void *, int, IppiSize, IppiBorderType, IppiMorphState *);
+
+	// Pick the per-type entry points in one place; unsupported types leave all of them null.
+	InitAllocFn initAlloc = 0;
+	BorderReplFn dilateFn = 0;
+	BorderReplFn erodeFn = 0;
+	switch( srcType )
+	{
+	case CV_8UC1:
+		initAlloc = (InitAllocFn)ippiMorphologyInitAlloc_8u_C1R;
+		dilateFn = (BorderReplFn)ippiDilateBorderReplicate_8u_C1R;
+		erodeFn = (BorderReplFn)ippiErodeBorderReplicate_8u_C1R;
+		break;
+	case CV_8UC3:
+		initAlloc = (InitAllocFn)ippiMorphologyInitAlloc_8u_C3R;
+		dilateFn = (BorderReplFn)ippiDilateBorderReplicate_8u_C3R;
+		erodeFn = (BorderReplFn)ippiErodeBorderReplicate_8u_C3R;
+		break;
+	case CV_8UC4:
+		initAlloc = (InitAllocFn)ippiMorphologyInitAlloc_8u_C4R;
+		dilateFn = (BorderReplFn)ippiDilateBorderReplicate_8u_C4R;
+		erodeFn = (BorderReplFn)ippiErodeBorderReplicate_8u_C4R;
+		break;
+	case CV_32FC1:
+		initAlloc = (InitAllocFn)ippiMorphologyInitAlloc_32f_C1R;
+		dilateFn = (BorderReplFn)ippiDilateBorderReplicate_32f_C1R;
+		erodeFn = (BorderReplFn)ippiErodeBorderReplicate_32f_C1R;
+		break;
+	case CV_32FC3:
+		initAlloc = (InitAllocFn)ippiMorphologyInitAlloc_32f_C3R;
+		dilateFn = (BorderReplFn)ippiDilateBorderReplicate_32f_C3R;
+		erodeFn = (BorderReplFn)ippiErodeBorderReplicate_32f_C3R;
+		break;
+	case CV_32FC4:
+		initAlloc = (InitAllocFn)ippiMorphologyInitAlloc_32f_C4R;
+		dilateFn = (BorderReplFn)ippiDilateBorderReplicate_32f_C4R;
+		erodeFn = (BorderReplFn)ippiErodeBorderReplicate_32f_C4R;
+		break;
+	default:
+		break;
+	}
+
+	// Only dilation and erosion have an IPP counterpart here.
+	BorderReplFn morphFn = 0;
+	if( op == MORPH_DILATE )
+		morphFn = dilateFn;
+	else if( op == MORPH_ERODE )
+		morphFn = erodeFn;
+
+	if( !morphFn || !initAlloc )
+		return false;
+
+	IppiMorphState* state;
+	IppiSize roi = {src.cols, src.rows};
+	IppiSize kernelDims = {kernel.cols, kernel.rows};
+	IppiPoint ippAnchor = {anchor.x, anchor.y};
+	if( initAlloc( roi.width, kernel.data, kernelDims, ippAnchor, &state ) < 0 )
+		return false;
+
+	// Release the state regardless of the outcome of the morphology call.
+	const bool succeeded = morphFn( input->data, input->step[0],  dst.data, dst.step[0], roi, ippBorderRepl, state ) >= 0;
+	ippiMorphologyFree(state);
+	return succeeded;
+}
+
+// Tries to serve a single erode/dilate request through the IPP fast path.
+//
+// Returns true when _dst already holds the final result (computed by IPPMorphReplicate,
+// or copied from the source for the trivial iterations == 0 / single-element-kernel cases).
+// Returns false when the request is not eligible or the IPP helper reported failure,
+// in which case the caller is expected to fall back to its own implementation.
+//
+// Eligibility: depth CV_8U or CV_32F, at most one iteration, op is MORPH_DILATE or
+// MORPH_ERODE, and the border is BORDER_REPLICATE, or BORDER_CONSTANT with the default
+// morphology border value and a kernel that passes the row/column check below.
+static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
+	InputArray _kernel,
+	const Point &anchor, int iterations,
+	int borderType, const Scalar &borderValue)
+{
+	Mat src = _src.getMat(), kernel = _kernel.getMat();
+	if( !( src.depth() == CV_8U || src.depth() == CV_32F ) || ( iterations > 1 ) ||
+		!( borderType == cv::BORDER_REPLICATE || (borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue()) )
+		|| !( op == MORPH_DILATE || op == MORPH_ERODE) )
+		return false;
+
+	// Normalize and validate the anchor up front, so that the kernel look-ups below never
+	// index with a raw, possibly out-of-range (e.g. negative default) anchor.
+	Size ksize = kernel.data ? kernel.size() : Size(3,3);
+	Point normanchor = normalizeAnchor(anchor, ksize);
+
+	CV_Assert( normanchor.inside(Rect(0, 0, ksize.width, ksize.height)) );
+
+	if( borderType == cv::BORDER_CONSTANT )
+	{
+		// A constant-border request is only forwarded to the replicate-border IPP routine
+		// when every kernel row that is zero in the anchor column is entirely zero, and
+		// every kernel column that is zero in the anchor row is entirely zero.
+		int x, y;
+		for( y = 0; y < kernel.rows; y++ )
+		{
+			if( kernel.at<uchar>(y, normanchor.x) != 0 )
+				continue;
+			for( x = 0; x < kernel.cols; x++ )
+			{
+				if( kernel.at<uchar>(y,x) != 0 )
+					return false;
+			}
+		}
+		// The loop variable here is x, so the bound must be tested on x as well.
+		for( x = 0; x < kernel.cols; x++ )
+		{
+			if( kernel.at<uchar>(normanchor.y, x) != 0 )
+				continue;
+			for( y = 0; y < kernel.rows; y++ )
+			{
+				if( kernel.at<uchar>(y,x) != 0 )
+					return false;
+			}
+		}
+	}
+
+	_dst.create( src.size(), src.type() );
+	Mat dst = _dst.getMat();
+
+	// Nothing to compute: zero iterations or a single-element kernel leave the image unchanged.
+	if( iterations == 0 || kernel.rows*kernel.cols == 1 )
+	{
+		src.copyTo(dst);
+		return true;
+	}
+
+	if( !kernel.data )
+	{
+		// No kernel supplied: use a rectangular element sized from the iteration count.
+		kernel = getStructuringElement(MORPH_RECT, Size(1+iterations*2,1+iterations*2));
+		normanchor = Point(iterations, iterations);
+		iterations = 1;
+	}
+	else if( iterations > 1 && countNonZero(kernel) == kernel.rows*kernel.cols )
+	{
+		// NOTE(review): currently unreachable, because iterations > 1 is rejected at the top
+		// of this function; kept so the logic stays in place if that restriction is lifted.
+		normanchor = Point(normanchor.x*iterations, normanchor.y*iterations);
+		kernel = getStructuringElement(MORPH_RECT,
+			Size(ksize.width + (iterations-1)*(ksize.width-1),
+			ksize.height + (iterations-1)*(ksize.height-1)),
+			normanchor);
+		iterations = 1;
+	}
+
+	return IPPMorphReplicate( op, src, dst, kernel, normanchor );
+}
+#endif
+
 static void morphOp( int op, InputArray _src, OutputArray _dst,
                     InputArray _kernel,
                     Point anchor, int iterations,
                     int borderType, const Scalar& borderValue )
 {
+
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+	if( IPPMorphOp(op, _src, _dst, _kernel, anchor, iterations, borderType, borderValue) )
+		return;
+#endif
+
    Mat src = _src.getMat(), kernel = _kernel.getMat();
    Size ksize = kernel.data ? kernel.size() : Size(3,3);
    anchor = normalizeAnchor(anchor, ksize);
--- a/modules/imgproc/src/samplers.cpp
+++ b/modules/imgproc/src/samplers.cpp
@ -267,6 +267,11 @@ static void getRectSubPix_8u32f
    }
 }

+// Common function-pointer signature that the IPP ippiCopySubpixIntersect_* routines
+// are cast to before being called from getRectSubPix.
+typedef CvStatus (CV_STDCALL *CvIPPGetRectSubPixFunc)(
+    const void* src, int src_step, CvSize src_size,
+    void* dst, int dst_step, CvSize win_size,
+    CvPoint2D32f center,
+    CvPoint* minpt, CvPoint* maxpt );

 static void
 getQuadrangleSubPix_8u32f_CnR( const uchar* src, size_t src_step, Size src_size,
@ -374,6 +379,19 @@ void cv::getRectSubPix( InputArray _image, Size patchSize, Point2f center,
    _patch.create(patchSize, CV_MAKETYPE(ddepth, cn));
    Mat patch = _patch.getMat();

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    CvPoint minpt, maxpt;
+    int srctype = CV_MAT_TYPE(src->type), dsttype = CV_MAT_TYPE(dst->type);
+    CvIPPGetRectSubPixFunc ippfunc =
+        srctype == CV_8UC1 && dsttype == CV_8UC1 ? (CvIPPGetRectSubPixFunc)ippiCopySubpixIntersect_8u_C1R :
+        srctype == CV_8UC1 && dsttype == CV_32FC1 ? (CvIPPGetRectSubPixFunc)ippiCopySubpixIntersect_8u32f_C1R :
+        srctype == CV_32FC1 && dsttype == CV_32FC1 ? (CvIPPGetRectSubPixFunc)ippiCopySubpixIntersect_32f_C1R : 0;
+
+    if( ippfunc && ippfunc(src->data.ptr, src->step, src_size, dst->data.ptr,
+                           dst->step, dst_size, center, &minpt, &maxpt) >= 0 )
+        return;
+#endif
+
    if( depth == CV_8U && ddepth == CV_8U )
        getRectSubPix_Cn_<uchar, uchar, int, scale_fixpt, cast_8u>
        (image.data, image.step, image.size(), patch.data, patch.step, patch.size(), center, cn);
--- a/modules/imgproc/src/smooth.cpp
+++ b/modules/imgproc/src/smooth.cpp
@ -856,6 +856,22 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
        return;
 #endif

+#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7)
+    if(src.type() == CV_32FC1 && sigma1 == sigma2 && ksize.width == ksize.height && sigma1 != 0.0 )
+    {
+        IppiSize roi = {src.cols, src.rows};
+        int bufSize = 0;
+        ippiFilterGaussGetBufferSize_32f_C1R(roi, ksize.width, &bufSize);
+        AutoBuffer<uchar> buf(bufSize+128);
+        if( ippiFilterGaussBorder_32f_C1R((const Ipp32f *)src.data, (int)src.step,
+                                          (Ipp32f *)dst.data, (int)dst.step,
+                                          roi, ksize.width, (Ipp32f)sigma1,
+                                          (IppiBorderType)borderType, 0.0,
+                                          alignPtr(&buf[0],32)) >= 0 )
+            return;
+    }
+#endif
+
    Ptr<FilterEngine> f = createGaussianFilter( src.type(), ksize, sigma1, sigma2, borderType );
    f->apply( src, dst );
 }
@ -1892,6 +1908,29 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
    radius = MAX(radius, 1);
    d = radius*2 + 1;

+#if 0 && defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7)
+    if(cn == 1)
+    {
+        IppiSize kernel = {d, d};
+        IppiSize roi={src.cols, src.rows};
+        int bufsize=0;
+        ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize);
+        AutoBuffer<uchar> buf(bufsize+128);
+        IppiFilterBilateralSpec *pSpec = (IppiFilterBilateralSpec *)alignPtr(&buf[0], 32);
+        ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, sigma_color*sigma_color, sigma_space*sigma_space, 1, pSpec );
+        Mat tsrc;
+        const Mat* psrc = &src;
+        if( src.data == dst.data )
+        {
+            src.copyTo(tsrc);
+            psrc = &tsrc;
+        }
+        if( ippiFilterBilateral_8u_C1R(psrc->data, (int)psrc->step[0],
+                                       dst.data, (int)dst.step[0],
+                                       roi, kernel, pSpec) >= 0 )
+            return;
+    }
+#endif
    Mat temp;
    copyMakeBorder( src, temp, radius, radius, radius, radius, borderType );

--- a/modules/legacy/doc/expectation_maximization.rst
+++ b/modules/legacy/doc/expectation_maximization.rst
@ -5,11 +5,11 @@ This section describes obsolete ``C`` interface of EM algorithm. Details of the

 .. highlight:: cpp

-.. Sample code::
+.. note::

-   * : An example on using the Expectation Maximalization algorithm can be found at opencv_source_code/samples/cpp/em.cpp
+   * An example on using the Expectation Maximization algorithm can be found at opencv_source_code/samples/cpp/em.cpp

-   * : PYTHON : An example using Expectation Maximalization for Gaussian Mixing can be found at opencv_source_code/samples/python2/gaussian_mix.py
+   * (Python) An example using Expectation Maximization for Gaussian Mixing can be found at opencv_source_code/samples/python2/gaussian_mix.py


 CvEMParams
--- a/modules/legacy/doc/feature_detection_and_description.rst
+++ b/modules/legacy/doc/feature_detection_and_description.rst
@ -75,7 +75,7 @@ Class containing a base structure for ``RTreeClassifier``. ::
            void estimateQuantPercForPosteriors(float perc[2]);
    };

-.. Sample code::
+.. note::

   * : PYTHON : An example using Randomized Tree training for letter recognition can be found at opencv_source_code/samples/python2/letter_recog.py

@ -101,7 +101,7 @@ Trains a randomized tree using an input set of keypoints.

    :param num_quant_bits: Number of bits used for quantization.

-.. Sample code::
+.. note::

   * : An example on training a Random Tree Classifier for letter recognition can be found at opencv_source_code\samples\cpp\letter_recog.cpp

--- a/modules/ml/doc/k_nearest_neighbors.rst
+++ b/modules/ml/doc/k_nearest_neighbors.rst
@ -11,11 +11,11 @@ CvKNearest

 The class implements K-Nearest Neighbors model as described in the beginning of this section.

-.. Sample code::
+.. note::

-   * : PYTHON : An example of digit recognition using KNearest can be found at opencv_source/samples/python2/digits.py
-   * : PYTHON : An example of grid search digit recognition using KNearest can be found at opencv_source/samples/python2/digits_adjust.py
-   * : PYTHON : An example of video digit recognition using KNearest can be found at opencv_source/samples/python2/digits_video.py
+   * (Python) An example of digit recognition using KNearest can be found at opencv_source/samples/python2/digits.py
+   * (Python) An example of grid search digit recognition using KNearest can be found at opencv_source/samples/python2/digits_adjust.py
+   * (Python) An example of video digit recognition using KNearest can be found at opencv_source/samples/python2/digits_video.py

 CvKNearest::CvKNearest
 ----------------------
--- a/modules/ml/doc/support_vector_machines.rst
+++ b/modules/ml/doc/support_vector_machines.rst
@ -158,11 +158,11 @@ CvSVM

 Support Vector Machines.

-.. Sample code::
+.. note::

-   * : PYTHON : An example of digit recognition using SVM can be found at opencv_source/samples/python2/digits.py
-   * : PYTHON : An example of grid search digit recognition using SVM can be found at opencv_source/samples/python2/digits_adjust.py
-   * : PYTHON : An example of video digit recognition using SVM can be found at opencv_source/samples/python2/digits_video.py
+   * (Python) An example of digit recognition using SVM can be found at opencv_source/samples/python2/digits.py
+   * (Python) An example of grid search digit recognition using SVM can be found at opencv_source/samples/python2/digits_adjust.py
+   * (Python) An example of video digit recognition using SVM can be found at opencv_source/samples/python2/digits_video.py

 CvSVM::CvSVM
 ------------
--- a/modules/nonfree/doc/feature_detection.rst
+++ b/modules/nonfree/doc/feature_detection.rst
@ -84,10 +84,10 @@ SURF

 .. [Bay06] Bay, H. and Tuytelaars, T. and Van Gool, L. "SURF: Speeded Up Robust Features", 9th European Conference on Computer Vision, 2006

-.. Sample code::
+.. note::

-   * : An example using the SURF feature detector can be found at opencv_source_code/samples/cpp/generic_descriptor_match.cpp
-   * : Another example using the SURF feature detector, extractor and matcher can be found at opencv_source_code/samples/cpp/matcher_simple.cpp
+   * An example using the SURF feature detector can be found at opencv_source_code/samples/cpp/generic_descriptor_match.cpp
+   * Another example using the SURF feature detector, extractor and matcher can be found at opencv_source_code/samples/cpp/matcher_simple.cpp

 SURF::SURF
 ----------
@ -243,9 +243,9 @@ The class ``SURF_GPU`` uses some buffers and provides access to it. All buffers

 .. seealso:: :ocv:class:`SURF`

-.. Sample code::
+.. note::

-   * : An example for using the SURF keypoint matcher on GPU can be found at opencv_source_code/samples/gpu/surf_keypoint_matcher.cpp
+   * An example for using the SURF keypoint matcher on GPU can be found at opencv_source_code/samples/gpu/surf_keypoint_matcher.cpp

 ocl::SURF_OCL
 -------------
@ -345,6 +345,6 @@ The class ``SURF_OCL`` uses some buffers and provides access to it. All buffers

 .. seealso:: :ocv:class:`SURF`

-.. Sample code::
+.. note::

-   * : OCL : An example of the SURF detector can be found at opencv_source_code/samples/ocl/surf_matcher.cpp
+   * (Ocl) An example of the SURF detector can be found at opencv_source_code/samples/ocl/surf_matcher.cpp
--- a/modules/objdetect/doc/cascade_classification.rst
+++ b/modules/objdetect/doc/cascade_classification.rst
@ -216,9 +216,9 @@ Detects objects of different sizes in the input image. The detected objects are

 The function is parallelized with the TBB library.

-.. Sample code::
+.. note::

-   * : PYTHON : A face detection example using cascade classifiers can be found at opencv_source_code/samples/python2/facedetect.py
+   * (Python) A face detection example using cascade classifiers can be found at opencv_source_code/samples/python2/facedetect.py


 CascadeClassifier::setImage
--- a/modules/ocl/doc/feature_detection_and_description.rst
+++ b/modules/ocl/doc/feature_detection_and_description.rst
@ -363,9 +363,9 @@ The class implements Histogram of Oriented Gradients ([Dalal2005]_) object detec

 Interfaces of all methods are kept similar to the ``CPU HOG`` descriptor and detector analogues as much as possible.

-.. Sample code::
+.. note::

-   * : OCL : An example using the HOG descriptor can be found at opencv_source_code/samples/ocl/hog.cpp
+   (Ocl) An example using the HOG descriptor can be found at opencv_source_code/samples/ocl/hog.cpp

 ocl::HOGDescriptor::HOGDescriptor
 -------------------------------------
--- a/modules/ocl/doc/image_processing.rst
+++ b/modules/ocl/doc/image_processing.rst
@ -257,10 +257,10 @@ The class can calculate an optical flow for a sparse feature set or dense optica

 .. seealso:: :ocv:func:`calcOpticalFlowPyrLK`

-.. Sample code::
+.. note::

-   * : OCL : An example the Lucas Kanade optical flow pyramid method can be found at opencv_source_code/samples/ocl/pyrlk_optical_flow.cpp
-   * : OCL : An example for square detection can be found at opencv_source_code/samples/ocl/squares.cpp
+   * (Ocl) An example of the Lucas Kanade optical flow pyramid method can be found at opencv_source_code/samples/ocl/pyrlk_optical_flow.cpp
+   * (Ocl) An example for square detection can be found at opencv_source_code/samples/ocl/squares.cpp

 ocl::PyrLKOpticalFlow::sparse
 -----------------------------
--- a/modules/ocl/doc/object_detection.rst
+++ b/modules/ocl/doc/object_detection.rst
@ -17,9 +17,9 @@ Cascade classifier class used for object detection. Supports HAAR cascade classi
                                              Size minSize = Size(), Size maxSize = Size());
    };

-.. Sample code::
+.. note::

-   * : OCL : A face detection example using cascade classifiers can be found at opencv_source_code/samples/ocl/facedetect.cpp
+   (Ocl) A face detection example using cascade classifiers can be found at opencv_source_code/samples/ocl/facedetect.cpp

 ocl::OclCascadeClassifier::oclHaarDetectObjects
 ------------------------------------------------------
--- a/modules/ocl/doc/operations_on_matrices.rst
+++ b/modules/ocl/doc/operations_on_matrices.rst
@ -482,3 +482,39 @@ Performs generalized matrix multiplication.
            * **GEMM_2_T** transpose  ``src2``

 .. seealso:: :ocv:func:`gemm`
+
+ocl::sortByKey
+------------------
+Sorts the values in place according to their corresponding keys.
+
+.. ocv:function:: void ocl::sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false)
+
+    :param keys:   The keys to be used as sorting indices.
+
+    :param values: The array of values.
+
+    :param isGreaterThan: Determine sorting order.
+
+    :param method: supported sorting methods:
+            * **SORT_BITONIC**   bitonic sort, only support power-of-2 buffer size
+            * **SORT_SELECTION** selection sort, currently cannot sort duplicate keys
+            * **SORT_MERGE**     merge sort
+            * **SORT_RADIX**     radix sort, only support signed int/float keys(``CV_32S``/``CV_32F``)
+            
+Returns the sorted result of all the elements in values based on equivalent keys.
+
+The element unit in the values to be sorted is determined from the data type, 
+i.e., a ``CV_32FC2`` input ``{a1a2, b1b2}`` will be considered as two elements, regardless of its matrix dimensions.
+
+Both keys and values will be sorted inplace. 
+
+Keys must be a **single**-channel ``oclMat``.
+
+Example::
+
+    input -
+    keys   = {2,    3,   1}   (CV_8UC1)
+    values = {10,5, 4,3, 6,2} (CV_8UC2)
+    sortByKey(keys, values, SORT_SELECTION, false);
+    output -
+    keys   = {1,    2,   3}   (CV_8UC1)
+    values = {6,2, 10,5, 4,3} (CV_8UC2)
--- a/modules/ocl/include/opencv2/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl.hpp
@ -1772,6 +1772,31 @@ namespace cv
            oclMat diff_buf;
            oclMat norm_buf;
        };
+        // Sorting methods currently supported by sortByKey (passed as its 'method' argument).
+        enum
+        {
+            SORT_BITONIC,   // only supports power-of-2 buffer sizes
+            SORT_SELECTION, // cannot sort duplicate keys
+            SORT_MERGE,
+            SORT_RADIX      // only supports signed int/float keys (CV_32S/CV_32F)
+        };
+        //! Sorts all the elements in values according to their corresponding keys.
+        //
+        //  The element unit in the values to be sorted is determined from the data type,
+        //  i.e., a CV_32FC2 input {a1a2, b1b2} will be considered as two elements, regardless of its
+        //  matrix dimensions.
+        //  Both keys and values are sorted in place.
+        //  keys needs to be a single-channel oclMat.
+        //  method is one of the SORT_* constants declared above.
+        //  isGreaterThan selects the sorting order (false produces ascending keys in the example below).
+        //
+        //  Example:
+        //  input -
+        //    keys   = {2,    3,   1}   (CV_8UC1)
+        //    values = {10,5, 4,3, 6,2} (CV_8UC2)
+        //  sortByKey(keys, values, SORT_SELECTION, false);
+        //  output -
+        //    keys   = {1,    2,   3}   (CV_8UC1)
+        //    values = {6,2, 10,5, 4,3} (CV_8UC2)
+        void CV_EXPORTS sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false);
    }
 }
 #if defined _MSC_VER && _MSC_VER >= 1200
--- a/modules/ocl/src/opencl/kernel_radix_sort_by_key.cl
+++ b/modules/ocl/src/opencl/kernel_radix_sort_by_key.cl
@ -0,0 +1,176 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable 
+
+#ifndef N   // bits per radix digit; the kernels below use (1 << N) buckets per work-item
+#define N 4
+#endif
+
+#ifndef K_T // element type of the key buffers; float unless predefined
+#define K_T float
+#endif
+
+#ifndef V_T // element type of the value buffers; float unless predefined
+#define V_T float
+#endif
+
+#ifndef IS_GT // non-zero makes the kernels index the buckets in reverse digit order
+#define IS_GT 0
+#endif
+
+
+// Key conversion borrowed from Thrust's b40c radix sort:
+// https://github.com/thrust/thrust/blob/master/thrust/system/cuda/detail/detail/b40c/radixsort_key_conversion.h
+//
+// Takes the raw 32 bits of a key and returns the bits the radix passes operate on.
+//   K_FLT defined : every bit is flipped when the sign bit is set, otherwise only the sign bit.
+//   K_INT defined : only the sign bit is flipped.
+//   neither       : the bits are returned unchanged.
+__inline uint convertKey(uint converted_key)
+{
+#if defined(K_FLT)
+    const uint signBit = 0x80000000;
+    if (converted_key & signBit)
+    {
+        converted_key ^= 0xffffffff;
+    }
+    else
+    {
+        converted_key ^= signBit;
+    }
+#elif defined(K_INT)
+    converted_key ^= 1u << ((sizeof(int) * 8) - 1);
+#endif
+    return converted_key;
+}
+
+// Exclusive prefix sum: output[0] = 0 and output[i] = output[i - 1] + input[i - 1] for 0 < i < size.
+// FIXME(pengx17): the whole scan is executed serially by the work-item whose global id is 0,
+// every other work-item returns immediately; this is too naive and needs to be optimized.
+kernel
+    void naiveScanAddition(
+    __global int * input,
+    __global int * output,
+    int size
+    )
+{
+    if(get_global_id(0) != 0)
+    {
+        return;
+    }
+
+    output[0] = 0;
+    int idx = 1;
+    while(idx < size)
+    {
+        output[idx] = output[idx - 1] + input[idx - 1];
+        ++idx;
+    }
+}
+
+// following is ported from
+// https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_uint_kernels.cl
+//
+// Per-work-item digit histogram for one radix pass.
+//   unsortedKeys : keys; work-item g reads the (1 << N) consecutive keys starting at g * (1 << N)
+//   buckets      : output counts; the count of bucket b for work-item g is stored at
+//                  b * ((1 << N) * get_num_groups(0)) + g
+//   shiftCount   : bit position of the N-bit digit examined in this pass
+// With IS_GT set, a key whose digit is d is counted in bucket (1 << N) - 1 - d.
+kernel
+    void histogramRadixN (
+    __global K_T* unsortedKeys,
+    __global int * buckets,
+    uint shiftCount
+    )
+{
+    const int RADIX_T     = N;
+    const int RADICES_T   = (1 << RADIX_T);
+    const int NUM_OF_ELEMENTS_PER_WORK_ITEM_T = RADICES_T;
+    const int MASK_T      = (1 << RADIX_T) - 1;
+    // One counter per digit value. The array is sized from N instead of a literal 16 so that
+    // building the program with a larger N cannot index past its end.
+    int localBuckets[1 << N];
+    for(int i = 0; i < RADICES_T; ++i)
+    {
+        localBuckets[i] = 0;
+    }
+    int globalId    = get_global_id(0);
+    int numOfGroups = get_num_groups(0);
+
+    /* Calculate thread-histograms */
+    for(int i = 0; i < NUM_OF_ELEMENTS_PER_WORK_ITEM_T; ++i)
+    {
+        uint value = convertKey(as_uint(unsortedKeys[mad24(globalId, NUM_OF_ELEMENTS_PER_WORK_ITEM_T, i)]));
+        value = (value >> shiftCount) & MASK_T;
+#if IS_GT
+        localBuckets[RADICES_T - value - 1]++;
+#else
+        localBuckets[value]++;
+#endif
+    }
+
+    /* Publish the per-work-item counts, bucket-major */
+    for(int i = 0; i < NUM_OF_ELEMENTS_PER_WORK_ITEM_T; ++i)
+    {
+        buckets[mad24(i, RADICES_T * numOfGroups, globalId) ] = localBuckets[i];
+    }
+}
+
+// Scatter step of one radix pass.
+//   unsortedKeys / unsortedVals : input pairs; work-item g handles the (1 << N) consecutive
+//                                 pairs starting at g * (1 << N)
+//   scanedBuckets               : start offsets; the entry for bucket b of work-item g is read
+//                                 from b * (get_num_groups(0) << N) + g
+//   shiftCount                  : bit position of the N-bit digit examined in this pass
+//   sortedKeys / sortedVals     : output pairs
+// With IS_GT set, a key whose digit is d uses the offset stored for bucket (1 << N) - 1 - d.
+kernel
+    void permuteRadixN (
+    __global K_T*  unsortedKeys,
+    __global V_T*  unsortedVals,
+    __global int* scanedBuckets,
+    uint shiftCount,
+    __global K_T*  sortedKeys,
+    __global V_T*  sortedVals
+    )
+{
+    const int RADIX_T     = N;
+    const int RADICES_T   = (1 << RADIX_T);
+    const int MASK_T = (1<<RADIX_T)  -1;
+
+    int globalId  = get_global_id(0);
+    int numOfGroups = get_num_groups(0);
+    const int NUM_OF_ELEMENTS_PER_WORK_GROUP_T = numOfGroups << N;
+    // Next free output slot per digit value. The array is sized from N instead of a literal 16
+    // so that building the program with a larger N cannot index past its end.
+    int  localIndex[1 << N];
+
+    /* Load this work-item's start offsets into its private array */
+    for(int i = 0; i < RADICES_T; ++i)
+    {
+#if IS_GT
+        localIndex[i] = scanedBuckets[mad24(RADICES_T - i - 1, NUM_OF_ELEMENTS_PER_WORK_GROUP_T, globalId)];
+#else
+        localIndex[i] = scanedBuckets[mad24(i, NUM_OF_ELEMENTS_PER_WORK_GROUP_T, globalId)];
+#endif
+    }
+    /* Permute elements to appropriate location */
+    for(int i = 0; i < RADICES_T; ++i)
+    {
+        int old_idx = mad24(globalId, RADICES_T, i);
+        K_T  ovalue = unsortedKeys[old_idx];
+        uint value = convertKey(as_uint(ovalue));
+        uint maskedValue = (value >> shiftCount) & MASK_T;
+        uint index = localIndex[maskedValue];
+        sortedKeys[index] = ovalue;
+        sortedVals[index] = unsortedVals[old_idx];
+        // advance the slot so the next key with the same digit lands right after this one
+        localIndex[maskedValue] = index + 1;
+    }
+}
--- a/modules/ocl/src/opencl/kernel_sort_by_key.cl
+++ b/modules/ocl/src/opencl/kernel_sort_by_key.cl
@ -0,0 +1,245 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+// Key element type; may be predefined when the program is built, float otherwise.
+#ifndef K_T
+#define K_T float
+#endif
+
+// Value element type; may be predefined when the program is built, float otherwise.
+#ifndef V_T
+#define V_T float
+#endif
+
+// Non-zero selects the "greater than" comparator. The default is the integer 0, as in
+// kernel_radix_sort_by_key.cl, so that the #if below tests a real preprocessor constant
+// instead of relying on the identifier `false` evaluating to 0.
+#ifndef IS_GT
+#define IS_GT 0
+#endif
+
+// my_comp(x, y) is true when x has to be placed before y.
+#if IS_GT
+#define my_comp(x,y) ((x) > (y))
+#else
+#define my_comp(x,y) ((x) < (y))
+#endif
+
+/////////////////////// Bitonic sort ////////////////////////////
+// Adapted from the Bolt library:
+// https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_by_key_kernels.cl
+//
+// One compare-exchange pass of a bitonic sort. Every work-item whose global id is below
+// count / 2 loads the two key/value pairs that lie (1 << (stage - passOfStage)) elements
+// apart and stores them back ordered by my_comp; work-items in odd-numbered runs of
+// (1 << stage) ids store the pair the other way round.
+__kernel
+    void bitonicSort
+    (
+        __global K_T * keys,
+        __global V_T * vals,
+        int count,
+        int stage,
+        int passOfStage
+    )
+{
+    const int gid = get_global_id(0);
+    if(gid >= count / 2)
+    {
+        return;
+    }
+
+    const int stride = 1 << (stage - passOfStage);
+
+    // positions of the two elements this work-item loads
+    const int srcLow  = min( (gid % stride) + (gid / stride) * (2 * stride), count );
+    const int srcHigh = min( srcLow + stride, count );
+
+    const V_T valLow  = vals[srcLow];
+    const V_T valHigh = vals[srcHigh];
+    const K_T keyLow  = keys[srcLow];
+    const K_T keyHigh = keys[srcHigh];
+
+    // positions the pair is stored to; exchanged for work-items in odd runs of (1 << stage) ids
+    int dstLow  = srcLow;
+    int dstHigh = srcHigh;
+    if((gid / (1 << stage)) % 2 == 1)
+    {
+        dstLow  = srcHigh;
+        dstHigh = srcLow;
+    }
+
+    if(my_comp(keyLow, keyHigh))
+    {
+        // already in the requested order: each element goes back to its own side
+        keys[dstHigh] = keyHigh;
+        keys[dstLow]  = keyLow;
+        vals[dstHigh] = valHigh;
+        vals[dstLow]  = valLow;
+    }
+    else
+    {
+        // exchange the two elements, values travelling with their keys
+        keys[dstHigh] = keyLow;
+        keys[dstLow]  = keyHigh;
+        vals[dstHigh] = valLow;
+        vals[dstLow]  = valHigh;
+    }
+}
+
+/////////////////////// Selection sort ////////////////////////////
+// Kernel ported from the Bolt library; each work-group ranks the keys of its own segment and rewrites that segment of keys/vals in place:
+//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl
+__kernel
+    void selectionSortLocal
+    (
+        __global K_T * keys,
+        __global V_T * vals,
+        const int count, // total number of elements in keys/vals
+        __local  K_T * scratch // local buffer indexed by local id; receives one key per work-item
+    )
+{
+    int          i  = get_local_id(0); // index in workgroup
+    int numOfGroups = get_num_groups(0); // number of work-groups
+    int groupID     = get_group_id(0);
+    int         wg  = get_local_size(0); // workgroup size = block size
+    int n; // number of elements to be processed for this work group
+
+    int offset   = groupID * wg;
+    int same     = 0;
+    
+    vals      += offset; // from here on vals/keys index relative to this group's segment
+    keys      += offset;
+    n = (groupID == (numOfGroups-1))? (count - wg*(numOfGroups-1)) : wg; // the last group takes the remainder
+
+    int clamped_i= min(i, n - 1); // work-items beyond n re-read the last valid element instead of reading past it
+
+    K_T key1 = keys[clamped_i], key2;
+    V_T val1 = vals[clamped_i];
+    scratch[i] = key1;
+    barrier(CLK_LOCAL_MEM_FENCE); // every work-item reaches this before the surplus ones return below
+
+    if(i >= n)
+    {
+        return;
+    }
+
+    int pos = 0;
+    for (int j=0;j<n;++j)
+    {
+        key2  = scratch[j];
+        if(my_comp(key2, key1)) 
+            pos++;// key2 is ordered before key1: raises the rank of this element within the work group
+        else 
+        {
+            if(my_comp(key1, key2))
+                continue;
+            else 
+            {
+                // key1 and key2 are equivalent; j == i is counted too, so same is at least 1
+                same++;
+            }
+        }
+    }
+    for (int j=0; j< same; j++) // NOTE(review): work-items with equivalent keys all write these same slots with their own val1 - presumably why SORT_SELECTION cannot sort duplicate keys; confirm
+    {
+        vals[pos + j] = val1;
+        keys[pos + j] = key1;
+    }
+}
+__kernel // ranks each key against the keys of every work-group-sized segment and stores the pair at that rank
+    void selectionSortFinal
+    (
+        __global K_T * keys,
+        __global V_T * vals,
+        const int count // total number of elements in keys/vals
+    )
+{
+    const int          i  = get_local_id(0); // index in workgroup
+    const int numOfGroups = get_num_groups(0); // number of work-groups
+    const int groupID     = get_group_id(0); // not referenced below; offset calls get_group_id(0) again
+    const int         wg  = get_local_size(0); // workgroup size = block size
+    int pos = 0, same = 0;
+    const int offset = get_group_id(0) * wg;
+    const int remainder = count - wg*(numOfGroups-1); // number of elements in the last segment
+
+    if((offset + i ) >= count)
+        return;
+    V_T val1 = vals[offset + i];
+
+    K_T key1 = keys[offset + i];
+    K_T key2;
+
+    for(int j=0; j<numOfGroups-1; j++ ) // all full-size segments
+    {
+        for(int k=0; k<wg; k++)
+        {
+            key2 = keys[j*wg + k]; 
+            if(my_comp(key1, key2))
+                break; // leaves this segment at the first key ordered after key1 - presumably each segment is already sorted by selectionSortLocal; confirm against the host code
+            else
+            {
+                //Increment only if the value is not the same. 
+                if(my_comp(key2, key1))
+                    pos++;
+                else 
+                    same++;
+            }
+        }
+    }
+
+    for(int k=0; k<remainder; k++) // the last, possibly shorter, segment
+    {
+        key2 = keys[(numOfGroups-1)*wg + k]; 
+        if(my_comp(key1, key2))
+            break;
+        else
+        {
+            //Don't increment if the value is the same. 
+            if(my_comp(key2, key1))
+                pos++;
+            else 
+                same++;
+        }
+    }  
+    for (int j=0; j< same; j++) // NOTE(review): written into the same keys/vals buffers that other work-items read above, with no synchronization in this kernel; confirm this is safe
+    {
+        vals[pos + j] = val1;
+        keys[pos + j] = key1;
+    }
+}
--- a/modules/ocl/src/opencl/kernel_stablesort_by_key.cl
+++ b/modules/ocl/src/opencl/kernel_stablesort_by_key.cl
@ -0,0 +1,296 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+// Key element type; may be predefined when the program is built, float otherwise.
+#ifndef K_T
+#define K_T float
+#endif
+
+// Value element type; may be predefined when the program is built, float otherwise.
+#ifndef V_T
+#define V_T float
+#endif
+
+// Non-zero selects the "greater than" comparator. The default is the integer 0, as in
+// kernel_radix_sort_by_key.cl, so that the #if below tests a real preprocessor constant
+// instead of relying on the identifier `false` evaluating to 0.
+#ifndef IS_GT
+#define IS_GT 0
+#endif
+
+// my_comp(x, y) is true when x has to be placed before y.
+#if IS_GT
+#define my_comp(x,y) ((x) > (y))
+#else
+#define my_comp(x,y) ((x) < (y))
+#endif
+
+///////////// parallel merge sort ///////////////
+// ported from https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/stablesort_by_key_kernels.cl
+//  Linear scan for the lower bound of searchVal in data[ left, right ): returns the index of the
+//  first element for which my_comp( element, searchVal ) is false, or right when every element
+//  of the range is ordered before searchVal (left itself when the range is empty).
+uint lowerBoundLinear( global K_T* data, uint left, uint right, K_T searchVal)
+{
+    uint pos;
+
+    //  The number of iterations depends on the data, so work-items of one wavefront diverge here
+    for( pos = left; pos < right; ++pos )
+    {
+        if( !my_comp( data[ pos ], searchVal ) )
+        {
+            break;
+        }
+    }
+
+    return pos;
+}
+
+//  Binary search for the 'insertion point' of searchVal in the sequence data[ left, right ),
+//  which is expected to be ordered according to my_comp.
+//  Returns the first index whose element is not ordered before searchVal, i.e. the first index
+//  whose value would be equal to the searched value; right is returned when there is none.
+uint lowerBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
+{
+    uint lo = left;
+    uint hi = right;
+
+    //  Halve [lo, hi) until it is empty.  Which half survives depends on the data, so the
+    //  loop diverges between the work-items of a wavefront.
+    while( lo < hi )
+    {
+        //  midpoint of the remaining range, rounded down
+        const uint mid = ( lo + hi ) >> 1;
+
+        if( my_comp( data[ mid ], searchVal ) )
+        {
+            lo = mid + 1;
+        }
+        else
+        {
+            hi = mid;
+        }
+    }
+
+    return lo;
+}
+
+//  Looks for the upper 'insertion point' of searchVal in the sequence data[ left, right ).
+//  Returns the index of the first element that is ordered after searchVal, or right when
+//  there is none.  The search starts from the lower bound found by lowerBoundBinary and steps
+//  over every element equivalent to searchVal, i.e. !(x < y) && !(y < x) under my_comp.
+//  If the search value is not found in the sequence, the result equals the lower bound.
+uint upperBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
+{
+    uint upperBound = lowerBoundBinary( data, left, right, searchVal );
+
+    //  The range is checked before every load, so data[ right ] - which lies outside the
+    //  searched range and may lie outside the buffer - is never read.
+    while( upperBound < right )
+    {
+        K_T upperValue = data[ upperBound ];
+        if( my_comp( upperValue, searchVal ) || my_comp( searchVal, upperValue ) )
+        {
+            //  first element that is not equivalent to searchVal
+            break;
+        }
+        upperBound++;
+    }
+
+    return upperBound;
+}
+
+//  This kernel implements merging of blocks of sorted data.  The input to this kernel most likely is
+//  the output of blockInsertionSort.  It is expected that the source array contains multiple
+//  blocks, each block is independently sorted.  The goal is to write into the output buffer half as
+//  many blocks, of double the size.  The even and odd blocks are stably merged together to form
+//  a new sorted block of twice the size.  The algorithm is out-of-place.
+kernel void merge(
+    global K_T*   iKey_ptr,
+    global V_T*   iValue_ptr,
+    global K_T*   oKey_ptr,
+    global V_T*   oValue_ptr,
+    const uint    srcVecSize,
+    const uint    srcLogicalBlockSize,
+    local K_T*    key_lds, // not referenced in the kernel body
+    local V_T*    val_lds // not referenced in the kernel body
+)
+{
+    size_t globalID     = get_global_id( 0 );
+    size_t groupID      = get_group_id( 0 ); // only referenced by the commented-out printf below
+    size_t localID      = get_local_id( 0 ); // only referenced by commented-out code below
+    size_t wgSize       = get_local_size( 0 ); // not referenced below
+
+    //  Abort threads that are past the end of the input vector
+    if( globalID >= srcVecSize )
+        return; // on SI this doesn't mess-up barriers
+
+    //  For an element in sequence A, find the lowerbound index for it in sequence B
+    uint srcBlockNum   = globalID / srcLogicalBlockSize;
+    uint srcBlockIndex = globalID % srcLogicalBlockSize;
+
+    // printf( "mergeTemplate: srcBlockNum[%i]=%i\n", srcBlockNum, srcBlockIndex );
+
+    //  Pairs of even-odd blocks will be merged together
+    //  An even block should search for an insertion point in the next odd block,
+    //  and the odd block should look for an insertion point in the corresponding previous even block
+    uint dstLogicalBlockSize = srcLogicalBlockSize<<1;
+    uint leftBlockIndex = globalID & ~((dstLogicalBlockSize) - 1 ); // start of the even block of this pair; relies on dstLogicalBlockSize being a power of two - TODO confirm
+    leftBlockIndex += (srcBlockNum & 0x1) ? 0 : srcLogicalBlockSize;
+    leftBlockIndex = min( leftBlockIndex, srcVecSize ); // clamp so a missing partner block becomes an empty search range
+    uint rightBlockIndex = min( leftBlockIndex + srcLogicalBlockSize, srcVecSize );
+
+    // if( localID == 0 )
+    // {
+    // printf( "mergeTemplate: wavefront[ %i ] logicalBlock[ %i ] logicalIndex[ %i ] leftBlockIndex[ %i ] <=> rightBlockIndex[ %i ]\n", groupID, srcBlockNum, srcBlockIndex, leftBlockIndex, rightBlockIndex );
+    // }
+
+    //  For a particular element in the input array, find the lowerbound index for it in the search sequence given by leftBlockIndex & rightBlockIndex
+    // uint insertionIndex = lowerBoundLinear( iKey_ptr, leftBlockIndex, rightBlockIndex, iKey_ptr[ globalID ], my_comp ) - leftBlockIndex;
+    uint insertionIndex = 0;
+    if( (srcBlockNum & 0x1) == 0 ) // element of an even block: placed before equivalent keys of the odd block
+    {
+        insertionIndex = lowerBoundBinary( iKey_ptr, leftBlockIndex, rightBlockIndex, iKey_ptr[ globalID ] ) - leftBlockIndex;
+    }
+    else // element of an odd block: placed after equivalent keys of the even block
+    {
+        insertionIndex = upperBoundBinary( iKey_ptr, leftBlockIndex, rightBlockIndex, iKey_ptr[ globalID ] ) - leftBlockIndex;
+    }
+
+    //  The index of an element in the result sequence is the summation of its indices in the two input
+    //  sequences
+    uint dstBlockIndex = srcBlockIndex + insertionIndex;
+    uint dstBlockNum = srcBlockNum/2;
+
+    // if( (dstBlockNum*dstLogicalBlockSize)+dstBlockIndex == 395 )
+    // {
+    // printf( "mergeTemplate: (dstBlockNum[ %i ] * dstLogicalBlockSize[ %i ]) + dstBlockIndex[ %i ] = srcBlockIndex[ %i ] + insertionIndex[ %i ]\n", dstBlockNum, dstLogicalBlockSize, dstBlockIndex, srcBlockIndex, insertionIndex );
+    // printf( "mergeTemplate: dstBlockIndex[ %i ] = iKey_ptr[ %i ] ( %i )\n", (dstBlockNum*dstLogicalBlockSize)+dstBlockIndex, globalID, iKey_ptr[ globalID ] );
+    // }
+    oKey_ptr[ (dstBlockNum*dstLogicalBlockSize)+dstBlockIndex ] = iKey_ptr[ globalID ];
+    oValue_ptr[ (dstBlockNum*dstLogicalBlockSize)+dstBlockIndex ] = iValue_ptr[ globalID ];
+    // printf( "mergeTemplate: leftResultIndex[ %i ]=%i + %i\n", leftResultIndex, srcBlockIndex, leftInsertionIndex );
+}
+
+kernel void blockInsertionSort( // sorts each work-group-sized block of key_ptr/value_ptr in place, staging it in local memory
+    global K_T*   key_ptr,
+    global V_T*   value_ptr,
+    const uint    vecSize, // number of elements in key_ptr/value_ptr
+    local K_T*    key_lds, // local staging buffer for keys, indexed by local id
+    local V_T*    val_lds // local staging buffer for values, indexed by local id
+)
+{
+    size_t gloId    = get_global_id( 0 );
+    size_t groId    = get_group_id( 0 );
+    size_t locId    = get_local_id( 0 );
+    size_t wgSize   = get_local_size( 0 );
+
+    bool in_range = gloId < vecSize;
+    K_T key;
+    V_T val;
+    //  Threads that are past the end of the input vector skip the load but still reach both barriers
+    if (in_range)
+    {
+        //  Each thread copies its own element of the block into fast local memory
+        key = key_ptr[ gloId ];
+        val = value_ptr[ gloId ];
+        key_lds[ locId ] = key;
+        val_lds[ locId ] = val;
+    }
+    barrier( CLK_LOCAL_MEM_FENCE );
+    //  Sorts a workgroup using a naive insertion sort
+    //  The sort uses one thread within a workgroup to sort the entire workgroup
+    if( locId == 0 && in_range )
+    {
+        //  The last workgroup may have an irregular size, so we calculate a per-block endIndex
+        //  endIndex is essentially emulating a mod operator with subtraction and multiply
+        size_t endIndex = vecSize - ( groId * wgSize );
+        endIndex = min( endIndex, wgSize );
+
+        // printf( "Debug: endIndex[%i]=%i\n", groId, endIndex );
+
+        //  Indices are signed because the while loop will generate a -1 index inside of the max function
+        for( int currIndex = 1; currIndex < endIndex; ++currIndex )
+        {
+            key = key_lds[ currIndex ];
+            val = val_lds[ currIndex ];
+            int scanIndex = currIndex;
+            K_T ldsKey = key_lds[scanIndex - 1];
+            while( scanIndex > 0 && my_comp( key, ldsKey ) ) // strict comparison: elements with equivalent keys are never moved past each other
+            {
+                V_T ldsVal = val_lds[scanIndex - 1];
+
+                //  The key is shifted one slot up, so its value is shifted identically
+                key_lds[ scanIndex ] = ldsKey;
+                val_lds[ scanIndex ] = ldsVal;
+
+                scanIndex = scanIndex - 1;
+                ldsKey = key_lds[ max( 0, scanIndex - 1 ) ];  // scanIndex-1 may be -1
+            }
+            key_lds[ scanIndex ] = key;
+            val_lds[ scanIndex ] = val;
+        }
+    }
+    barrier( CLK_LOCAL_MEM_FENCE ); // the sorted block must be complete before the threads copy it back
+
+    if(in_range)
+    {
+        key = key_lds[ locId ];
+        key_ptr[ gloId ] = key;
+
+        val = val_lds[ locId ];
+        value_ptr[ gloId ] = val;
+    }
+}
+
+///////////// Radix sort from b40c library /////////////
--- a/modules/ocl/src/sort_by_key.cpp
+++ b/modules/ocl/src/sort_by_key.cpp
@ -0,0 +1,454 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include <iomanip>
+#include "precomp.hpp"
+
+namespace cv
+{
+namespace ocl
+{
+
+extern const char * kernel_sort_by_key;        // program used below for the bitonicSort / selectionSort* kernels
+extern const char * kernel_stablesort_by_key;  // program used below for the blockInsertionSort / merge kernels
+extern const char * kernel_radix_sort_by_key;  // program used below for the histogramRadixN / permuteRadixN kernels
+
+void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, int method, bool isGreaterThan);
+
+//TODO(pengx17): derive this value from the device instead of using a constant
+const static unsigned int GROUP_SIZE = 256;  // used as localThreads[0] by the bitonic, selection and merge paths
+
+const char * depth_strings[] =  // type names passed to the kernels via -D K_T / -D V_T, indexed by Mat depth
+{
+    "uchar",   //CV_8U
+    "char",    //CV_8S
+    "ushort",  //CV_16U
+    "short",   //CV_16S
+    "int",     //CV_32S
+    "float",   //CV_32F
+    "double"   //CV_64F
+};
+
+// Writes the build options shared by the sort kernels into build_opt_buf:
+//   IS_GT - 1 when isGreaterThan is set, 0 otherwise
+//   K_T   - type name of the key depth
+//   V_T   - type name of the value depth, followed by the channel count when the
+//           values have more than one channel
+// No bounds checking is done; build_opt_buf must be large enough for the result.
+static void genSortBuildOption(const oclMat& keys, const oclMat& vals, bool isGreaterThan, char * build_opt_buf)
+{
+    const char * const keyTypeName = depth_strings[keys.depth()];
+    const char * const valTypeName = depth_strings[vals.depth()];
+    const int gtFlag = isGreaterThan ? 1 : 0;
+
+    // sprintf reports how many characters it produced, which is where the suffix goes
+    const int written = sprintf(build_opt_buf, "-D IS_GT=%d -D K_T=%s -D V_T=%s",
+                                gtFlag, keyTypeName, valTypeName);
+
+    const int valChannels = vals.oclchannels();
+    if(valChannels > 1)
+    {
+        sprintf(build_opt_buf + written, "%d", valChannels);
+    }
+}
+// Returns true when size has at most one bit set, i.e. for every power of two.
+// Note that 0 also passes this test.
+inline bool isSizePowerOf2(size_t size)
+{
+    // clearing the lowest set bit leaves nothing behind for a power of two
+    const size_t withoutLowestSetBit = size & (size - 1);
+    return withoutLowestSetBit == 0;
+}
+
+namespace bitonic_sort
+{
+// Sorts the first vecSize keys (reordering vals alongside them) with the "bitonicSort"
+// kernel. vecSize has to pass isSizePowerOf2(). One kernel launch is issued for every
+// (stage, passOfStage) pair, with stage in [0, log2(vecSize)) and passOfStage in [0, stage].
+static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreaterThan)
+{
+    CV_Assert(isSizePowerOf2(vecSize));
+
+    Context * cxt = Context::getContext();
+    size_t globalThreads[3] = {vecSize / 2, 1, 1};
+    size_t localThreads[3]  = {GROUP_SIZE, 1, 1};
+
+    // 2^numStages should be equal to vecSize or the output is invalid
+    int numStages = 0;
+    for(size_t remaining = vecSize; remaining > 1; remaining >>= 1)
+    {
+        ++numStages;
+    }
+    char build_opt_buf [100];
+    genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf);
+
+    // The argument is registered as sizeof(cl_int) bytes, so it has to point at an object
+    // of exactly that size; pointing at the size_t itself would pick up the wrong half of
+    // the value on big-endian 64-bit hosts.
+    cl_int vecSizeArg = static_cast<cl_int>(vecSize);
+
+    const int argc = 5;
+    std::vector< std::pair<size_t, const void *> > args(argc);
+    String kernelname = "bitonicSort";
+
+    args[0] = std::make_pair(sizeof(cl_mem), (void *)&keys.data);
+    args[1] = std::make_pair(sizeof(cl_mem), (void *)&vals.data);
+    args[2] = std::make_pair(sizeof(cl_int), (void *)&vecSizeArg);
+
+    for(int stage = 0; stage < numStages; ++stage)
+    {
+        args[3] = std::make_pair(sizeof(cl_int), (void *)&stage);
+        for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
+        {
+            // args[3] / args[4] point at the loop counters, so each launch sees the current values
+            args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
+            openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf);
+        }
+    }
+}
+}  /* bitonic_sort */
+
+namespace selection_sort
+{
+// FIXME: disabled, see the CV_Error at the top of the body.
+// This function cannot sort arrays with duplicated keys
+static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreaterThan)
+{
+    CV_Error(-1, "This function is incorrect at the moment.");  // raised unconditionally; the launches below are presumably never reached
+    Context * cxt = Context::getContext();
+
+    size_t globalThreads[3] = {vecSize, 1, 1};
+    size_t localThreads[3]  = {GROUP_SIZE, 1, 1};
+
+    std::vector< std::pair<size_t, const void *> > args;
+    char build_opt_buf [100];
+    genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf);
+
+    // first launch: "selectionSortLocal", which takes an extra local-memory argument
+    String kernelname = "selectionSortLocal";
+    int lds_size = GROUP_SIZE * keys.elemSize();  // bytes for GROUP_SIZE key elements
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&keys.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&vals.data));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&vecSize));  // NOTE(review): reads sizeof(cl_int) bytes out of a size_t
+    args.push_back(std::make_pair(lds_size,       (void*)NULL));  // NULL pointer with a size: presumably a local-memory allocation - confirm
+
+    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf);
+
+    // second launch: "selectionSortFinal", same arguments without the local-memory one
+    kernelname = "selectionSortFinal";
+    args.pop_back();
+    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf);
+}
+
+}  /* selection_sort */
+
+
+namespace radix_sort
+{
+//FIXME(pengx17): 
+// exclusive scan, need to be optimized as this is too naive...
+//void naive_scan_addition(oclMat& input, oclMat& output)
+//{
+//    Context * cxt = Context::getContext();
+//    size_t vecSize = input.cols;
+//    size_t globalThreads[3] = {1, 1, 1};
+//    size_t localThreads[3]  = {1, 1, 1};
+//
+//    String kernelname = "naiveScanAddition";
+//
+//    std::vector< std::pair<size_t, const void *> > args;
+//    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&input.data));
+//    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&output.data));
+//    args.push_back(std::make_pair(sizeof(cl_int), (void *)&vecSize));
+//    openCLExecuteKernel(cxt, &kernel_radix_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1);
+//}
+
+// Exclusive prefix sum computed on the host: output[0] = 0 and
+// output[i] = input[0] + ... + input[i - 1]. Both matrices are read/written as int
+// elements; output is replaced by the freshly computed host matrix.
+static void naive_scan_addition_cpu(oclMat& input, oclMat& output)
+{
+    Mat hostIn = input;
+    Mat hostOut(output.size(), output.type());
+
+    MatIterator_<int> src = hostIn.begin<int>();
+    MatIterator_<int> dst = hostOut.begin<int>();
+
+    // runningSum always holds the value that was written to the previous output element
+    int runningSum = 0;
+    *dst = runningSum;
+    for(++src, ++dst; src != hostIn.end<int>(); ++src, ++dst)
+    {
+        runningSum += *(src - 1);
+        *dst = runningSum;
+    }
+    output = hostOut;
+}
+
+
+//radix sort ported from Bolt
+//
+// Sorts the first origVecSize (key, value) pairs by key, RADIX bits per pass.
+//   keys          - keys of depth CV_32S or CV_32F (asserted below)
+//   vals          - values that are reordered together with the keys
+//   origVecSize   - number of leading elements to sort
+//   isGreaterThan - comparison flag handed to the kernels through the build options;
+//                   it also selects the padding key (lowest representable value when
+//                   set, highest otherwise)
+// When origVecSize is not a multiple of groupSize * RADICES the data is copied into
+// padded work buffers first, and the sorted prefix is copied back at the end.
+static void sortByKey(oclMat& keys, oclMat& vals, size_t origVecSize, bool isGreaterThan)
+{
+    CV_Assert(keys.depth() == CV_32S || keys.depth() == CV_32F); // we assume keys are 4 bytes
+
+    bool isKeyFloat = keys.type() == CV_32F;
+
+    const int RADIX = 4; //Now you cannot replace this with Radix 8 since there is a
+                         //local array of 16 elements in the histogram kernel.
+    const int RADICES = (1 << RADIX); //Values handled by each work-item?
+
+    bool  newBuffer = false;
+    size_t vecSize = origVecSize;
+
+    unsigned int groupSize  = RADICES;
+
+    size_t mulFactor = groupSize * RADICES;
+
+    oclMat buffer_keys, buffer_vals;
+
+    // the part of a one-row matrix that holds the elements being sorted
+    const Rect origRange(0, 0, static_cast<int>(origVecSize), 1);
+
+    if(origVecSize % mulFactor != 0)
+    {
+        // round up to the next multiple of mulFactor and work on padded copies
+        vecSize = ((vecSize + mulFactor) / mulFactor) * mulFactor;
+        buffer_keys.create(1, vecSize, keys.type());
+        buffer_vals.create(1, vecSize, vals.type());
+        Scalar padding_value;
+        oclMat roi_buffer_vals = buffer_vals(origRange);
+
+        if(isGreaterThan)
+        {
+            switch(buffer_keys.depth())
+            {
+            case CV_32F:
+                padding_value = Scalar::all(-FLT_MAX);
+                break;
+            case CV_32S:
+                padding_value = Scalar::all(INT_MIN);
+                break;
+            }
+        }
+        else
+        {
+            switch(buffer_keys.depth())
+            {
+            case CV_32F:
+                padding_value = Scalar::all(FLT_MAX);
+                break;
+            case CV_32S:
+                padding_value = Scalar::all(INT_MAX);
+                break;
+            }
+        }
+        ocl::copyMakeBorder(
+            keys(origRange), buffer_keys,
+            0, 0, 0, vecSize - origVecSize,
+            BORDER_CONSTANT, padding_value);
+        vals(origRange).copyTo(roi_buffer_vals);
+        newBuffer = true;
+    }
+    else
+    {
+        // already a multiple of mulFactor: sort directly in the caller's buffers
+        buffer_keys = keys;
+        buffer_vals = vals;
+        newBuffer = false;
+    }
+    oclMat swap_input_keys(1, vecSize, keys.type());
+    oclMat swap_input_vals(1, vecSize, vals.type());
+    oclMat hist_bin_keys(1, vecSize, CV_32SC1);
+    oclMat hist_bin_dest_keys(1, vecSize, CV_32SC1);
+
+    Context * cxt = Context::getContext();
+
+    size_t globalThreads[3] = {vecSize / RADICES, 1, 1};
+    size_t localThreads[3]  = {groupSize, 1, 1};
+
+    std::vector< std::pair<size_t, const void *> > args;
+    char build_opt_buf [100];
+    genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf);
+
+    //additional build option for radix sort
+    sprintf(build_opt_buf + strlen(build_opt_buf), " -D K_%s", isKeyFloat?"FLT":"INT");
+
+    String kernelnames[2] = {String("histogramRadixN"), String("permuteRadixN")};
+
+    // swap == 0: read from buffer_*, write to swap_input_*; swap == 1: the other way round.
+    // The number of passes (key bits / RADIX) is even for 4-byte keys, so the final
+    // result ends up in buffer_*.
+    int swap = 0;
+    for(int bits = 0; bits < (static_cast<int>(keys.elemSize()) * 8); bits += RADIX)
+    {
+        args.clear();
+        //Do a histogram pass locally
+        if(swap == 0)
+        {
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&buffer_keys.data));
+        }
+        else
+        {
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&swap_input_keys.data));
+        }
+        args.push_back(std::make_pair(sizeof(cl_mem), (void *)&hist_bin_keys.data));
+        args.push_back(std::make_pair(sizeof(cl_int), (void *)&bits));
+        openCLExecuteKernel(cxt, &kernel_radix_sort_by_key, kernelnames[0], globalThreads, localThreads,
+            args, -1, -1, build_opt_buf);
+
+        args.clear();
+        //Perform a global scan
+        naive_scan_addition_cpu(hist_bin_keys, hist_bin_dest_keys);
+        // end of scan
+        if(swap == 0)
+        {
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&buffer_keys.data));
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&buffer_vals.data));
+        }
+        else
+        {
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&swap_input_keys.data));
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&swap_input_vals.data));
+        }
+        args.push_back(std::make_pair(sizeof(cl_mem), (void *)&hist_bin_dest_keys.data));
+        args.push_back(std::make_pair(sizeof(cl_int), (void *)&bits));
+
+        if(swap == 0)
+        {
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&swap_input_keys.data));
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&swap_input_vals.data));
+        }
+        else
+        {
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&buffer_keys.data));
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&buffer_vals.data));
+        }
+        openCLExecuteKernel(cxt, &kernel_radix_sort_by_key, kernelnames[1], globalThreads, localThreads,
+            args, -1, -1, build_opt_buf);
+        swap = swap ? 0 : 1;
+    }
+    if(newBuffer)
+    {
+        // Copy the sorted prefix into ROIs of the caller's matrices. Copying into the
+        // matrices themselves would re-create them with origVecSize columns whenever
+        // they hold more elements than were sorted, dropping the untouched tail.
+        oclMat roi_keys = keys(origRange);
+        oclMat roi_vals = vals(origRange);
+        buffer_keys(origRange).copyTo(roi_keys);
+        buffer_vals(origRange).copyTo(roi_vals);
+    }
+}
+
+}  /* radix_sort */
+
+namespace merge_sort
+{
+// Sorts the first vecSize (key, value) pairs by key. The "blockInsertionSort" kernel is
+// launched first; if vecSize exceeds GROUP_SIZE, a series of out-of-place "merge"
+// launches follows, alternating between the caller's matrices and temporary buffers,
+// with the merge window doubling on every pass.
+static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreaterThan)
+{
+    Context * cxt = Context::getContext();
+
+    size_t globalThreads[3] = {vecSize, 1, 1};
+    size_t localThreads[3]  = {GROUP_SIZE, 1, 1};
+
+    std::vector< std::pair<size_t, const void *> > args;
+    char build_opt_buf [100];
+    genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf);
+
+    // The element count is registered as a sizeof(cl_uint) argument, so it has to point
+    // at an object of exactly that size; pointing at the size_t itself would pick up the
+    // wrong half of the value on big-endian 64-bit hosts.
+    cl_uint vecSizeArg = static_cast<cl_uint>(vecSize);
+
+    String kernelname[] = {String("blockInsertionSort"), String("merge")};
+    int keylds_size = GROUP_SIZE * keys.elemSize();
+    int vallds_size = GROUP_SIZE * vals.elemSize();
+    args.push_back(std::make_pair(sizeof(cl_mem),  (void *)&keys.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),  (void *)&vals.data));
+    args.push_back(std::make_pair(sizeof(cl_uint), (void *)&vecSizeArg));
+    args.push_back(std::make_pair(keylds_size,     (void*)NULL));
+    args.push_back(std::make_pair(vallds_size,     (void*)NULL));
+
+    openCLExecuteKernel(cxt, &kernel_stablesort_by_key, kernelname[0], globalThreads, localThreads, args, -1, -1, build_opt_buf);
+
+    //  Early exit for the case of no merge passes, values are already in destination vector
+    if(vecSize <= GROUP_SIZE)
+    {
+        return;
+    }
+
+    //  Number of merge passes: log2 of (vecSize / 64), plus one when vecSize is a power of 2.
+    //  NOTE(review): the shift by 6 corresponds to a block size of 64, while the first
+    //  kernel runs with GROUP_SIZE (256) work-items per group and the merge window below
+    //  starts at localThreads[0]. This looks like it schedules more passes than strictly
+    //  needed - confirm against the merge kernel before changing it.
+    size_t numMerges = 0;
+    size_t log2BlockSize = vecSize >> 6;
+    for( ; log2BlockSize > 1; log2BlockSize >>= 1 )
+    {
+        ++numMerges;
+    }
+    numMerges += isSizePowerOf2(vecSize)? 1: 0;
+
+    //  Allocate a flipflop buffer because the merge passes are out of place
+    oclMat tmpKeyBuffer(keys.size(), keys.type());
+    oclMat tmpValBuffer(vals.size(), vals.type());
+    args.resize(8);
+
+    args[4] = std::make_pair(sizeof(cl_uint), (void *)&vecSizeArg);
+    args[6] = std::make_pair(keylds_size,    (void*)NULL);
+    args[7] = std::make_pair(vallds_size,    (void*)NULL);
+
+    for(size_t pass = 1; pass <= numMerges; ++pass )
+    {
+        //  For each pass, flip the input-output buffers
+        if( pass & 0x1 )
+        {
+            args[0] = std::make_pair(sizeof(cl_mem), (void *)&keys.data);
+            args[1] = std::make_pair(sizeof(cl_mem), (void *)&vals.data);
+            args[2] = std::make_pair(sizeof(cl_mem), (void *)&tmpKeyBuffer.data);
+            args[3] = std::make_pair(sizeof(cl_mem), (void *)&tmpValBuffer.data);
+        }
+        else
+        {
+            args[0] = std::make_pair(sizeof(cl_mem), (void *)&tmpKeyBuffer.data);
+            args[1] = std::make_pair(sizeof(cl_mem), (void *)&tmpValBuffer.data);
+            args[2] = std::make_pair(sizeof(cl_mem), (void *)&keys.data);
+            args[3] = std::make_pair(sizeof(cl_mem), (void *)&vals.data);
+        }
+        //  For each pass, the merge window doubles
+        unsigned int srcLogicalBlockSize = static_cast<unsigned int>( localThreads[0] << (pass-1) );
+        args[5] = std::make_pair(sizeof(cl_uint), (void *)&srcLogicalBlockSize);
+        openCLExecuteKernel(cxt, &kernel_stablesort_by_key, kernelname[1], globalThreads, localThreads, args, -1, -1, build_opt_buf);
+    }
+    //  If there are an odd number of merges, then the output data is sitting in the temp buffer.  We need to copy
+    //  the results back into the input array
+    if( numMerges & 1 )
+    {
+        //  Only the first vecSize elements were sorted; copying the whole temporary buffer
+        //  would overwrite whatever the caller keeps behind them.
+        const Rect sortedRange(0, 0, static_cast<int>(vecSize), 1);
+
+        //  keys is a single row holding at least vecSize elements (checked by the public entry point)
+        oclMat keysRoi = keys(sortedRange);
+        tmpKeyBuffer(sortedRange).copyTo(keysRoi);
+
+        if( vals.rows == 1 && vecSize <= static_cast<size_t>(vals.cols) )
+        {
+            oclMat valsRoi = vals(sortedRange);
+            tmpValBuffer(sortedRange).copyTo(valsRoi);
+        }
+        else
+        {
+            //  values are not laid out as a single row: fall back to the whole-buffer copy
+            tmpValBuffer.copyTo(vals);
+        }
+    }
+}
+}  /* merge_sort */
+
+}
+} /* namespace cv { namespace ocl */
+
+
+// Sorts the first vecSize elements of keys in place and applies the same reordering to vals.
+//   keys          - single-row, single-channel key matrix with at least vecSize columns
+//   vals          - value matrix holding at least vecSize elements
+//   method        - SORT_BITONIC, SORT_SELECTION, SORT_RADIX or SORT_MERGE; any other
+//                   value raises an error
+//   isGreaterThan - comparison flag forwarded unchanged to the selected implementation
+void cv::ocl::sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, int method, bool isGreaterThan)
+{
+    CV_Assert( keys.rows == 1 ); // we only allow one dimensional input
+    CV_Assert( keys.channels() == 1 ); // we only allow one channel keys
+    CV_Assert( vecSize <= static_cast<size_t>(keys.cols) );
+    // every sorted key needs a value element that moves along with it
+    CV_Assert( vecSize <= static_cast<size_t>(vals.rows) * static_cast<size_t>(vals.cols) );
+    switch(method)
+    {
+    case SORT_BITONIC:
+        bitonic_sort::sortByKey(keys, vals, vecSize, isGreaterThan);
+        break;
+    case SORT_SELECTION:
+        selection_sort::sortByKey(keys, vals, vecSize, isGreaterThan);
+        break;
+    case SORT_RADIX:
+        radix_sort::sortByKey(keys, vals, vecSize, isGreaterThan);
+        break;
+    case SORT_MERGE:
+        merge_sort::sortByKey(keys, vals, vecSize, isGreaterThan);
+        break;
+    default:
+        // returning silently would hand unsorted data back to the caller
+        CV_Error(-1, "Unknown sort method.");
+        break;
+    }
+}
+
+// Convenience overload: sorts every element of the single-row keys matrix, which must
+// have the same size as vals.
+void cv::ocl::sortByKey(oclMat& keys, oclMat& vals, int method, bool isGreaterThan)
+{
+    CV_Assert( keys.size() == vals.size() );
+    CV_Assert( keys.rows == 1 ); // we only allow one dimensional input
+    // the whole row takes part in the sort
+    sortByKey(keys, vals, static_cast<size_t>(keys.cols), method, isGreaterThan);
+}
--- a/modules/ocl/test/test_sort.cpp
+++ b/modules/ocl/test/test_sort.cpp
@ -0,0 +1,244 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include <map>
+#include <functional>
+#include "precomp.hpp"
+
+using namespace std;
+using namespace cvtest;
+using namespace testing;
+using namespace cv;
+
+
+namespace
+{
+IMPLEMENT_PARAM_CLASS(IsGreaterThan, bool)
+IMPLEMENT_PARAM_CLASS(InputSize, int)
+IMPLEMENT_PARAM_CLASS(SortMethod, int)
+
+
+template<class T>  // maps a C++ element type to the matrix type code used to index the sorter table
+struct KV_CVTYPE{ static int toType() {return 0;} };  // primary template: any type without a specialisation yields 0
+
+template<> struct KV_CVTYPE<int>  { static int toType() {return CV_32SC1;} };
+template<> struct KV_CVTYPE<float>{ static int toType() {return CV_32FC1;} };
+template<> struct KV_CVTYPE<Vec2i>{ static int toType() {return CV_32SC2;} };
+template<> struct KV_CVTYPE<Vec2f>{ static int toType() {return CV_32FC2;} };
+
+// Comparator for (key, value) pairs: true when the first pair's key is greater than the
+// second pair's key. The values take no part in the comparison.
+template<class key_type, class val_type>
+bool kvgreater(pair<key_type, val_type> p1, pair<key_type, val_type> p2)
+{
+    const key_type & lhsKey = p1.first;
+    const key_type & rhsKey = p2.first;
+    return lhsKey > rhsKey;
+}
+
+// Comparator for (key, value) pairs: true when the first pair's key is less than the
+// second pair's key. The values take no part in the comparison.
+template<class key_type, class val_type>
+bool kvless(pair<key_type, val_type> p1, pair<key_type, val_type> p2)
+{
+    const key_type & lhsKey = p1.first;
+    const key_type & rhsKey = p2.first;
+    return lhsKey < rhsKey;
+}
+
+// Replaces the contents of kvres with vecSize (key, value) pairs read in lock-step from
+// the two iterators.
+template<class key_type, class val_type>
+void toKVPair(
+    MatConstIterator_<key_type> kit,
+    MatConstIterator_<val_type> vit,
+    int vecSize,
+    vector<pair<key_type, val_type> >& kvres
+    )
+{
+    kvres.clear();
+    for(int remaining = vecSize; remaining > 0; --remaining, ++kit, ++vit)
+    {
+        kvres.push_back(make_pair(*kit, *vit));
+    }
+}
+
+// Reference implementation on the host: sorts the (key, value) pairs of the first row of
+// keys / vals with std::sort and writes them back in place. Pairs are ordered with
+// kvgreater when isGreater is set and with kvless otherwise.
+template<class key_type, class val_type>
+void kvquicksort(Mat& keys, Mat& vals, bool isGreater = false)
+{
+    typedef pair<key_type, val_type> kv_pair;
+
+    vector<kv_pair> kvres;
+    toKVPair(keys.begin<key_type>(), vals.begin<val_type>(), keys.cols, kvres);
+
+    // pick the comparator once, then run a single sort
+    bool (*comparator)(kv_pair, kv_pair) = kvless<key_type, val_type>;
+    if(isGreater)
+    {
+        comparator = kvgreater<key_type, val_type>;
+    }
+    std::sort(kvres.begin(), kvres.end(), comparator);
+
+    // scatter the sorted pairs back into the two matrices
+    key_type * kout = keys.ptr<key_type>();
+    val_type * vout = vals.ptr<val_type>();
+    for(int idx = 0; idx < keys.cols; ++idx)
+    {
+        const kv_pair & sorted = kvres[idx];
+        kout[idx] = sorted.first;
+        vout[idx] = sorted.second;
+    }
+}
+
+// Host-side reference sorter. A table indexed by (key matrix type, value matrix type)
+// holds the matching kvquicksort instantiation; sort() looks the entry up and calls it.
+class SortByKey_STL
+{
+public:
+    // Sorts keys / vals in place; is_gt is forwarded to the selected kvquicksort.
+    static void sort(cv::Mat&, cv::Mat&, bool is_gt);
+private:
+    typedef void (*quick_sorter)(cv::Mat&, cv::Mat&, bool);
+    SortByKey_STL();
+    // entries without a registered sorter stay null
+    quick_sorter quick_sorters[CV_64FC4][CV_64FC4];
+    static SortByKey_STL instance;
+};
+
+SortByKey_STL SortByKey_STL::instance;
+
+SortByKey_STL::SortByKey_STL()
+{
+    // Fill the table of the object being constructed. Writing through `instance` here
+    // would only work while the object under construction happens to be `instance`
+    // itself; any other object would be left with an uninitialised table.
+    memset(quick_sorters, 0, sizeof(quick_sorters));
+#define NEW_SORTER(KT, VT) \
+    quick_sorters[KV_CVTYPE<KT>::toType()][KV_CVTYPE<VT>::toType()] = kvquicksort<KT, VT>;
+
+    NEW_SORTER(int, int);
+    NEW_SORTER(int, Vec2i);
+    NEW_SORTER(int, float);
+    NEW_SORTER(int, Vec2f);
+
+    NEW_SORTER(float, int);
+    NEW_SORTER(float, Vec2i);
+    NEW_SORTER(float, float);
+    NEW_SORTER(float, Vec2f);
+#undef NEW_SORTER
+}
+
+void SortByKey_STL::sort(cv::Mat& keys, cv::Mat& vals, bool is_gt)
+{
+    const int keyType = keys.type();
+    const int valType = vals.type();
+    // stay inside the table ...
+    CV_Assert(keyType < CV_64FC4 && valType < CV_64FC4);
+    // ... and fail loudly instead of calling through a null entry
+    quick_sorter sorter = instance.quick_sorters[keyType][valType];
+    CV_Assert(sorter != 0);
+    sorter(keys, vals, is_gt);
+}
+
+// Checks the values produced by an unstable sort against a reference result.
+//   gkeys_ / gvals_ - reference keys and values, already sorted
+//   dvals_          - values produced by the implementation under test
+// Within a run of equal reference keys the values may appear in any order, so the
+// value elements of every run are sorted on both sides before they are compared.
+// Returns false as soon as a run differs.
+bool checkUnstableSorterResult(const Mat& gkeys_, const Mat& gvals_,
+                               const Mat& /*dkeys_*/, const Mat& dvals_)
+{
+    int cn_val = gvals_.channels();
+    int count  = gkeys_.cols;
+
+    //for convenience we convert depth to float and channels to 1
+    Mat gkeys, gvals, dvals;
+    gkeys_.reshape(1).convertTo(gkeys, CV_32F);
+    gvals_.reshape(1).convertTo(gvals, CV_32F);
+    dvals_.reshape(1).convertTo(dvals, CV_32F);
+    float * gkptr = gkeys.ptr<float>();
+    float * gvptr = gvals.ptr<float>();
+    float * dvptr = dvals.ptr<float>();
+
+    // i < count (not count - 1) so that a trailing element with a unique key is checked too
+    for(int i = 0; i < count; ++i)
+    {
+        // number of following keys identical to key i; the bound keeps the scan inside
+        // the array when the run extends to the last element
+        int iden_count = 0;
+        while(i + 1 + iden_count < count && gkptr[i + iden_count] == gkptr[i + 1 + iden_count])
+        {
+            ++ iden_count;
+        }
+
+        // value elements belonging to this run: cn_val floats per key, starting at key i
+        int num_of_val = (iden_count + 1) * cn_val;
+        float * gfirst = gvptr + i * cn_val;
+        float * dfirst = dvptr + i * cn_val;
+        std::sort(gfirst, gfirst + num_of_val);
+        std::sort(dfirst, dfirst + num_of_val);
+
+        // compare exactly the range that was just sorted
+        if(!std::equal(gfirst, gfirst + num_of_val, dfirst))
+        {
+            return false;
+        }
+        i += iden_count;
+    }
+    return true;
+}
+}
+
+#define INPUT_SIZES  Values(InputSize(0x10), InputSize(0x100), InputSize(0x10000)) //2^4, 2^8, 2^16
+#define KEY_TYPES    Values(MatType(CV_32SC1), MatType(CV_32FC1))
+#define VAL_TYPES    Values(MatType(CV_32SC1), MatType(CV_32SC2), MatType(CV_32FC1), MatType(CV_32FC2))
+#define SORT_METHODS Values(SortMethod(cv::ocl::SORT_BITONIC),SortMethod(cv::ocl::SORT_MERGE),SortMethod(cv::ocl::SORT_RADIX)/*,SortMethod(cv::ocl::SORT_SELECTION)*/)
+#define F_OR_T       Values(IsGreaterThan(false), IsGreaterThan(true))
+
+PARAM_TEST_CASE(SortByKey, InputSize, MatType, MatType, SortMethod, IsGreaterThan)  // parameters: size, key type, value type, method, order flag
+{
+    InputSize input_size;
+    MatType key_type, val_type;
+    SortMethod method;
+    IsGreaterThan is_gt;
+
+    Mat mat_key, mat_val;  // host-side input; the test body sorts these in place for the reference result
+    virtual void SetUp()
+    {
+        input_size = GET_PARAM(0);
+        key_type   = GET_PARAM(1);
+        val_type   = GET_PARAM(2);
+        method     = GET_PARAM(3);
+        is_gt      = GET_PARAM(4);
+
+        using namespace cv;
+        // fill single-row key and value matrices of input_size elements with random data
+        mat_key = randomMat(Size(input_size, 1), key_type, INT_MIN, INT_MAX);
+        mat_val = randomMat(Size(input_size, 1), val_type, INT_MIN, INT_MAX);
+    }
+};
+
+TEST_P(SortByKey, Accuracy)  // compares ocl::sortByKey with the host-side SortByKey_STL reference
+{
+    using namespace cv;
+    ocl::oclMat oclmat_key(mat_key);  // device copies, made before the host matrices are sorted
+    ocl::oclMat oclmat_val(mat_val);
+
+    ocl::sortByKey(oclmat_key, oclmat_val, method, is_gt);
+    SortByKey_STL::sort(mat_key, mat_val, is_gt);  // sorts the host matrices in place
+
+    EXPECT_MAT_NEAR(mat_key, oclmat_key, 0.0);  // zero tolerance for the keys
+    EXPECT_TRUE(checkUnstableSorterResult(mat_key, mat_val, oclmat_key, oclmat_val));  // values are compared per run of equal keys
+}
+INSTANTIATE_TEST_CASE_P(OCL_SORT, SortByKey, Combine(INPUT_SIZES, KEY_TYPES, VAL_TYPES, SORT_METHODS, F_OR_T));
--- a/modules/photo/doc/inpainting.rst
+++ b/modules/photo/doc/inpainting.rst
@ -31,14 +31,13 @@ The function reconstructs the selected image area from the pixel near the area b
 http://en.wikipedia.org/wiki/Inpainting
 for more details.

-.. Sample code::
+.. note::

-   * : An example using the inpainting technique can be found at opencv_source_code/samples/cpp/inpaint.cpp
+   * An example using the inpainting technique can be found at opencv_source_code/samples/cpp/inpaint.cpp

-   * : PYTHON : An example using the inpainting technique can be found at opencv_source_code/samples/python2/inpaint.py
+   * (Python) An example using the inpainting technique can be found at opencv_source_code/samples/python2/inpaint.py


 .. [Telea04] Telea, Alexandru. "An image inpainting technique based on the fast marching method." Journal of graphics tools 9, no. 1 (2004): 23-34.

 .. [Navier01] Bertalmio, Marcelo, Andrea L. Bertozzi, and Guillermo Sapiro. "Navier-stokes, fluid dynamics, and image and video inpainting." In Computer Vision and Pattern Recognition, 2001. CVPR 2001. Proceedings of the 2001 IEEE Computer Society Conference on, vol. 1, pp. I-355. IEEE, 2001.
-
--- a/modules/stitching/doc/high_level.rst
+++ b/modules/stitching/doc/high_level.rst
@ -88,10 +88,10 @@ High level image stitcher. It's possible to use this class without being aware o
        /* hidden */
    };

-.. Sample code::
+.. note::

-   * : A basic example on image stitching can be found at opencv_source_code/samples/cpp/stitching.cpp
-   * : A detailed example on image stitching can be found at opencv_source_code/samples/cpp/stitching_detailed.cpp
+   * A basic example on image stitching can be found at opencv_source_code/samples/cpp/stitching.cpp
+   * A detailed example on image stitching can be found at opencv_source_code/samples/cpp/stitching_detailed.cpp

 Stitcher::createDefault
 -----------------------
--- a/modules/ts/CMakeLists.txt
+++ b/modules/ts/CMakeLists.txt
@ -4,10 +4,7 @@ if(IOS)
  ocv_module_disable(ts)
 endif()

-if(MINGW)
 set(OPENCV_MODULE_TYPE STATIC)
-endif()
-
 set(OPENCV_MODULE_IS_PART_OF_WORLD FALSE)

 if(HAVE_CUDA)
@ -22,10 +19,4 @@ ocv_glob_module_sources()
 ocv_module_include_directories()
 ocv_create_module()

-if(BUILD_SHARED_LIBS AND NOT MINGW)
-  add_definitions(-DGTEST_CREATE_SHARED_LIBRARY=1)
-else()
-  add_definitions(-DGTEST_CREATE_SHARED_LIBRARY=0)
-endif()
-
 ocv_add_precompiled_headers(${the_module})
--- a/modules/ts/include/opencv2/ts.hpp
+++ b/modules/ts/include/opencv2/ts.hpp
@ -4,14 +4,6 @@
 #include "opencv2/core/cvdef.h"
 #include <stdarg.h> // for va_list

-#include "cvconfig.h"
-
-#ifndef GTEST_CREATE_SHARED_LIBRARY
-#ifdef BUILD_SHARED_LIBS
-#define GTEST_LINKED_AS_SHARED_LIBRARY 1
-#endif
-#endif
-
 #ifdef HAVE_WINRT
    #pragma warning(disable:4447) // Disable warning 'main' signature found without threading model
 #endif
--- a/modules/ts/include/opencv2/ts/ts_perf.hpp
+++ b/modules/ts/include/opencv2/ts/ts_perf.hpp
@ -1,14 +1,6 @@
 #ifndef __OPENCV_TS_PERF_HPP__
 #define __OPENCV_TS_PERF_HPP__

-#include "cvconfig.h"
-
-#ifndef GTEST_CREATE_SHARED_LIBRARY
-#  ifdef BUILD_SHARED_LIBS
-#    define GTEST_LINKED_AS_SHARED_LIBRARY 1
-#  endif
-#endif
-
 #include "opencv2/core.hpp"
 #include "ts_gtest.h"

@ -477,9 +469,16 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os);
    INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\
    void fixture##_##name::PerfTestBody()

+#if defined(_MSC_VER) && (_MSC_VER <= 1400)
+#define CV_PERF_TEST_MAIN_INTERNALS_ARGS(...)	\
+    while (++argc >= (--argc,-1)) {__VA_ARGS__; break;} /*this ugly construction is needed for VS 2005*/
+#else
+#define CV_PERF_TEST_MAIN_INTERNALS_ARGS(...)	\
+    __VA_ARGS__;
+#endif

 #define CV_PERF_TEST_MAIN_INTERNALS(modulename, impls, ...)	\
-    while (++argc >= (--argc,-1)) {__VA_ARGS__; break;} /*this ugly construction is needed for VS 2005*/\
+    CV_PERF_TEST_MAIN_INTERNALS_ARGS(__VA_ARGS__) \
    ::perf::Regression::Init(#modulename);\
    ::perf::TestBase::Init(std::vector<std::string>(impls, impls + sizeof impls / sizeof *impls),\
                           argc, argv);\
--- a/modules/ts/misc/xls-report.py
+++ b/modules/ts/misc/xls-report.py
@ -259,7 +259,8 @@ def main():
        for (w, caption) in [
                (2500, 'Module'),
                (10000, 'Test'),
-                (2500, 'Image\nsize'),
+                (2000, 'Image\nwidth'),
+                (2000, 'Image\nheight'),
                (2000, 'Data\ntype'),
                (7500, 'Other parameters')]:
            sheet.col(col).width = w
@ -311,17 +312,19 @@ def main():

                image_size = next(ifilter(re_image_size.match, param_list), None)
                if image_size is not None:
-                    sheet.write(row, 2, image_size)
+                    (image_width, image_height) = map(int, image_size.split('x', 1))
+                    sheet.write(row, 2, image_width)
+                    sheet.write(row, 3, image_height)
                    del param_list[param_list.index(image_size)]

                data_type = next(ifilter(re_data_type.match, param_list), None)
                if data_type is not None:
-                    sheet.write(row, 3, data_type)
+                    sheet.write(row, 4, data_type)
                    del param_list[param_list.index(data_type)]

-                sheet.row(row).write(4, ' | '.join(param_list))
+                sheet.row(row).write(5, ' | '.join(param_list))

-                col = 5
+                col = 6

                for c in config_names:
                    if c in configs:
@ -331,18 +334,13 @@ def main():
                    col += 1
                    if args.show_times_per_pixel:
                        sheet.write(row, col,
-                          xlwt.Formula(
-                            '''
-                              {0} * 1000000 / (
-                                VALUE(MID({1}; 1; SEARCH("x"; {1}) - 1))
-                                  * VALUE(MID({1}; SEARCH("x"; {1}) + 1; LEN({1})))
-                              )
-                            '''.replace('\n', '').replace(' ', '').format(
+                          xlwt.Formula('{0} * 1000000 / ({1} * {2})'.format(
                              xlwt.Utils.rowcol_to_cell(row, col - 1),
-                              xlwt.Utils.rowcol_to_cell(row, 2)
+                              xlwt.Utils.rowcol_to_cell(row, 2),
+                              xlwt.Utils.rowcol_to_cell(row, 3)
+                          )),
+                          time_style
                        )
-                          ),
-                          time_style)
                        col += 1

                col += 1 # blank column
--- a/modules/video/doc/motion_analysis_and_object_tracking.rst
+++ b/modules/video/doc/motion_analysis_and_object_tracking.rst
@ -41,12 +41,12 @@ Calculates an optical flow for a sparse feature set using the iterative Lucas-Ka

 The function implements a sparse iterative version of the Lucas-Kanade optical flow in pyramids. See [Bouguet00]_. The function is parallelized with the TBB library.

-.. Sample code::
+.. note::

-   * : An example using the Lucas-Kanade optical flow algorithm can be found at opencv_source_code/samples/cpp/lkdemo.cpp
+   * An example using the Lucas-Kanade optical flow algorithm can be found at opencv_source_code/samples/cpp/lkdemo.cpp

-   * : PYTHON : An example using the Lucas-Kanade optical flow algorithm can be found at opencv_source_code/samples/python2/lk_track.py
-   * : PYTHON : An example using the Lucas-Kanade tracker for homography matching can be found at opencv_source_code/samples/python2/lk_homography.py
+   * (Python) An example using the Lucas-Kanade optical flow algorithm can be found at opencv_source_code/samples/python2/lk_track.py
+   * (Python) An example using the Lucas-Kanade tracker for homography matching can be found at opencv_source_code/samples/python2/lk_homography.py

 buildOpticalFlowPyramid
 -----------------------
@ -115,11 +115,11 @@ The function finds an optical flow for each ``prev`` pixel using the [Farneback2

    \texttt{prev} (y,x)  \sim \texttt{next} ( y + \texttt{flow} (y,x)[1],  x + \texttt{flow} (y,x)[0])

-.. Sample code::
+.. note::

-   * : An example using the optical flow algorithm described by Gunnar Farneback can be found at opencv_source_code/samples/cpp/fback.cpp
+   * An example using the optical flow algorithm described by Gunnar Farneback can be found at opencv_source_code/samples/cpp/fback.cpp

-   * : PYTHON : An example using the optical flow algorithm described by Gunnar Farneback can be found at opencv_source_code/samples/python2/opt_flow.py
+   * (Python) An example using the optical flow algorithm described by Gunnar Farneback can be found at opencv_source_code/samples/python2/opt_flow.py

 estimateRigidTransform
 --------------------------
@ -284,9 +284,9 @@ In fact,
 :ocv:func:`fastAtan2` and
 :ocv:func:`phase` are used so that the computed angle is measured in degrees and covers the full range 0..360. Also, the ``mask`` is filled to indicate pixels where the computed angle is valid.

-.. Sample code::
+.. note::

-   * : PYTHON : An example on how to perform a motion template technique can be found at opencv_source_code/samples/python2/motempl.py
+   * (Python) An example of how to apply the motion template technique can be found at opencv_source_code/samples/python2/motempl.py

 calcGlobalOrientation
 -------------------------
@ -368,9 +368,9 @@ First, it finds an object center using

 See the OpenCV sample ``camshiftdemo.c`` that tracks colored objects.

-.. Sample code::
+.. note::

-   * : PYTHON : A sample explaining the camshift tracking algorithm can be found at opencv_source_code/samples/python2/camshift.py
+   * (Python) A sample explaining the camshift tracking algorithm can be found at opencv_source_code/samples/python2/camshift.py

 meanShift
 ---------
@ -398,9 +398,9 @@ The function implements the iterative object search algorithm. It takes the inpu
 :ocv:func:`contourArea` ), and rendering the  remaining contours with
 :ocv:func:`drawContours` .

-.. Sample code::
+.. note::

-   * : A mean-shift tracking sample can be found at opencv_source_code/samples/cpp/camshiftdemo.cpp
+   * A mean-shift tracking sample can be found at opencv_source_code/samples/cpp/camshiftdemo.cpp

 KalmanFilter
 ------------
@ -411,9 +411,9 @@ KalmanFilter
 The class implements a standard Kalman filter
 http://en.wikipedia.org/wiki/Kalman_filter, [Welch95]_. However, you can modify ``transitionMatrix``, ``controlMatrix``, and ``measurementMatrix`` to get extended Kalman filter functionality. See the OpenCV sample ``kalman.cpp`` .

-.. Sample code::
+.. note::

-   * : An example using the standard Kalman filter can be found at opencv_source_code/samples/cpp/kalman.cpp
+   * An example using the standard Kalman filter can be found at opencv_source_code/samples/cpp/kalman.cpp


 KalmanFilter::KalmanFilter
@ -902,9 +902,9 @@ Calculate an optical flow using "SimpleFlow" algorithm.

 See [Tao2012]_. The project site is http://graphics.berkeley.edu/papers/Tao-SAN-2012-05/.

-.. Sample code::
+.. note::

-   * : An example using the simpleFlow algorithm can be found at opencv_source_code/samples/cpp/simpleflow_demo.cpp
+   * An example using the simpleFlow algorithm can be found at opencv_source_code/samples/cpp/simpleflow_demo.cpp

 createOptFlow_DualTVL1
 ----------------------
--- a/platforms/scripts/camera_build.conf
+++ b/platforms/scripts/camera_build.conf
@ -21,3 +21,7 @@ native_camera_r4.2.0; armeabi-v7a; 14; $ANDROID_STUB_ROOT/4.2.0
 native_camera_r4.2.0; armeabi;     14; $ANDROID_STUB_ROOT/4.2.0
 native_camera_r4.2.0; x86;         14; $ANDROID_STUB_ROOT/4.2.0
 native_camera_r4.2.0; mips;        14; $ANDROID_STUB_ROOT/4.2.0
+native_camera_r4.3.0; armeabi;     14; $ANDROID_STUB_ROOT/4.3.0
+native_camera_r4.3.0; armeabi-v7a; 14; $ANDROID_STUB_ROOT/4.3.0
+native_camera_r4.3.0; x86;         14; $ANDROID_STUB_ROOT/4.3.0
+native_camera_r4.3.0; mips;        14; $ANDROID_STUB_ROOT/4.3.0
--- a/samples/android/CMakeLists.txt
+++ b/samples/android/CMakeLists.txt
@ -9,6 +9,7 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS -Wmissing-declarations)
 add_subdirectory(15-puzzle)
 add_subdirectory(face-detection)
 add_subdirectory(image-manipulations)
+add_subdirectory(camera-calibration)
 add_subdirectory(color-blob-detection)
 add_subdirectory(tutorial-1-camerapreview)
 add_subdirectory(tutorial-2-mixedprocessing)
--- a/samples/android/camera-calibration/AndroidManifest.xml
+++ b/samples/android/camera-calibration/AndroidManifest.xml
@ -26,7 +26,7 @@
                      android:largeScreens="true"
                      android:anyDensity="true" />

-    <uses-sdk android:minSdkVersion="8" android:targetSdkVersion="11" />
+    <uses-sdk android:minSdkVersion="8" />

    <uses-permission android:name="android.permission.CAMERA"/>

--- a/samples/android/camera-calibration/src/org/opencv/samples/cameracalibration/CameraCalibrator.java
+++ b/samples/android/camera-calibration/src/org/opencv/samples/cameracalibration/CameraCalibrator.java
@ -122,7 +122,7 @@ public class CameraCalibrator {
    }

    private void findPattern(Mat grayFrame) {
-        mPatternWasFound = Calib3d.findCirclesGridDefault(grayFrame, mPatternSize,
+        mPatternWasFound = Calib3d.findCirclesGrid(grayFrame, mPatternSize,
                mCorners, Calib3d.CALIB_CB_ASYMMETRIC_GRID);
    }