diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2f4fd3323d..07d8ec6f25 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -162,6 +162,7 @@ OCV_OPTION(WITH_XIMEA          "Include XIMEA cameras support"               OFF
 OCV_OPTION(WITH_XINE           "Include Xine support (GPL)"                  OFF  IF (UNIX AND NOT APPLE AND NOT ANDROID) )
 OCV_OPTION(WITH_CLP            "Include Clp support (EPL)"                   OFF)
 OCV_OPTION(WITH_OPENCL         "Include OpenCL Runtime support"              ON   IF (NOT IOS) )
+OCV_OPTION(WITH_OPENCL_SVM     "Include OpenCL Shared Virtual Memory support" OFF ) # experimental
 OCV_OPTION(WITH_OPENCLAMDFFT   "Include AMD OpenCL FFT library support"      ON   IF (NOT ANDROID AND NOT IOS) )
 OCV_OPTION(WITH_OPENCLAMDBLAS  "Include AMD OpenCL BLAS library support"     ON   IF (NOT ANDROID AND NOT IOS) )
 OCV_OPTION(WITH_DIRECTX        "Include DirectX support"                     ON   IF WIN32 )
diff --git a/cmake/OpenCVDetectOpenCL.cmake b/cmake/OpenCVDetectOpenCL.cmake
index f732546e51..ce76ad1732 100644
--- a/cmake/OpenCVDetectOpenCL.cmake
+++ b/cmake/OpenCVDetectOpenCL.cmake
@@ -26,6 +26,10 @@ if(OPENCL_FOUND)
 
   set(HAVE_OPENCL 1)
 
+  if(WITH_OPENCL_SVM)
+    set(HAVE_OPENCL_SVM 1)
+  endif()
+
   if(HAVE_OPENCL_STATIC)
     set(OPENCL_LIBRARIES "${OPENCL_LIBRARY}")
   else()
diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in
index 3eea4fafe4..f8c1c40357 100644
--- a/cmake/templates/cvconfig.h.in
+++ b/cmake/templates/cvconfig.h.in
@@ -122,6 +122,7 @@
 /* OpenCL Support */
 #cmakedefine HAVE_OPENCL
 #cmakedefine HAVE_OPENCL_STATIC
+#cmakedefine HAVE_OPENCL_SVM
 
 /* OpenEXR codec */
 #cmakedefine HAVE_OPENEXR
diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp
index 522b8b8154..8b0d94f6e1 100644
--- a/modules/core/include/opencv2/core/mat.hpp
+++ b/modules/core/include/opencv2/core/mat.hpp
@@ -415,7 +415,7 @@ public:
                       const size_t dstofs[], const size_t dststep[], bool sync) const;
 
     // default implementation returns DummyBufferPoolController
-    virtual BufferPoolController* getBufferPoolController() const;
+    virtual BufferPoolController* getBufferPoolController(const char* id = NULL) const;
 };
 
 
@@ -481,7 +481,7 @@ struct CV_EXPORTS UMatData
     int refcount;
     uchar* data;
     uchar* origdata;
-    size_t size, capacity;
+    size_t size;
 
     int flags;
     void* handle;
diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp
index 3b023fb09e..f87e15ee6a 100644
--- a/modules/core/include/opencv2/core/ocl.hpp
+++ b/modules/core/include/opencv2/core/ocl.hpp
@@ -56,6 +56,8 @@ CV_EXPORTS_W bool haveAmdFft();
 CV_EXPORTS_W void setUseOpenCL(bool flag);
 CV_EXPORTS_W void finish();
 
+CV_EXPORTS bool haveSVM();
+
 class CV_EXPORTS Context;
 class CV_EXPORTS Device;
 class CV_EXPORTS Kernel;
@@ -248,7 +250,10 @@ public:
     void* ptr() const;
 
     friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
-protected:
+
+    bool useSVM() const;
+    void setUseSVM(bool enabled);
+
     struct Impl;
     Impl* p;
 };
@@ -666,8 +671,17 @@ protected:
 
 CV_EXPORTS MatAllocator* getOpenCLAllocator();
 
-CV_EXPORTS_W bool isPerformanceCheckBypassed();
-#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::isPerformanceCheckBypassed() || (condition))
+
+#ifdef __OPENCV_BUILD
+namespace internal {
+
+CV_EXPORTS bool isPerformanceCheckBypassed();
+#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))
+
+CV_EXPORTS bool isCLBuffer(UMat& u);
+
+} // namespace internal
+#endif
 
 //! @}
 
diff --git a/modules/core/include/opencv2/core/opencl/opencl_svm.hpp b/modules/core/include/opencv2/core/opencl/opencl_svm.hpp
new file mode 100644
index 0000000000..e9f7ba0232
--- /dev/null
+++ b/modules/core/include/opencv2/core/opencl/opencl_svm.hpp
@@ -0,0 +1,81 @@
+/* See LICENSE file in the root OpenCV directory */
+
+#ifndef __OPENCV_CORE_OPENCL_SVM_HPP__
+#define __OPENCV_CORE_OPENCL_SVM_HPP__
+
+//
+// Internal usage only (binary compatibility is not guaranteed)
+//
+#ifndef __OPENCV_BUILD
+#error Internal header file
+#endif
+
+#if defined(HAVE_OPENCL) && defined(HAVE_OPENCL_SVM)
+#include "runtime/opencl_core.hpp"
+#include "runtime/opencl_svm_20.hpp"
+#include "runtime/opencl_svm_hsa_extension.hpp"
+
+namespace cv { namespace ocl { namespace svm {
+
+struct SVMCapabilities
+{
+    enum Value
+    {
+        SVM_COARSE_GRAIN_BUFFER = (1 << 0),
+        SVM_FINE_GRAIN_BUFFER = (1 << 1),
+        SVM_FINE_GRAIN_SYSTEM = (1 << 2),
+        SVM_ATOMICS = (1 << 3),
+    };
+    int value_;
+
+    SVMCapabilities(int capabilities = 0) : value_(capabilities) { }
+    operator int() const { return value_; }
+
+    inline bool isNoSVMSupport() const { return value_ == 0; }
+    inline bool isSupportCoarseGrainBuffer() const { return (value_ & SVM_COARSE_GRAIN_BUFFER) != 0; }
+    inline bool isSupportFineGrainBuffer() const { return (value_ & SVM_FINE_GRAIN_BUFFER) != 0; }
+    inline bool isSupportFineGrainSystem() const { return (value_ & SVM_FINE_GRAIN_SYSTEM) != 0; }
+    inline bool isSupportAtomics() const { return (value_ & SVM_ATOMICS) != 0; }
+};
+
+CV_EXPORTS const SVMCapabilities getSVMCapabilitites(const ocl::Context& context);
+
+struct SVMFunctions
+{
+    clSVMAllocAMD_fn fn_clSVMAlloc;
+    clSVMFreeAMD_fn fn_clSVMFree;
+    clSetKernelArgSVMPointerAMD_fn fn_clSetKernelArgSVMPointer;
+    //clSetKernelExecInfoAMD_fn fn_clSetKernelExecInfo;
+    //clEnqueueSVMFreeAMD_fn fn_clEnqueueSVMFree;
+    clEnqueueSVMMemcpyAMD_fn fn_clEnqueueSVMMemcpy;
+    clEnqueueSVMMemFillAMD_fn fn_clEnqueueSVMMemFill;
+    clEnqueueSVMMapAMD_fn fn_clEnqueueSVMMap;
+    clEnqueueSVMUnmapAMD_fn fn_clEnqueueSVMUnmap;
+
+    inline SVMFunctions()
+        : fn_clSVMAlloc(NULL), fn_clSVMFree(NULL),
+          fn_clSetKernelArgSVMPointer(NULL), /*fn_clSetKernelExecInfo(NULL),*/
+          /*fn_clEnqueueSVMFree(NULL),*/ fn_clEnqueueSVMMemcpy(NULL), fn_clEnqueueSVMMemFill(NULL),
+          fn_clEnqueueSVMMap(NULL), fn_clEnqueueSVMUnmap(NULL)
+    {
+        // nothing
+    }
+
+    inline bool isValid() const
+    {
+        return fn_clSVMAlloc != NULL && fn_clSVMFree && fn_clSetKernelArgSVMPointer &&
+                /*fn_clSetKernelExecInfo && fn_clEnqueueSVMFree &&*/ fn_clEnqueueSVMMemcpy &&
+                fn_clEnqueueSVMMemFill && fn_clEnqueueSVMMap && fn_clEnqueueSVMUnmap;
+    }
+};
+
+// We should guarantee that SVMFunctions lifetime is not less than context's lifetime
+CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context);
+
+CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags);
+
+}}} //namespace cv::ocl::svm
+#endif
+
+#endif // __OPENCV_CORE_OPENCL_SVM_HPP__
+/* End of file. */
diff --git a/modules/core/include/opencv2/core/opencl/runtime/opencl_core.hpp b/modules/core/include/opencv2/core/opencl/runtime/opencl_core.hpp
index b19563cbc2..bd30f813d7 100644
--- a/modules/core/include/opencv2/core/opencl/runtime/opencl_core.hpp
+++ b/modules/core/include/opencv2/core/opencl/runtime/opencl_core.hpp
@@ -62,6 +62,18 @@
 #endif
 #endif
 
+#ifdef HAVE_OPENCL_SVM
+#define clSVMAlloc clSVMAlloc_
+#define clSVMFree clSVMFree_
+#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_
+#define clSetKernelExecInfo clSetKernelExecInfo_
+#define clEnqueueSVMFree clEnqueueSVMFree_
+#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_
+#define clEnqueueSVMMemFill clEnqueueSVMMemFill_
+#define clEnqueueSVMMap clEnqueueSVMMap_
+#define clEnqueueSVMUnmap clEnqueueSVMUnmap_
+#endif
+
 #include "autogenerated/opencl_core.hpp"
 
 #endif // HAVE_OPENCL_STATIC
diff --git a/modules/core/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp b/modules/core/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp
new file mode 100644
index 0000000000..7f0ff91d12
--- /dev/null
+++ b/modules/core/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp
@@ -0,0 +1,52 @@
+/* See LICENSE file in the root OpenCV directory */
+
+#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
+#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
+
+#if defined(HAVE_OPENCL_SVM)
+#include "opencl_core.hpp"
+
+#include "opencl_svm_definitions.hpp"
+
+#ifndef HAVE_OPENCL_STATIC
+
+#undef clSVMAlloc
+#define clSVMAlloc clSVMAlloc_pfn
+#undef clSVMFree
+#define clSVMFree clSVMFree_pfn
+#undef clSetKernelArgSVMPointer
+#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_pfn
+#undef clSetKernelExecInfo
+//#define clSetKernelExecInfo clSetKernelExecInfo_pfn
+#undef clEnqueueSVMFree
+//#define clEnqueueSVMFree clEnqueueSVMFree_pfn
+#undef clEnqueueSVMMemcpy
+#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_pfn
+#undef clEnqueueSVMMemFill
+#define clEnqueueSVMMemFill clEnqueueSVMMemFill_pfn
+#undef clEnqueueSVMMap
+#define clEnqueueSVMMap clEnqueueSVMMap_pfn
+#undef clEnqueueSVMUnmap
+#define clEnqueueSVMUnmap clEnqueueSVMUnmap_pfn
+
+extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSVMAlloc)(cl_context context, cl_svm_mem_flags flags, size_t size, unsigned int alignment);
+extern CL_RUNTIME_EXPORT void (CL_API_CALL *clSVMFree)(cl_context context, void* svm_pointer);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clSetKernelArgSVMPointer)(cl_kernel kernel, cl_uint arg_index, const void* arg_value);
+//extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value);
+//extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMFree)(cl_command_queue command_queue, cl_uint num_svm_pointers, void* svm_pointers[],
+//        void (CL_CALLBACK *pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void* svm_pointers[], void* user_data), void* user_data,
+//        cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemcpy)(cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, const void* src_ptr, size_t size,
+        cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemFill)(cl_command_queue command_queue, void* svm_ptr, const void* pattern, size_t pattern_size, size_t size,
+        cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMap)(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags map_flags, void* svm_ptr, size_t size,
+        cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMUnmap)(cl_command_queue command_queue, void* svm_ptr,
+        cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
+
+#endif // HAVE_OPENCL_STATIC
+
+#endif // HAVE_OPENCL_SVM
+
+#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
diff --git a/modules/core/include/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp b/modules/core/include/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp
new file mode 100644
index 0000000000..a4fd5fc810
--- /dev/null
+++ b/modules/core/include/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp
@@ -0,0 +1,42 @@
+/* See LICENSE file in the root OpenCV directory */
+
+#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
+#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
+
+#if defined(HAVE_OPENCL_SVM)
+#if defined(CL_VERSION_2_0)
+
+// OpenCL 2.0 contains SVM definitions
+
+#else
+
+typedef cl_bitfield cl_device_svm_capabilities;
+typedef cl_bitfield cl_svm_mem_flags;
+typedef cl_uint     cl_kernel_exec_info;
+
+//
+// TODO Add real values after OpenCL 2.0 release
+//
+
+#ifndef CL_DEVICE_SVM_CAPABILITIES
+#define CL_DEVICE_SVM_CAPABILITIES 0x1053
+
+#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER             (1 << 0)
+#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER               (1 << 1)
+#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM               (1 << 2)
+#define CL_DEVICE_SVM_ATOMICS                         (1 << 3)
+#endif
+
+#ifndef CL_MEM_SVM_FINE_GRAIN_BUFFER
+#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10)
+#endif
+
+#ifndef CL_MEM_SVM_ATOMICS
+#define CL_MEM_SVM_ATOMICS (1 << 11)
+#endif
+
+
+#endif // CL_VERSION_2_0
+#endif // HAVE_OPENCL_SVM
+
+#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
diff --git a/modules/core/include/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp b/modules/core/include/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp
new file mode 100644
index 0000000000..9e50408f06
--- /dev/null
+++ b/modules/core/include/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp
@@ -0,0 +1,166 @@
+/* See LICENSE file in the root OpenCV directory */
+
+#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
+#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
+
+#if defined(HAVE_OPENCL_SVM)
+#include "opencl_core.hpp"
+
+#ifndef CL_DEVICE_SVM_CAPABILITIES_AMD
+//
+//  Part of the file is an extract from the cl_ext.h file from AMD APP SDK package.
+//  Below is the original copyright.
+//
+/*******************************************************************************
+ * Copyright (c) 2008-2013 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+/*******************************************
+ * Shared Virtual Memory (SVM) extension
+ *******************************************/
+typedef cl_bitfield                      cl_device_svm_capabilities_amd;
+typedef cl_bitfield                      cl_svm_mem_flags_amd;
+typedef cl_uint                          cl_kernel_exec_info_amd;
+
+/* cl_device_info */
+#define CL_DEVICE_SVM_CAPABILITIES_AMD                     0x1053
+#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT_AMD  0x1054
+
+/* cl_device_svm_capabilities_amd */
+#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD             (1 << 0)
+#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_AMD               (1 << 1)
+#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_AMD               (1 << 2)
+#define CL_DEVICE_SVM_ATOMICS_AMD                         (1 << 3)
+
+/* cl_svm_mem_flags_amd */
+#define CL_MEM_SVM_FINE_GRAIN_BUFFER_AMD                  (1 << 10)
+#define CL_MEM_SVM_ATOMICS_AMD                            (1 << 11)
+
+/* cl_mem_info */
+#define CL_MEM_USES_SVM_POINTER_AMD                       0x1109
+
+/* cl_kernel_exec_info_amd */
+#define CL_KERNEL_EXEC_INFO_SVM_PTRS_AMD                  0x11B6
+#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_AMD     0x11B7
+
+/* cl_command_type */
+#define CL_COMMAND_SVM_FREE_AMD                           0x1209
+#define CL_COMMAND_SVM_MEMCPY_AMD                         0x120A
+#define CL_COMMAND_SVM_MEMFILL_AMD                        0x120B
+#define CL_COMMAND_SVM_MAP_AMD                            0x120C
+#define CL_COMMAND_SVM_UNMAP_AMD                          0x120D
+
+typedef CL_API_ENTRY void*
+(CL_API_CALL * clSVMAllocAMD_fn)(
+    cl_context            /* context */,
+    cl_svm_mem_flags_amd  /* flags */,
+    size_t                /* size */,
+    unsigned int          /* alignment */
+) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY void
+(CL_API_CALL * clSVMFreeAMD_fn)(
+    cl_context  /* context */,
+    void*       /* svm_pointer */
+) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL * clEnqueueSVMFreeAMD_fn)(
+    cl_command_queue /* command_queue */,
+    cl_uint          /* num_svm_pointers */,
+    void**           /* svm_pointers */,
+    void (CL_CALLBACK *)( /*pfn_free_func*/
+        cl_command_queue /* queue */,
+        cl_uint          /* num_svm_pointers */,
+        void**           /* svm_pointers */,
+        void*            /* user_data */),
+    void*             /* user_data */,
+    cl_uint           /* num_events_in_wait_list */,
+    const cl_event*   /* event_wait_list */,
+    cl_event*         /* event */
+) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL * clEnqueueSVMMemcpyAMD_fn)(
+    cl_command_queue /* command_queue */,
+    cl_bool          /* blocking_copy */,
+    void*            /* dst_ptr */,
+    const void*      /* src_ptr */,
+    size_t           /* size */,
+    cl_uint          /* num_events_in_wait_list */,
+    const cl_event*  /* event_wait_list */,
+    cl_event*        /* event */
+) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL * clEnqueueSVMMemFillAMD_fn)(
+    cl_command_queue /* command_queue */,
+    void*            /* svm_ptr */,
+    const void*      /* pattern */,
+    size_t           /* pattern_size */,
+    size_t           /* size */,
+    cl_uint          /* num_events_in_wait_list */,
+    const cl_event*  /* event_wait_list */,
+    cl_event*        /* event */
+) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL * clEnqueueSVMMapAMD_fn)(
+    cl_command_queue /* command_queue */,
+    cl_bool          /* blocking_map */,
+    cl_map_flags     /* map_flags */,
+    void*            /* svm_ptr */,
+    size_t           /* size */,
+    cl_uint          /* num_events_in_wait_list */,
+    const cl_event*  /* event_wait_list */,
+    cl_event*        /* event */
+) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL * clEnqueueSVMUnmapAMD_fn)(
+    cl_command_queue /* command_queue */,
+    void*            /* svm_ptr */,
+    cl_uint          /* num_events_in_wait_list */,
+    const cl_event*  /* event_wait_list */,
+    cl_event*        /* event */
+) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL * clSetKernelArgSVMPointerAMD_fn)(
+    cl_kernel     /* kernel */,
+    cl_uint       /* arg_index */,
+    const void *  /* arg_value */
+) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL * clSetKernelExecInfoAMD_fn)(
+     cl_kernel                /* kernel */,
+     cl_kernel_exec_info_amd  /* param_name */,
+     size_t                   /* param_value_size */,
+     const void *             /* param_value */
+) CL_EXT_SUFFIX__VERSION_1_2;
+
+#endif
+
+#endif // HAVE_OPENCL_SVM
+
+#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp
index 6c8bad2444..feffc8d32f 100644
--- a/modules/core/src/matmul.cpp
+++ b/modules/core/src/matmul.cpp
@@ -721,6 +721,16 @@ static bool ocl_gemm_amdblas( InputArray matA, InputArray matB, double alpha,
         return false;
 
     UMat A = matA.getUMat(), B = matB.getUMat(), D = matD.getUMat();
+    if (!ocl::internal::isCLBuffer(A) || !ocl::internal::isCLBuffer(B) || !ocl::internal::isCLBuffer(D))
+    {
+        return false;
+    }
+    if (haveC)
+    {
+        UMat C = matC.getUMat();
+        if (!ocl::internal::isCLBuffer(C))
+            return false;
+    }
     if (haveC)
         ctrans ? transpose(matC, D) : matC.copyTo(D);
     else
diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp
index 3bd9f2c622..e1e9caa837 100644
--- a/modules/core/src/matrix.cpp
+++ b/modules/core/src/matrix.cpp
@@ -159,8 +159,9 @@ void MatAllocator::copy(UMatData* usrc, UMatData* udst, int dims, const size_t s
         memcpy(ptrs[1], ptrs[0], planesz);
 }
 
-BufferPoolController* MatAllocator::getBufferPoolController() const
+BufferPoolController* MatAllocator::getBufferPoolController(const char* id) const
 {
+    (void)id;
     static DummyBufferPoolController dummy;
     return &dummy;
 }
diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
index c18d8ba61a..efe3b936d5 100644
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -48,6 +48,8 @@
 
 #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
 #define CV_OPENCL_SHOW_RUN_ERRORS       0
+#define CV_OPENCL_SHOW_SVM_ERROR_LOG    1
+#define CV_OPENCL_SHOW_SVM_LOG          0
 
 #include "opencv2/core/bufferpool.hpp"
 #ifndef LOG_BUFFER_POOL
@@ -111,6 +113,20 @@ static size_t getConfigurationParameterForSize(const char* name, size_t defaultV
     CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
 }
 
+#if CV_OPENCL_SHOW_SVM_LOG
+// TODO add timestamp logging
+#define CV_OPENCL_SVM_TRACE_P printf("line %d (ocl.cpp): ", __LINE__); printf
+#else
+#define CV_OPENCL_SVM_TRACE_P(...)
+#endif
+
+#if CV_OPENCL_SHOW_SVM_ERROR_LOG
+// TODO add timestamp logging
+#define CV_OPENCL_SVM_TRACE_ERROR_P printf("Error on line %d (ocl.cpp): ", __LINE__); printf
+#else
+#define CV_OPENCL_SVM_TRACE_ERROR_P(...)
+#endif
+
 #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
 #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
 
@@ -920,6 +936,7 @@ OCL_FUNC(cl_int, clGetSupportedImageFormats,
  cl_uint * num_image_formats),
  (context, flags, image_type, num_entries, image_formats, num_image_formats))
 
+
 /*
 OCL_FUNC(cl_int, clGetMemObjectInfo,
  (cl_mem memobj,
@@ -1342,6 +1359,12 @@ static bool isRaiseError()
 #define CV_OclDbgAssert(expr) do { if (isRaiseError()) { CV_Assert(expr); } else { (void)(expr); } } while ((void)0, 0)
 #endif
 
+#ifdef HAVE_OPENCL_SVM
+#include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
+#include "opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp"
+#include "opencv2/core/opencl/opencl_svm.hpp"
+#endif
+
 namespace cv { namespace ocl {
 
 struct UMat2D
@@ -1627,6 +1650,15 @@ bool haveAmdFft()
 
 #endif
 
+bool haveSVM()
+{
+#ifdef HAVE_OPENCL_SVM
+    return true;
+#else
+    return false;
+#endif
+}
+
 void finish()
 {
     Queue::getDefault().finish();
@@ -2357,12 +2389,86 @@ not_found:
 }
 #endif
 
+#ifdef HAVE_OPENCL_SVM
+namespace svm {
+
+enum AllocatorFlags { // don't use first 16 bits
+        OPENCL_SVM_COARSE_GRAIN_BUFFER = 1 << 16, // clSVMAlloc + SVM map/unmap
+        OPENCL_SVM_FINE_GRAIN_BUFFER = 2 << 16, // clSVMAlloc
+        OPENCL_SVM_FINE_GRAIN_SYSTEM = 3 << 16, // direct access
+        OPENCL_SVM_BUFFER_MASK = 3 << 16,
+        OPENCL_SVM_BUFFER_MAP = 4 << 16
+};
+
+static bool checkForceSVMUmatUsage()
+{
+    static bool initialized = false;
+    static bool force = false;
+    if (!initialized)
+    {
+        force = getBoolParameter("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false);
+        initialized = true;
+    }
+    return force;
+}
+static bool checkDisableSVMUMatUsage()
+{
+    static bool initialized = false;
+    static bool force = false;
+    if (!initialized)
+    {
+        force = getBoolParameter("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false);
+        initialized = true;
+    }
+    return force;
+}
+static bool checkDisableSVM()
+{
+    static bool initialized = false;
+    static bool force = false;
+    if (!initialized)
+    {
+        force = getBoolParameter("OPENCV_OPENCL_SVM_DISABLE", false);
+        initialized = true;
+    }
+    return force;
+}
+// see SVMCapabilities
+static unsigned int getSVMCapabilitiesMask()
+{
+    static bool initialized = false;
+    static unsigned int mask = 0;
+    if (!initialized)
+    {
+        const char* envValue = getenv("OPENCV_OPENCL_SVM_CAPABILITIES_MASK");
+        if (envValue == NULL)
+        {
+            return ~0U; // all bits 1
+        }
+        mask = atoi(envValue);
+        initialized = true;
+    }
+    return mask;
+}
+} // namespace
+#endif
+
 struct Context::Impl
 {
-    Impl()
+    static Context::Impl* get(Context& context) { return context.p; }
+
+    void __init()
     {
         refcount = 1;
         handle = 0;
+#ifdef HAVE_OPENCL_SVM
+        svmInitialized = false;
+#endif
+    }
+
+    Impl()
+    {
+        __init();
     }
 
     void setDefault()
@@ -2401,8 +2507,7 @@ struct Context::Impl
 
     Impl(int dtype0)
     {
-        refcount = 1;
-        handle = 0;
+        __init();
 
         cl_int retval = 0;
         cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr();
@@ -2419,7 +2524,7 @@ struct Context::Impl
         AutoBuffer<void*> dlistbuf(nd0*2+1);
         cl_device_id* dlist = (cl_device_id*)(void**)dlistbuf;
         cl_device_id* dlist_new = dlist + nd0;
-        CV_OclDbgAssert(clGetDeviceIDs(	pl, dtype, nd0, dlist, &nd0 ) == CL_SUCCESS);
+        CV_OclDbgAssert(clGetDeviceIDs( pl, dtype, nd0, dlist, &nd0 ) == CL_SUCCESS);
         String name0;
 
         for(i = 0; i < nd0; i++)
@@ -2496,6 +2601,144 @@ struct Context::Impl
     };
     typedef std::map<HashKey, Program> phash_t;
     phash_t phash;
+
+#ifdef HAVE_OPENCL_SVM
+    bool svmInitialized;
+    bool svmAvailable;
+    bool svmEnabled;
+    svm::SVMCapabilities svmCapabilities;
+    svm::SVMFunctions svmFunctions;
+
+    void svmInit()
+    {
+        CV_Assert(handle != NULL);
+        const Device& device = devices[0];
+        cl_device_svm_capabilities deviceCaps = 0;
+        CV_Assert(((void)0, CL_DEVICE_SVM_CAPABILITIES == CL_DEVICE_SVM_CAPABILITIES_AMD)); // Check assumption
+        cl_int status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_SVM_CAPABILITIES, sizeof(deviceCaps), &deviceCaps, NULL);
+        if (status != CL_SUCCESS)
+        {
+            CV_OPENCL_SVM_TRACE_ERROR_P("CL_DEVICE_SVM_CAPABILITIES via clGetDeviceInfo failed: %d\n", status);
+            goto noSVM;
+        }
+        CV_OPENCL_SVM_TRACE_P("CL_DEVICE_SVM_CAPABILITIES returned: 0x%x\n", (int)deviceCaps);
+        CV_Assert(((void)0, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER == CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD)); // Check assumption
+        svmCapabilities.value_ =
+                ((deviceCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_COARSE_GRAIN_BUFFER : 0) |
+                ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_FINE_GRAIN_BUFFER : 0) |
+                ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) ? svm::SVMCapabilities::SVM_FINE_GRAIN_SYSTEM : 0) |
+                ((deviceCaps & CL_DEVICE_SVM_ATOMICS) ? svm::SVMCapabilities::SVM_ATOMICS : 0);
+        svmCapabilities.value_ &= svm::getSVMCapabilitiesMask();
+        if (svmCapabilities.value_ == 0)
+        {
+            CV_OPENCL_SVM_TRACE_ERROR_P("svmCapabilities is empty\n");
+            goto noSVM;
+        }
+        try
+        {
+            // Try OpenCL 2.0
+            CV_OPENCL_SVM_TRACE_P("Try SVM from OpenCL 2.0 ...\n");
+            void* ptr = clSVMAlloc(handle, CL_MEM_READ_WRITE, 100, 0);
+            if (!ptr)
+            {
+                CV_OPENCL_SVM_TRACE_ERROR_P("clSVMAlloc returned NULL...\n");
+                CV_ErrorNoReturn(Error::StsBadArg, "clSVMAlloc returned NULL");
+            }
+            try
+            {
+                bool error = false;
+                cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
+                if (CL_SUCCESS != clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE, ptr, 100, 0, NULL, NULL))
+                {
+                    CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMMap failed...\n");
+                    CV_ErrorNoReturn(Error::StsBadArg, "clEnqueueSVMMap FAILED");
+                }
+                clFinish(q);
+                try
+                {
+                    ((int*)ptr)[0] = 100;
+                }
+                catch (...)
+                {
+                    CV_OPENCL_SVM_TRACE_ERROR_P("SVM buffer access test FAILED\n");
+                    error = true;
+                }
+                if (CL_SUCCESS != clEnqueueSVMUnmap(q, ptr, 0, NULL, NULL))
+                {
+                    CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMUnmap failed...\n");
+                    CV_ErrorNoReturn(Error::StsBadArg, "clEnqueueSVMUnmap FAILED");
+                }
+                clFinish(q);
+                if (error)
+                {
+                    CV_ErrorNoReturn(Error::StsBadArg, "OpenCL SVM buffer access test was FAILED");
+                }
+            }
+            catch (...)
+            {
+                CV_OPENCL_SVM_TRACE_ERROR_P("OpenCL SVM buffer access test was FAILED\n");
+                clSVMFree(handle, ptr);
+                throw;
+            }
+            clSVMFree(handle, ptr);
+            svmFunctions.fn_clSVMAlloc = clSVMAlloc;
+            svmFunctions.fn_clSVMFree = clSVMFree;
+            svmFunctions.fn_clSetKernelArgSVMPointer = clSetKernelArgSVMPointer;
+            //svmFunctions.fn_clSetKernelExecInfo = clSetKernelExecInfo;
+            //svmFunctions.fn_clEnqueueSVMFree = clEnqueueSVMFree;
+            svmFunctions.fn_clEnqueueSVMMemcpy = clEnqueueSVMMemcpy;
+            svmFunctions.fn_clEnqueueSVMMemFill = clEnqueueSVMMemFill;
+            svmFunctions.fn_clEnqueueSVMMap = clEnqueueSVMMap;
+            svmFunctions.fn_clEnqueueSVMUnmap = clEnqueueSVMUnmap;
+        }
+        catch (...)
+        {
+            CV_OPENCL_SVM_TRACE_P("clSVMAlloc failed, trying HSA extension...\n");
+            try
+            {
+                // Try HSA extension
+                String extensions = device.extensions();
+                if (extensions.find("cl_amd_svm") == String::npos)
+                {
+                    CV_OPENCL_SVM_TRACE_P("Device extension doesn't have cl_amd_svm: %s\n", extensions.c_str());
+                    goto noSVM;
+                }
+                cl_platform_id p = NULL;
+                status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &p, NULL);
+                CV_Assert(status == CL_SUCCESS);
+                svmFunctions.fn_clSVMAlloc = (clSVMAllocAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMAllocAMD");
+                svmFunctions.fn_clSVMFree = (clSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMFreeAMD");
+                svmFunctions.fn_clSetKernelArgSVMPointer = (clSetKernelArgSVMPointerAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelArgSVMPointerAMD");
+                //svmFunctions.fn_clSetKernelExecInfo = (clSetKernelExecInfoAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelExecInfoAMD");
+                //svmFunctions.fn_clEnqueueSVMFree = (clEnqueueSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMFreeAMD");
+                svmFunctions.fn_clEnqueueSVMMemcpy = (clEnqueueSVMMemcpyAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemcpyAMD");
+                svmFunctions.fn_clEnqueueSVMMemFill = (clEnqueueSVMMemFillAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemFillAMD");
+                svmFunctions.fn_clEnqueueSVMMap = (clEnqueueSVMMapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMapAMD");
+                svmFunctions.fn_clEnqueueSVMUnmap = (clEnqueueSVMUnmapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMUnmapAMD");
+                CV_Assert(svmFunctions.isValid());
+            }
+            catch (...)
+            {
+                CV_OPENCL_SVM_TRACE_P("Something is totally wrong\n");
+                goto noSVM;
+            }
+        }
+
+        svmAvailable = true;
+        svmEnabled = !svm::checkDisableSVM();
+        svmInitialized = true;
+        CV_OPENCL_SVM_TRACE_P("OpenCV OpenCL SVM support initialized\n");
+        return;
+    noSVM:
+        CV_OPENCL_SVM_TRACE_P("OpenCL SVM is not detected\n");
+        svmAvailable = false;
+        svmEnabled = false;
+        svmCapabilities.value_ = 0;
+        svmInitialized = true;
+        svmFunctions.fn_clSVMAlloc = NULL;
+        return;
+    }
+#endif
 };
 
 
@@ -2610,6 +2853,71 @@ Program Context::getProg(const ProgramSource& prog,
     return p ? p->getProg(prog, buildopts, errmsg) : Program();
 }
 
+
+
+#ifdef HAVE_OPENCL_SVM
+bool Context::useSVM() const
+{
+    Context::Impl* i = p;
+    CV_Assert(i);
+    if (!i->svmInitialized)
+        i->svmInit();
+    return i->svmEnabled;
+}
+void Context::setUseSVM(bool enabled)
+{
+    Context::Impl* i = p;
+    CV_Assert(i);
+    if (!i->svmInitialized)
+        i->svmInit();
+    if (enabled && !i->svmAvailable)
+    {
+        CV_ErrorNoReturn(Error::StsError, "OpenCL Shared Virtual Memory (SVM) is not supported by OpenCL device");
+    }
+    i->svmEnabled = enabled;
+}
+#else
+bool Context::useSVM() const { return false; }
+void Context::setUseSVM(bool enabled) { CV_Assert(!enabled); }
+#endif
+
+#ifdef HAVE_OPENCL_SVM
+namespace svm {
+
+const SVMCapabilities getSVMCapabilitites(const ocl::Context& context)
+{
+    Context::Impl* i = context.p;
+    CV_Assert(i);
+    if (!i->svmInitialized)
+        i->svmInit();
+    return i->svmCapabilities;
+}
+
+CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context)
+{
+    Context::Impl* i = context.p;
+    CV_Assert(i);
+    CV_Assert(i->svmInitialized); // getSVMCapabilitites() must be called first
+    CV_Assert(i->svmFunctions.fn_clSVMAlloc != NULL);
+    return &i->svmFunctions;
+}
+
+CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags)
+{
+    if (checkForceSVMUmatUsage())
+        return true;
+    if (checkDisableSVMUMatUsage())
+        return false;
+    if ((usageFlags & USAGE_ALLOCATE_SHARED_MEMORY) != 0)
+        return true;
+    return false; // don't use SVM by default
+}
+
+} // namespace cv::ocl::svm
+#endif // HAVE_OPENCL_SVM
+
+
+
 void initializeContextFromHandle(Context& ctx, void* platform, void* _context, void* _device)
 {
     cl_context context = (cl_context)_context;
@@ -2979,12 +3287,33 @@ int Kernel::set(int i, const KernelArg& arg)
             return -1;
         }
 
+#ifdef HAVE_OPENCL_SVM
+        if ((arg.m->u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
+        {
+            const Context& ctx = Context::getDefault();
+            const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
+            uchar*& svmDataPtr = (uchar*&)arg.m->u->handle;
+            CV_OPENCL_SVM_TRACE_P("clSetKernelArgSVMPointer: %p\n", svmDataPtr);
+#if 1 // TODO
+            cl_int status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, svmDataPtr);
+#else
+            cl_int status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, &svmDataPtr);
+#endif
+            CV_Assert(status == CL_SUCCESS);
+        }
+        else
+#endif
+        {
+            CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS);
+        }
+
         if (ptronly)
-            CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i++, sizeof(h), &h) == CL_SUCCESS);
+        {
+            i++;
+        }
         else if( arg.m->dims <= 2 )
         {
             UMat2D u2d(*arg.m);
-            CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS);
             CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step) == CL_SUCCESS);
             CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset) == CL_SUCCESS);
             i += 3;
@@ -3000,7 +3329,6 @@ int Kernel::set(int i, const KernelArg& arg)
         else
         {
             UMat3D u3d(*arg.m);
-            CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS);
             CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep) == CL_SUCCESS);
             CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step) == CL_SUCCESS);
             CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset) == CL_SUCCESS);
@@ -3433,39 +3761,55 @@ ProgramSource::hash_t ProgramSource::hash() const
 
 //////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
 
+template<typename T>
 class OpenCLBufferPool
 {
 protected:
     ~OpenCLBufferPool() { }
 public:
-    virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity) = 0;
-    virtual void release(cl_mem handle, size_t capacity) = 0;
+    virtual T allocate(size_t size) = 0;
+    virtual void release(T buffer) = 0;
 };
 
-class OpenCLBufferPoolImpl : public BufferPoolController, public OpenCLBufferPool
+template <typename Derived, typename BufferEntry, typename T>
+class OpenCLBufferPoolBaseImpl : public BufferPoolController, public OpenCLBufferPool<T>
 {
-public:
-    struct BufferEntry
-    {
-        cl_mem clBuffer_;
-        size_t capacity_;
-    };
+private:
+    inline Derived& derived() { return *static_cast<Derived*>(this); }
 protected:
     Mutex mutex_;
 
     size_t currentReservedSize;
     size_t maxReservedSize;
 
-    std::list<BufferEntry> reservedEntries_; // LRU order
+    std::list<BufferEntry> allocatedEntries_; // Allocated and used entries
+    std::list<BufferEntry> reservedEntries_; // LRU order. Allocated, but not used entries
+
+    // synchronized
+    bool _findAndRemoveEntryFromAllocatedList(CV_OUT BufferEntry& entry, T buffer)
+    {
+        typename std::list<BufferEntry>::iterator i = allocatedEntries_.begin();
+        for (; i != allocatedEntries_.end(); ++i)
+        {
+            BufferEntry& e = *i;
+            if (e.clBuffer_ == buffer)
+            {
+                entry = e;
+                allocatedEntries_.erase(i);
+                return true;
+            }
+        }
+        return false;
+    }
 
     // synchronized
     bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size)
     {
         if (reservedEntries_.empty())
             return false;
-        std::list<BufferEntry>::iterator i = reservedEntries_.begin();
-        std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
-        BufferEntry result = {NULL, 0};
+        typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
+        typename std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
+        BufferEntry result;
         size_t minDiff = (size_t)(-1);
         for (; i != reservedEntries_.end(); ++i)
         {
@@ -3489,6 +3833,7 @@ protected:
             reservedEntries_.erase(result_pos);
             entry = result;
             currentReservedSize -= entry.capacity_;
+            allocatedEntries_.push_back(entry);
             return true;
         }
         return false;
@@ -3503,7 +3848,7 @@ protected:
             const BufferEntry& entry = reservedEntries_.back();
             CV_DbgAssert(currentReservedSize >= entry.capacity_);
             currentReservedSize -= entry.capacity_;
-            _releaseBufferEntry(entry);
+            derived()._releaseBufferEntry(entry);
             reservedEntries_.pop_back();
         }
     }
@@ -3523,72 +3868,45 @@ protected:
             return 1024*1024;
     }
 
-    void _allocateBufferEntry(BufferEntry& entry, size_t size)
-    {
-        CV_DbgAssert(entry.clBuffer_ == NULL);
-        entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
-        Context& ctx = Context::getDefault();
-        cl_int retval = CL_SUCCESS;
-        entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE, entry.capacity_, 0, &retval);
-        CV_Assert(retval == CL_SUCCESS);
-        CV_Assert(entry.clBuffer_ != NULL);
-        if(retval == CL_SUCCESS)
-        {
-            CV_IMPL_ADD(CV_IMPL_OCL);
-        }
-        LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
-                (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
-    }
-
-    void _releaseBufferEntry(const BufferEntry& entry)
-    {
-        CV_Assert(entry.capacity_ != 0);
-        CV_Assert(entry.clBuffer_ != NULL);
-        LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
-                entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
-        clReleaseMemObject(entry.clBuffer_);
-    }
 public:
-    OpenCLBufferPoolImpl()
-        : currentReservedSize(0), maxReservedSize(0)
+    OpenCLBufferPoolBaseImpl()
+        : currentReservedSize(0),
+          maxReservedSize(0)
     {
-        int poolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
-        maxReservedSize = getConfigurationParameterForSize("OPENCV_OPENCL_BUFFERPOOL_LIMIT", poolSize);
+        // nothing
     }
-    virtual ~OpenCLBufferPoolImpl()
+    virtual ~OpenCLBufferPoolBaseImpl()
     {
         freeAllReservedBuffers();
         CV_Assert(reservedEntries_.empty());
     }
 public:
-    virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity)
+    virtual T allocate(size_t size)
     {
-        BufferEntry entry = {NULL, 0};
-        if (maxReservedSize > 0)
+        AutoLock locker(mutex_);
+        BufferEntry entry;
+        if (maxReservedSize > 0 && _findAndRemoveEntryFromReservedList(entry, size))
         {
-            AutoLock locker(mutex_);
-            if (_findAndRemoveEntryFromReservedList(entry, size))
-            {
-                CV_DbgAssert(size <= entry.capacity_);
-                LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
-                capacity = entry.capacity_;
-                return entry.clBuffer_;
-            }
+            CV_DbgAssert(size <= entry.capacity_);
+            LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
+        }
+        else
+        {
+            derived()._allocateBufferEntry(entry, size);
         }
-        _allocateBufferEntry(entry, size);
-        capacity = entry.capacity_;
         return entry.clBuffer_;
     }
-    virtual void release(cl_mem handle, size_t capacity)
+    virtual void release(T buffer)
     {
-        BufferEntry entry = {handle, capacity};
+        AutoLock locker(mutex_);
+        BufferEntry entry;
+        CV_Assert(_findAndRemoveEntryFromAllocatedList(entry, buffer));
         if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8)
         {
-            _releaseBufferEntry(entry);
+            derived()._releaseBufferEntry(entry);
         }
         else
         {
-            AutoLock locker(mutex_);
             reservedEntries_.push_front(entry);
             currentReservedSize += entry.capacity_;
             _checkSizeOfReservedEntries();
@@ -3604,7 +3922,7 @@ public:
         maxReservedSize = size;
         if (maxReservedSize < oldMaxReservedSize)
         {
-            std::list<BufferEntry>::iterator i = reservedEntries_.begin();
+            typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
             for (; i != reservedEntries_.end();)
             {
                 const BufferEntry& entry = *i;
@@ -3612,7 +3930,7 @@ public:
                 {
                     CV_DbgAssert(currentReservedSize >= entry.capacity_);
                     currentReservedSize -= entry.capacity_;
-                    _releaseBufferEntry(entry);
+                    derived()._releaseBufferEntry(entry);
                     i = reservedEntries_.erase(i);
                     continue;
                 }
@@ -3624,16 +3942,123 @@ public:
     virtual void freeAllReservedBuffers()
     {
         AutoLock locker(mutex_);
-        std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
+        typename std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
         for (; i != reservedEntries_.end(); ++i)
         {
             const BufferEntry& entry = *i;
-            _releaseBufferEntry(entry);
+            derived()._releaseBufferEntry(entry);
         }
         reservedEntries_.clear();
     }
 };
 
+struct CLBufferEntry
+{
+    cl_mem clBuffer_;
+    size_t capacity_;
+    CLBufferEntry() : clBuffer_((cl_mem)NULL), capacity_(0) { }
+};
+
+class OpenCLBufferPoolImpl : public OpenCLBufferPoolBaseImpl<OpenCLBufferPoolImpl, CLBufferEntry, cl_mem>
+{
+public:
+    typedef struct CLBufferEntry BufferEntry;
+protected:
+    int createFlags_;
+public:
+    OpenCLBufferPoolImpl(int createFlags = 0)
+        : createFlags_(createFlags)
+    {
+    }
+
+    void _allocateBufferEntry(BufferEntry& entry, size_t size)
+    {
+        CV_DbgAssert(entry.clBuffer_ == NULL);
+        entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
+        Context& ctx = Context::getDefault();
+        cl_int retval = CL_SUCCESS;
+        entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE|createFlags_, entry.capacity_, 0, &retval);
+        CV_Assert(retval == CL_SUCCESS);
+        CV_Assert(entry.clBuffer_ != NULL);
+        if(retval == CL_SUCCESS)
+        {
+            CV_IMPL_ADD(CV_IMPL_OCL);
+        }
+        LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
+                (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
+        allocatedEntries_.push_back(entry);
+    }
+
+    void _releaseBufferEntry(const BufferEntry& entry)
+    {
+        CV_Assert(entry.capacity_ != 0);
+        CV_Assert(entry.clBuffer_ != NULL);
+        LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
+                entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
+        clReleaseMemObject(entry.clBuffer_);
+    }
+};
+
+#ifdef HAVE_OPENCL_SVM
+struct CLSVMBufferEntry
+{
+    void* clBuffer_;
+    size_t capacity_;
+    CLSVMBufferEntry() : clBuffer_(NULL), capacity_(0) { }
+};
+class OpenCLSVMBufferPoolImpl : public OpenCLBufferPoolBaseImpl<OpenCLSVMBufferPoolImpl, CLSVMBufferEntry, void*>
+{
+public:
+    typedef struct CLSVMBufferEntry BufferEntry;
+public:
+    OpenCLSVMBufferPoolImpl()
+    {
+    }
+
+    void _allocateBufferEntry(BufferEntry& entry, size_t size)
+    {
+        CV_DbgAssert(entry.clBuffer_ == NULL);
+        entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
+
+        Context& ctx = Context::getDefault();
+        const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
+        bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
+        cl_svm_mem_flags memFlags = CL_MEM_READ_WRITE |
+                (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
+
+        const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
+        CV_DbgAssert(svmFns->isValid());
+
+        CV_OPENCL_SVM_TRACE_P("clSVMAlloc: %d\n", (int)entry.capacity_);
+        void *buf = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, entry.capacity_, 0);
+        CV_Assert(buf);
+
+        entry.clBuffer_ = buf;
+        {
+            CV_IMPL_ADD(CV_IMPL_OCL);
+        }
+        LOG_BUFFER_POOL("OpenCL SVM allocate %lld (0x%llx) bytes: %p\n",
+                (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
+        allocatedEntries_.push_back(entry);
+    }
+
+    void _releaseBufferEntry(const BufferEntry& entry)
+    {
+        CV_Assert(entry.capacity_ != 0);
+        CV_Assert(entry.clBuffer_ != NULL);
+        LOG_BUFFER_POOL("OpenCL release SVM buffer: %p, %lld (0x%llx) bytes\n",
+                entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
+        Context& ctx = Context::getDefault();
+        const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
+        CV_DbgAssert(svmFns->isValid());
+        CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n",  entry.clBuffer_);
+        svmFns->fn_clSVMFree((cl_context)ctx.ptr(), entry.clBuffer_);
+    }
+};
+#endif
+
+
+
 #if defined _MSC_VER
 #pragma warning(disable:4127) // conditional expression is constant
 #endif
@@ -3697,12 +4122,37 @@ private:
 class OpenCLAllocator : public MatAllocator
 {
     mutable OpenCLBufferPoolImpl bufferPool;
+    mutable OpenCLBufferPoolImpl bufferPoolHostPtr;
+#ifdef  HAVE_OPENCL_SVM
+    mutable OpenCLSVMBufferPoolImpl bufferPoolSVM;
+#endif
+
     enum AllocatorFlags
     {
-        ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0
+        ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0,
+        ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED = 1 << 1
+#ifdef HAVE_OPENCL_SVM
+        ,ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED = 1 << 2
+#endif
     };
 public:
-    OpenCLAllocator() { matStdAllocator = Mat::getStdAllocator(); }
+    OpenCLAllocator()
+        : bufferPool(0),
+          bufferPoolHostPtr(CL_MEM_ALLOC_HOST_PTR)
+    {
+        size_t defaultPoolSize, poolSize;
+        defaultPoolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
+        poolSize = getConfigurationParameterForSize("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize);
+        bufferPool.setMaxReservedSize(poolSize);
+        poolSize = getConfigurationParameterForSize("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize);
+        bufferPoolHostPtr.setMaxReservedSize(poolSize);
+#ifdef HAVE_OPENCL_SVM
+        poolSize = getConfigurationParameterForSize("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize);
+        bufferPoolSVM.setMaxReservedSize(poolSize);
+#endif
+
+        matStdAllocator = Mat::getStdAllocator();
+    }
 
     UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step,
             int flags, UMatUsageFlags usageFlags) const
@@ -3739,33 +4189,47 @@ public:
         }
 
         Context& ctx = Context::getDefault();
+
         int createFlags = 0, flags0 = 0;
         getBestFlags(ctx, flags, usageFlags, createFlags, flags0);
 
-        size_t capacity = 0;
         void* handle = NULL;
         int allocatorFlags = 0;
+
+#ifdef HAVE_OPENCL_SVM
+        const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
+        if (ctx.useSVM() && svm::useSVM(usageFlags) && !svmCaps.isNoSVMSupport())
+        {
+            allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED;
+            handle = bufferPoolSVM.allocate(total);
+
+            // this property is constant, so single buffer pool can be used here
+            bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
+            allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
+        }
+        else
+#endif
         if (createFlags == 0)
         {
-            handle = bufferPool.allocate(total, capacity);
-            if (!handle)
-                return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
             allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED;
+            handle = bufferPool.allocate(total);
+        }
+        else if (createFlags == CL_MEM_ALLOC_HOST_PTR)
+        {
+            allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED;
+            handle = bufferPoolHostPtr.allocate(total);
         }
         else
         {
-            capacity = total;
-            cl_int retval = 0;
-            handle = clCreateBuffer((cl_context)ctx.ptr(),
-                                          CL_MEM_READ_WRITE|createFlags, total, 0, &retval);
-            if( !handle || retval != CL_SUCCESS )
-                return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
-            CV_IMPL_ADD(CV_IMPL_OCL)
+            CV_Assert(handle != NULL); // Unsupported, throw
         }
+
+        if (!handle)
+            return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
+
         UMatData* u = new UMatData(this);
         u->data = 0;
         u->size = total;
-        u->capacity = capacity;
         u->handle = handle;
         u->flags = flags0;
         u->allocatorFlags_ = allocatorFlags;
@@ -3788,22 +4252,81 @@ public:
             getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);
 
             cl_context ctx_handle = (cl_context)ctx.ptr();
-            cl_int retval = 0;
-            int tempUMatFlags = UMatData::TEMP_UMAT;
-            u->handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|CL_MEM_READ_WRITE,
-                                       u->size, u->origdata, &retval);
-            if((!u->handle || retval != CL_SUCCESS) && !(accessFlags & ACCESS_FAST))
+            int allocatorFlags = 0;
+            int tempUMatFlags = 0;
+            void* handle = NULL;
+            cl_int retval = CL_SUCCESS;
+
+#ifdef HAVE_OPENCL_SVM
+            svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
+            bool useSVM = ctx.useSVM() && svm::useSVM(usageFlags);
+            if (useSVM && svmCaps.isSupportFineGrainSystem())
             {
-                u->handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
-                                           u->size, u->origdata, &retval);
-                tempUMatFlags = UMatData::TEMP_COPIED_UMAT;
+                allocatorFlags = svm::OPENCL_SVM_FINE_GRAIN_SYSTEM;
+                tempUMatFlags = UMatData::TEMP_UMAT;
+                handle = u->origdata;
+                CV_OPENCL_SVM_TRACE_P("Use fine grain system: %d (%p)\n", (int)u->size, handle);
+            }
+            else if (useSVM && (svmCaps.isSupportFineGrainBuffer() || svmCaps.isSupportCoarseGrainBuffer()))
+            {
+                if (!(accessFlags & ACCESS_FAST)) // memcpy used
+                {
+                    bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
 
+                    cl_svm_mem_flags memFlags = createFlags |
+                            (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
+
+                    const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
+                    CV_DbgAssert(svmFns->isValid());
+
+                    CV_OPENCL_SVM_TRACE_P("clSVMAlloc + copy: %d\n", (int)u->size);
+                    handle = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, u->size, 0);
+                    CV_Assert(handle);
+
+                    cl_command_queue q = NULL;
+                    if (!isFineGrainBuffer)
+                    {
+                        q = (cl_command_queue)Queue::getDefault().ptr();
+                        CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", handle, (int)u->size);
+                        cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE,
+                                handle, u->size,
+                                0, NULL, NULL);
+                        CV_Assert(status == CL_SUCCESS);
+
+                    }
+                    memcpy(handle, u->origdata, u->size);
+                    if (!isFineGrainBuffer)
+                    {
+                        CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", handle);
+                        cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, handle, 0, NULL, NULL);
+                        CV_Assert(status == CL_SUCCESS);
+                    }
+
+                    tempUMatFlags = UMatData::TEMP_UMAT | UMatData::TEMP_COPIED_UMAT;
+                    allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER
+                                                : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
+                }
+            }
+            else
+#endif
+            {
+                tempUMatFlags = UMatData::TEMP_UMAT;
+                handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
+                                           u->size, u->origdata, &retval);
+                if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
+                {
+                    handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
+                                               u->size, u->origdata, &retval);
+                    tempUMatFlags |= UMatData::TEMP_COPIED_UMAT;
+                }
             }
-            if(!u->handle || retval != CL_SUCCESS)
+            if(!handle || retval != CL_SUCCESS)
                 return false;
+            u->handle = handle;
             u->prevAllocator = u->currAllocator;
             u->currAllocator = this;
             u->flags |= tempUMatFlags;
+            u->allocatorFlags_ = allocatorFlags;
         }
         if(accessFlags & ACCESS_WRITE)
             u->markHostCopyObsolete(true);
@@ -3848,34 +4371,93 @@ public:
         CV_Assert(u->urefcount >= 0);
         CV_Assert(u->refcount >= 0);
 
-        // TODO: !!! when we add Shared Virtual Memory Support,
-        // this function (as well as the others) should be corrected
         CV_Assert(u->handle != 0 && u->urefcount == 0);
         if(u->tempUMat())
         {
 //            UMatDataAutoLock lock(u);
+
             if( u->hostCopyObsolete() && u->refcount > 0 )
             {
-                cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
-                if( u->tempCopiedUMat() )
+#ifdef HAVE_OPENCL_SVM
+                if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
                 {
-                    AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
-                    CV_OclDbgAssert(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
-                                        u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS);
+                    Context& ctx = Context::getDefault();
+                    const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
+                    CV_DbgAssert(svmFns->isValid());
+
+                    if( u->tempCopiedUMat() )
+                    {
+                        CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
+                                (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER);
+                        bool isFineGrainBuffer = (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER;
+                        cl_command_queue q = NULL;
+                        if (!isFineGrainBuffer)
+                        {
+                            CV_DbgAssert(((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0));
+                            q = (cl_command_queue)Queue::getDefault().ptr();
+                            CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
+                            cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
+                                    u->handle, u->size,
+                                    0, NULL, NULL);
+                            CV_Assert(status == CL_SUCCESS);
+                        }
+                        clFinish(q);
+                        memcpy(u->origdata, u->handle, u->size);
+                        if (!isFineGrainBuffer)
+                        {
+                            CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
+                            cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
+                            CV_Assert(status == CL_SUCCESS);
+                        }
+                    }
+                    else
+                    {
+                        CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM);
+                        // nothing
+                    }
                 }
                 else
+#endif
+                {
+                    cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
+                    if( u->tempCopiedUMat() )
+                    {
+                        AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
+                        CV_OclDbgAssert(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
+                                            u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS);
+                    }
+                    else
+                    {
+                        // TODO Is it really needed for clCreateBuffer with CL_MEM_USE_HOST_PTR?
+                        cl_int retval = 0;
+                        void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
+                                                        (CL_MAP_READ | CL_MAP_WRITE),
+                                                        0, u->size, 0, 0, 0, &retval);
+                        CV_OclDbgAssert(retval == CL_SUCCESS);
+                        CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS);
+                        CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
+                    }
+                }
+                u->markHostCopyObsolete(false);
+            }
+#ifdef HAVE_OPENCL_SVM
+            if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
+            {
+                if( u->tempCopiedUMat() )
                 {
-                    cl_int retval = 0;
-                    void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
-                                                    (CL_MAP_READ | CL_MAP_WRITE),
-                                                    0, u->size, 0, 0, 0, &retval);
-                    CV_OclDbgAssert(retval == CL_SUCCESS);
-                    CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS);
-                    CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
+                    Context& ctx = Context::getDefault();
+                    const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
+                    CV_DbgAssert(svmFns->isValid());
+
+                    CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", u->handle);
+                    svmFns->fn_clSVMFree((cl_context)ctx.ptr(), u->handle);
                 }
             }
-            u->markHostCopyObsolete(false);
-            clReleaseMemObject((cl_mem)u->handle);
+            else
+#endif
+            {
+                clReleaseMemObject((cl_mem)u->handle);
+            }
             u->handle = 0;
             u->currAllocator = u->prevAllocator;
             if(u->data && u->copyOnMap() && !(u->flags & UMatData::USER_ALLOCATED))
@@ -3894,14 +4476,42 @@ public:
             }
             if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED)
             {
-                bufferPool.release((cl_mem)u->handle, u->capacity);
+                bufferPool.release((cl_mem)u->handle);
+            }
+            else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED)
+            {
+                bufferPoolHostPtr.release((cl_mem)u->handle);
+            }
+#ifdef HAVE_OPENCL_SVM
+            else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED)
+            {
+                if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
+                {
+                    //nothing
+                }
+                else if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
+                        (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
+                {
+                    Context& ctx = Context::getDefault();
+                    const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
+                    CV_DbgAssert(svmFns->isValid());
+                    cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
+
+                    if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0)
+                    {
+                        CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
+                        cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
+                        CV_Assert(status == CL_SUCCESS);
+                    }
+                }
+                bufferPoolSVM.release((void*)u->handle);
             }
+#endif
             else
             {
                 clReleaseMemObject((cl_mem)u->handle);
             }
             u->handle = 0;
-            u->capacity = 0;
             delete u;
         }
     }
@@ -3925,13 +4535,41 @@ public:
         {
             if( !u->copyOnMap() )
             {
+                // TODO
+                // because there can be other map requests for the same UMat with different access flags,
+                // we use the universal (read-write) access mode.
+#ifdef HAVE_OPENCL_SVM
+                if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
+                {
+                    if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
+                    {
+                        Context& ctx = Context::getDefault();
+                        const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
+                        CV_DbgAssert(svmFns->isValid());
+
+                        if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0)
+                        {
+                            CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
+                            cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE,
+                                    u->handle, u->size,
+                                    0, NULL, NULL);
+                            CV_Assert(status == CL_SUCCESS);
+                            u->allocatorFlags_ |= svm::OPENCL_SVM_BUFFER_MAP;
+                        }
+                    }
+                    clFinish(q);
+                    u->data = (uchar*)u->handle;
+                    u->markHostCopyObsolete(false);
+                    u->markDeviceMemMapped(true);
+                    return;
+                }
+#endif
                 if (u->data) // FIXIT Workaround for UMat synchronization issue
                 {
                     //CV_Assert(u->hostCopyObsolete() == false);
                     return;
                 }
-                // because there can be other map requests for the same UMat with different access flags,
-                // we use the universal (read-write) access mode.
+
                 cl_int retval = 0;
                 u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
                                                      (CL_MAP_READ | CL_MAP_WRITE),
@@ -3943,6 +4581,7 @@ public:
                     return;
                 }
 
+                // TODO Is it really a good idea and was it tested well?
                 // if map failed, switch to copy-on-map mode for the particular buffer
                 u->flags |= UMatData::COPY_ON_MAP;
             }
@@ -3957,6 +4596,9 @@ public:
         if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() )
         {
             AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
+#ifdef HAVE_OPENCL_SVM
+            CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
+#endif
             CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
                                            u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
             u->markHostCopyObsolete(false);
@@ -3983,6 +4625,31 @@ public:
         {
             CV_Assert(u->data != NULL);
             u->markDeviceMemMapped(false);
+#ifdef HAVE_OPENCL_SVM
+            if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
+            {
+                if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
+                {
+                    Context& ctx = Context::getDefault();
+                    const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
+                    CV_DbgAssert(svmFns->isValid());
+
+                    CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0);
+                    {
+                        CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
+                        cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
+                                0, NULL, NULL);
+                        CV_Assert(status == CL_SUCCESS);
+                        clFinish(q);
+                        u->allocatorFlags_ &= ~svm::OPENCL_SVM_BUFFER_MAP;
+                    }
+                }
+                u->data = 0;
+                u->markDeviceCopyObsolete(false);
+                u->markHostCopyObsolete(false);
+                return;
+            }
+#endif
             CV_Assert( (retval = clEnqueueUnmapMemObject(q,
                                 (cl_mem)u->handle, u->data, 0, 0, 0)) == CL_SUCCESS );
             if (Device::getDefault().isAMD())
@@ -3995,6 +4662,9 @@ public:
         else if( u->copyOnMap() && u->deviceCopyObsolete() )
         {
             AlignedDataPtr<true, false> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
+#ifdef HAVE_OPENCL_SVM
+            CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
+#endif
             CV_Assert( (retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
                                 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0)) == CL_SUCCESS );
         }
@@ -4102,17 +4772,78 @@ public:
                                             srcrawofs, new_srcofs, new_srcstep,
                                             dstrawofs, new_dstofs, new_dststep);
 
-        AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, sz[0] * dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
-        if( iscontinuous )
+#ifdef HAVE_OPENCL_SVM
+        if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
         {
-            CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
-                                           srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
+            CV_DbgAssert(u->data == NULL || u->data == u->handle);
+            Context& ctx = Context::getDefault();
+            const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
+            CV_DbgAssert(svmFns->isValid());
+
+            CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
+            if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
+            {
+                CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
+                cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
+                        u->handle, u->size,
+                        0, NULL, NULL);
+                CV_Assert(status == CL_SUCCESS);
+            }
+            clFinish(q);
+            if( iscontinuous )
+            {
+                memcpy(dstptr, (uchar*)u->handle + srcrawofs, total);
+            }
+            else
+            {
+                // This code is from MatAllocator::download()
+                int isz[CV_MAX_DIM];
+                uchar* srcptr = (uchar*)u->handle;
+                for( int i = 0; i < dims; i++ )
+                {
+                    CV_Assert( sz[i] <= (size_t)INT_MAX );
+                    if( sz[i] == 0 )
+                    return;
+                    if( srcofs )
+                    srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
+                    isz[i] = (int)sz[i];
+                }
+
+                Mat src(dims, isz, CV_8U, srcptr, srcstep);
+                Mat dst(dims, isz, CV_8U, dstptr, dststep);
+
+                const Mat* arrays[] = { &src, &dst };
+                uchar* ptrs[2];
+                NAryMatIterator it(arrays, ptrs, 2);
+                size_t j, planesz = it.size;
+
+                for( j = 0; j < it.nplanes; j++, ++it )
+                    memcpy(ptrs[1], ptrs[0], planesz);
+            }
+            if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
+            {
+                CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
+                cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
+                        0, NULL, NULL);
+                CV_Assert(status == CL_SUCCESS);
+                clFinish(q);
+            }
         }
         else
+#endif
         {
-            CV_Assert( clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
-                            new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1],
-                            new_dststep[0], new_dststep[1], alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
+            AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, sz[0] * dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
+            if( iscontinuous )
+            {
+                CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
+                                               srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0) >= 0 );
+            }
+            else
+            {
+                CV_Assert( clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
+                                new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1],
+                                new_dststep[0], new_dststep[1], alignedPtr.getAlignedPtr(), 0, 0, 0) >= 0 );
+            }
         }
     }
 
@@ -4153,20 +4884,91 @@ public:
         CV_Assert( u->handle != 0 );
         cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
 
-        AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, sz[0] * srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
-        if( iscontinuous )
+#ifdef HAVE_OPENCL_SVM
+        if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
         {
-            CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle,
-                CL_TRUE, dstrawofs, total, srcptr, 0, 0, 0) == CL_SUCCESS );
+            CV_DbgAssert(u->data == NULL || u->data == u->handle);
+            Context& ctx = Context::getDefault();
+            const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
+            CV_DbgAssert(svmFns->isValid());
+
+            CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
+            if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
+            {
+                CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
+                cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_WRITE,
+                        u->handle, u->size,
+                        0, NULL, NULL);
+                CV_Assert(status == CL_SUCCESS);
+            }
+            clFinish(q);
+            if( iscontinuous )
+            {
+                memcpy((uchar*)u->handle + dstrawofs, srcptr, total);
+            }
+            else
+            {
+                // This code is from MatAllocator::upload()
+                int isz[CV_MAX_DIM];
+                uchar* dstptr = (uchar*)u->handle;
+                for( int i = 0; i < dims; i++ )
+                {
+                    CV_Assert( sz[i] <= (size_t)INT_MAX );
+                    if( sz[i] == 0 )
+                    return;
+                    if( dstofs )
+                    dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
+                    isz[i] = (int)sz[i];
+                }
+
+                Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep);
+                Mat dst(dims, isz, CV_8U, dstptr, dststep);
+
+                const Mat* arrays[] = { &src, &dst };
+                uchar* ptrs[2];
+                NAryMatIterator it(arrays, ptrs, 2);
+                size_t j, planesz = it.size;
+
+                for( j = 0; j < it.nplanes; j++, ++it )
+                    memcpy(ptrs[1], ptrs[0], planesz);
+            }
+            if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
+            {
+                CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
+                cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
+                        0, NULL, NULL);
+                CV_Assert(status == CL_SUCCESS);
+                clFinish(q);
+            }
         }
         else
+#endif
         {
-            CV_Assert( clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
-                new_dstofs, new_srcofs, new_sz, new_dststep[0], new_dststep[1],
-                new_srcstep[0], new_srcstep[1], srcptr, 0, 0, 0) == CL_SUCCESS );
+            AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, sz[0] * srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
+            if( iscontinuous )
+            {
+                CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle,
+                    CL_TRUE, dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0) >= 0 );
+            }
+            else
+            {
+                CV_Assert( clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
+                    new_dstofs, new_srcofs, new_sz, new_dststep[0], new_dststep[1],
+                    new_srcstep[0], new_srcstep[1], alignedPtr.getAlignedPtr(), 0, 0, 0) >= 0 );
+            }
         }
-
         u->markHostCopyObsolete(true);
+#ifdef HAVE_OPENCL_SVM
+        if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
+                (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
+        {
+            // nothing
+        }
+        else
+#endif
+        {
+            u->markHostCopyObsolete(true);
+        }
         u->markDeviceCopyObsolete(false);
     }
 
@@ -4198,7 +5000,17 @@ public:
         {
             download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
             dst->markHostCopyObsolete(false);
-            dst->markDeviceCopyObsolete(true);
+#ifdef HAVE_OPENCL_SVM
+            if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
+                    (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
+            {
+                // nothing
+            }
+            else
+#endif
+            {
+                dst->markDeviceCopyObsolete(true);
+            }
             return;
         }
 
@@ -4206,26 +5018,110 @@ public:
         CV_Assert(dst->refcount == 0);
         cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
 
-        cl_int retval;
-        if( iscontinuous )
+        cl_int retval = CL_SUCCESS;
+#ifdef HAVE_OPENCL_SVM
+        if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 ||
+                (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
         {
-            CV_Assert( (retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
-                                           srcrawofs, dstrawofs, total, 0, 0, 0)) == CL_SUCCESS );
+            if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 &&
+                            (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
+            {
+                Context& ctx = Context::getDefault();
+                const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
+                CV_DbgAssert(svmFns->isValid());
+
+                if( iscontinuous )
+                {
+                    CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMemcpy: %p <-- %p (%d)\n",
+                            (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs, (int)total);
+                    cl_int status = svmFns->fn_clEnqueueSVMMemcpy(q, CL_TRUE,
+                            (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs,
+                            total, 0, NULL, NULL);
+                    CV_Assert(status == CL_SUCCESS);
+                }
+                else
+                {
+                    clFinish(q);
+                    // This code is from MatAllocator::download()/upload()
+                    int isz[CV_MAX_DIM];
+                    uchar* srcptr = (uchar*)src->handle;
+                    for( int i = 0; i < dims; i++ )
+                    {
+                        CV_Assert( sz[i] <= (size_t)INT_MAX );
+                        if( sz[i] == 0 )
+                        return;
+                        if( srcofs )
+                        srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
+                        isz[i] = (int)sz[i];
+                    }
+                    Mat m_src(dims, isz, CV_8U, srcptr, srcstep);
+
+                    uchar* dstptr = (uchar*)dst->handle;
+                    for( int i = 0; i < dims; i++ )
+                    {
+                        if( dstofs )
+                        dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
+                    }
+                    Mat m_dst(dims, isz, CV_8U, dstptr, dststep);
+
+                    const Mat* arrays[] = { &m_src, &m_dst };
+                    uchar* ptrs[2];
+                    NAryMatIterator it(arrays, ptrs, 2);
+                    size_t j, planesz = it.size;
+
+                    for( j = 0; j < it.nplanes; j++, ++it )
+                        memcpy(ptrs[1], ptrs[0], planesz);
+                }
+            }
+            else
+            {
+                if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
+                {
+                    map(src, ACCESS_READ);
+                    upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
+                    unmap(src);
+                }
+                else
+                {
+                    map(dst, ACCESS_WRITE);
+                    download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
+                    unmap(dst);
+                }
+            }
         }
         else
+#endif
         {
-            CV_Assert( (retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
-                                               new_srcofs, new_dstofs, new_sz,
-                                               new_srcstep[0], new_srcstep[1],
-                                               new_dststep[0], new_dststep[1],
-                                               0, 0, 0)) == CL_SUCCESS );
+            if( iscontinuous )
+            {
+                CV_Assert( (retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
+                                               srcrawofs, dstrawofs, total, 0, 0, 0)) == CL_SUCCESS );
+            }
+            else
+            {
+                CV_Assert( (retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
+                                                   new_srcofs, new_dstofs, new_sz,
+                                                   new_srcstep[0], new_srcstep[1],
+                                                   new_dststep[0], new_dststep[1],
+                                                   0, 0, 0)) == CL_SUCCESS );
+            }
         }
-        if(retval == CL_SUCCESS)
+        if (retval == CL_SUCCESS)
         {
             CV_IMPL_ADD(CV_IMPL_OCL)
         }
 
-        dst->markHostCopyObsolete(true);
+#ifdef HAVE_OPENCL_SVM
+        if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
+                (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
+        {
+            // nothing
+        }
+        else
+#endif
+        {
+            dst->markHostCopyObsolete(true);
+        }
         dst->markDeviceCopyObsolete(false);
 
         if( _sync )
@@ -4234,7 +5130,23 @@ public:
         }
     }
 
-    BufferPoolController* getBufferPoolController() const { return &bufferPool; }
+    BufferPoolController* getBufferPoolController(const char* id) const {
+#ifdef HAVE_OPENCL_SVM
+        if ((svm::checkForceSVMUmatUsage() && (id == NULL || strcmp(id, "OCL") == 0)) || (id != NULL && strcmp(id, "SVM") == 0))
+        {
+            return &bufferPoolSVM;
+        }
+#endif
+        if (id != NULL && strcmp(id, "HOST_ALLOC") == 0)
+        {
+            return &bufferPoolHostPtr;
+        }
+        if (id != NULL && strcmp(id, "OCL") != 0)
+        {
+            CV_ErrorNoReturn(cv::Error::StsBadArg, "getBufferPoolController(): unknown BufferPool ID\n");
+        }
+        return &bufferPool;
+    }
 
     MatAllocator* matStdAllocator;
 };
@@ -4818,7 +5730,7 @@ void* Image2D::ptr() const
     return p ? p->handle : 0;
 }
 
-bool isPerformanceCheckBypassed()
+bool internal::isPerformanceCheckBypassed()
 {
     static bool initialized = false;
     static bool value = false;
@@ -4830,4 +5742,22 @@ bool isPerformanceCheckBypassed()
     return value;
 }
 
+bool internal::isCLBuffer(UMat& u)
+{
+    void* h = u.handle(ACCESS_RW);
+    if (!h)
+        return true;
+    CV_DbgAssert(u.u->currAllocator == getOpenCLAllocator());
+#if 1
+    if ((u.u->allocatorFlags_ & 0xffff0000) != 0) // OpenCL SVM flags are stored here
+        return false;
+#else
+    cl_mem_object_type type = 0;
+    cl_int ret = clGetMemObjectInfo((cl_mem)h, CL_MEM_TYPE, sizeof(type), &type, NULL);
+    if (ret != CL_SUCCESS || type != CL_MEM_OBJECT_BUFFER)
+        return false;
+#endif
+    return true;
+}
+
 }}
diff --git a/modules/core/src/opencl/runtime/opencl_core.cpp b/modules/core/src/opencl/runtime/opencl_core.cpp
index 93f6aae5de..5dd174709d 100644
--- a/modules/core/src/opencl/runtime/opencl_core.cpp
+++ b/modules/core/src/opencl/runtime/opencl_core.cpp
@@ -182,6 +182,65 @@ static void* opencl_check_fn(int ID);
 
 #define CUSTOM_FUNCTION_ID 1000
 
+#ifdef HAVE_OPENCL_SVM
+#include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
+#define SVM_FUNCTION_ID_START CUSTOM_FUNCTION_ID
+#define SVM_FUNCTION_ID_END CUSTOM_FUNCTION_ID + 100
+
+enum OPENCL_FN_SVM_ID
+{
+    OPENCL_FN_clSVMAlloc = SVM_FUNCTION_ID_START,
+    OPENCL_FN_clSVMFree,
+    OPENCL_FN_clSetKernelArgSVMPointer,
+    OPENCL_FN_clSetKernelExecInfo,
+    OPENCL_FN_clEnqueueSVMFree,
+    OPENCL_FN_clEnqueueSVMMemcpy,
+    OPENCL_FN_clEnqueueSVMMemFill,
+    OPENCL_FN_clEnqueueSVMMap,
+    OPENCL_FN_clEnqueueSVMUnmap,
+};
+
+void* (CL_API_CALL *clSVMAlloc)(cl_context context, cl_svm_mem_flags flags, size_t size, unsigned int alignment) =
+        opencl_fn4<OPENCL_FN_clSVMAlloc, void*, cl_context, cl_svm_mem_flags, size_t, unsigned int>::switch_fn;
+static const struct DynamicFnEntry _clSVMAlloc_definition = { "clSVMAlloc", (void**)&clSVMAlloc};
+void (CL_API_CALL *clSVMFree)(cl_context context, void* svm_pointer) =
+        opencl_fn2<OPENCL_FN_clSVMFree, void, cl_context, void*>::switch_fn;
+static const struct DynamicFnEntry _clSVMFree_definition = { "clSVMFree", (void**)&clSVMFree};
+cl_int (CL_API_CALL *clSetKernelArgSVMPointer)(cl_kernel kernel, cl_uint arg_index, const void* arg_value) =
+        opencl_fn3<OPENCL_FN_clSetKernelArgSVMPointer, cl_int, cl_kernel, cl_uint, const void*>::switch_fn;
+static const struct DynamicFnEntry _clSetKernelArgSVMPointer_definition = { "clSetKernelArgSVMPointer", (void**)&clSetKernelArgSVMPointer};
+//void* (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value) =
+//        opencl_fn4<OPENCL_FN_clSetKernelExecInfo, void*, cl_kernel, cl_kernel_exec_info, size_t, const void*>::switch_fn;
+//static const struct DynamicFnEntry _clSetKernelExecInfo_definition = { "clSetKernelExecInfo", (void**)&clSetKernelExecInfo};
+//cl_int (CL_API_CALL *clEnqueueSVMFree)(...) =
+//        opencl_fn8<OPENCL_FN_clEnqueueSVMFree, cl_int, ...>::switch_fn;
+//static const struct DynamicFnEntry _clEnqueueSVMFree_definition = { "clEnqueueSVMFree", (void**)&clEnqueueSVMFree};
+cl_int (CL_API_CALL *clEnqueueSVMMemcpy)(cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, const void* src_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) =
+        opencl_fn8<OPENCL_FN_clEnqueueSVMMemcpy, cl_int, cl_command_queue, cl_bool, void*, const void*, size_t, cl_uint, const cl_event*, cl_event*>::switch_fn;
+static const struct DynamicFnEntry _clEnqueueSVMMemcpy_definition = { "clEnqueueSVMMemcpy", (void**)&clEnqueueSVMMemcpy};
+cl_int (CL_API_CALL *clEnqueueSVMMemFill)(cl_command_queue command_queue, void* svm_ptr, const void* pattern, size_t pattern_size, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) =
+        opencl_fn8<OPENCL_FN_clEnqueueSVMMemFill, cl_int, cl_command_queue, void*, const void*, size_t, size_t, cl_uint, const cl_event*, cl_event*>::switch_fn;
+static const struct DynamicFnEntry _clEnqueueSVMMemFill_definition = { "clEnqueueSVMMemFill", (void**)&clEnqueueSVMMemFill};
+cl_int (CL_API_CALL *clEnqueueSVMMap)(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags map_flags, void* svm_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) =
+        opencl_fn8<OPENCL_FN_clEnqueueSVMMap, cl_int, cl_command_queue, cl_bool, cl_map_flags, void*, size_t, cl_uint, const cl_event*, cl_event*>::switch_fn;
+static const struct DynamicFnEntry _clEnqueueSVMMap_definition = { "clEnqueueSVMMap", (void**)&clEnqueueSVMMap};
+cl_int (CL_API_CALL *clEnqueueSVMUnmap)(cl_command_queue command_queue, void* svm_ptr, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) =
+        opencl_fn5<OPENCL_FN_clEnqueueSVMUnmap, cl_int, cl_command_queue, void*, cl_uint, const cl_event*, cl_event*>::switch_fn;
+static const struct DynamicFnEntry _clEnqueueSVMUnmap_definition = { "clEnqueueSVMUnmap", (void**)&clEnqueueSVMUnmap};
+
+static const struct DynamicFnEntry* opencl_svm_fn_list[] = {
+    &_clSVMAlloc_definition,
+    &_clSVMFree_definition,
+    &_clSetKernelArgSVMPointer_definition,
+    NULL/*&_clSetKernelExecInfo_definition*/,
+    NULL/*&_clEnqueueSVMFree_definition*/,
+    &_clEnqueueSVMMemcpy_definition,
+    &_clEnqueueSVMMemFill_definition,
+    &_clEnqueueSVMMap_definition,
+    &_clEnqueueSVMUnmap_definition,
+};
+#endif // HAVE_OPENCL_SVM
+
 //
 // END OF CUSTOM FUNCTIONS HERE
 //
@@ -194,6 +253,14 @@ static void* opencl_check_fn(int ID)
         assert(ID >= 0 && ID < (int)(sizeof(opencl_fn_list)/sizeof(opencl_fn_list[0])));
         e = opencl_fn_list[ID];
     }
+#ifdef HAVE_OPENCL_SVM
+    else if (ID >= SVM_FUNCTION_ID_START && ID < SVM_FUNCTION_ID_END)
+    {
+        ID = ID - SVM_FUNCTION_ID_START;
+        assert(ID >= 0 && ID < (int)(sizeof(opencl_svm_fn_list)/sizeof(opencl_svm_fn_list[0])));
+        e = opencl_svm_fn_list[ID];
+    }
+#endif
     else
     {
         CV_ErrorNoReturn(cv::Error::StsBadArg, "Invalid function ID");
diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp
index ffc20777b5..1b42f1ee1e 100644
--- a/modules/core/src/umatrix.cpp
+++ b/modules/core/src/umatrix.cpp
@@ -55,7 +55,7 @@ UMatData::UMatData(const MatAllocator* allocator)
     prevAllocator = currAllocator = allocator;
     urefcount = refcount = 0;
     data = origdata = 0;
-    size = 0; capacity = 0;
+    size = 0;
     flags = 0;
     handle = 0;
     userdata = 0;
@@ -67,7 +67,7 @@ UMatData::~UMatData()
     prevAllocator = currAllocator = 0;
     urefcount = refcount = 0;
     data = origdata = 0;
-    size = 0; capacity = 0;
+    size = 0;
     flags = 0;
     handle = 0;
     userdata = 0;
@@ -221,7 +221,7 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
         temp_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags);
         temp_u->refcount = 1;
     }
-    UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags);
+    UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags); // TODO result is not checked
     hdr.flags = flags;
     setSize(hdr, dims, size.p, step.p);
     finalizeHdr(hdr);
@@ -575,7 +575,7 @@ Mat UMat::getMat(int accessFlags) const
 {
     if(!u)
         return Mat();
-    u->currAllocator->map(u, accessFlags | ACCESS_READ);
+    u->currAllocator->map(u, accessFlags | ACCESS_READ); // TODO Support ACCESS_WRITE without unnecessary data transfers
     CV_Assert(u->data != 0);
     Mat hdr(dims, size.p, type(), u->data + offset, step.p);
     hdr.flags = flags;