Merge remote-tracking branch 'upstream/3.4' into merge-3.4

7 years ago · 2da96be217
parent 43f821afb9 7fe0727930
commit 2da96be217
148 changed files with 2781 additions and 2046 deletions
--- a/apps/createsamples/utility.cpp
+++ b/apps/createsamples/utility.cpp
@ -1372,7 +1372,7 @@ int icvGetTraininDataFromVec( Mat& img, CvVecFile& userdata )

    size_t elements_read = fread( &tmp, sizeof( tmp ), 1, userdata.input );
    CV_Assert(elements_read == 1);
-    elements_read = fread( vector, sizeof( short ), userdata.vecsize, userdata.input );
+    elements_read = fread(vector.data(), sizeof(short), userdata.vecsize, userdata.input);
    CV_Assert(elements_read == (size_t)userdata.vecsize);

    if( feof( userdata.input ) || userdata.last++ >= userdata.count )
--- a/apps/traincascade/HOGfeatures.cpp
+++ b/apps/traincascade/HOGfeatures.cpp
@ -165,7 +165,7 @@ void CvHOGEvaluator::integralHistogram(const Mat &img, vector<Mat> &histogram, M
    Mat qangle(gradSize, CV_8U);

    AutoBuffer<int> mapbuf(gradSize.width + gradSize.height + 4);
-    int* xmap = (int*)mapbuf + 1;
+    int* xmap = mapbuf.data() + 1;
    int* ymap = xmap + gradSize.width + 2;

    const int borderType = (int)BORDER_REPLICATE;
@ -177,7 +177,7 @@ void CvHOGEvaluator::integralHistogram(const Mat &img, vector<Mat> &histogram, M

    int width = gradSize.width;
    AutoBuffer<float> _dbuf(width*4);
-    float* dbuf = _dbuf;
+    float* dbuf = _dbuf.data();
    Mat Dx(1, width, CV_32F, dbuf);
    Mat Dy(1, width, CV_32F, dbuf + width);
    Mat Mag(1, width, CV_32F, dbuf + width*2);
--- a/apps/traincascade/boost.cpp
+++ b/apps/traincascade/boost.cpp
@ -383,7 +383,7 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id
            int ci = get_var_type(vi);
            CV_Assert( ci < 0 );

-            int *src_idx_buf = (int*)(uchar*)inn_buf;
+            int *src_idx_buf = (int*)inn_buf.data();
            float *src_val_buf = (float*)(src_idx_buf + sample_count);
            int* sample_indices_buf = (int*)(src_val_buf + sample_count);
            const int* src_idx = 0;
@ -423,7 +423,7 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id
        }

        // subsample cv_lables
-        const int* src_lbls = get_cv_labels(data_root, (int*)(uchar*)inn_buf);
+        const int* src_lbls = get_cv_labels(data_root, (int*)inn_buf.data());
        if (is_buf_16u)
        {
            unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
@ -440,7 +440,7 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id
        }

        // subsample sample_indices
-        const int* sample_idx_src = get_sample_indices(data_root, (int*)(uchar*)inn_buf);
+        const int* sample_idx_src = get_sample_indices(data_root, (int*)inn_buf.data());
        if (is_buf_16u)
        {
            unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
@ -815,7 +815,7 @@ struct FeatureIdxOnlyPrecalc : ParallelLoopBody
    void operator()( const Range& range ) const
    {
        cv::AutoBuffer<float> valCache(sample_count);
-        float* valCachePtr = (float*)valCache;
+        float* valCachePtr = valCache.data();
        for ( int fi = range.start; fi < range.end; fi++)
        {
            for( int si = 0; si < sample_count; si++ )
@ -1084,7 +1084,7 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
    CvMat* buf = data->buf;
    size_t length_buf_row = data->get_length_subbuf();
    cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int)+sizeof(float)));
-    int* tempBuf = (int*)(uchar*)inn_buf;
+    int* tempBuf = (int*)inn_buf.data();
    bool splitInputData;

    complete_node_dir(node);
@ -1398,7 +1398,7 @@ void CvCascadeBoost::update_weights( CvBoostTree* tree )
    int inn_buf_size = ((params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? n*sizeof(int) : 0) +
                       ( !tree ? n*sizeof(int) : 0 );
    cv::AutoBuffer<uchar> inn_buf(inn_buf_size);
-    uchar* cur_inn_buf_pos = (uchar*)inn_buf;
+    uchar* cur_inn_buf_pos = inn_buf.data();
    if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) )
    {
        step = CV_IS_MAT_CONT(data->responses_copy->type) ?
--- a/apps/traincascade/old_ml_boost.cpp
+++ b/apps/traincascade/old_ml_boost.cpp
@ -168,7 +168,7 @@ CvBoostTree::try_split_node( CvDTreeNode* node )
        // store the responses for the corresponding training samples
        double* weak_eval = ensemble->get_weak_response()->data.db;
        cv::AutoBuffer<int> inn_buf(node->sample_count);
-        const int* labels = data->get_cv_labels( node, (int*)inn_buf );
+        const int* labels = data->get_cv_labels(node, inn_buf.data());
        int i, count = node->sample_count;
        double value = node->value;

@ -191,7 +191,7 @@ CvBoostTree::calc_node_dir( CvDTreeNode* node )
    if( data->get_var_type(vi) >= 0 ) // split on categorical var
    {
        cv::AutoBuffer<int> inn_buf(n);
-        const int* cat_labels = data->get_cat_var_data( node, vi, (int*)inn_buf );
+        const int* cat_labels = data->get_cat_var_data(node, vi, inn_buf.data());
        const int* subset = node->split->subset;
        double sum = 0, sum_abs = 0;

@ -210,7 +210,7 @@ CvBoostTree::calc_node_dir( CvDTreeNode* node )
    else // split on ordered var
    {
        cv::AutoBuffer<uchar> inn_buf(2*n*sizeof(int)+n*sizeof(float));
-        float* values_buf = (float*)(uchar*)inn_buf;
+        float* values_buf = (float*)inn_buf.data();
        int* sorted_indices_buf = (int*)(values_buf + n);
        int* sample_indices_buf = sorted_indices_buf + n;
        const float* values = 0;
@ -260,7 +260,7 @@ CvBoostTree::find_split_ord_class( CvDTreeNode* node, int vi, float init_quality
    cv::AutoBuffer<uchar> inn_buf;
    if( !_ext_buf )
        inn_buf.allocate(n*(3*sizeof(int)+sizeof(float)));
-    uchar* ext_buf = _ext_buf ? _ext_buf : (uchar*)inn_buf;
+    uchar* ext_buf = _ext_buf ? _ext_buf : inn_buf.data();
    float* values_buf = (float*)ext_buf;
    int* sorted_indices_buf = (int*)(values_buf + n);
    int* sample_indices_buf = sorted_indices_buf + n;
@ -369,7 +369,7 @@ CvBoostTree::find_split_cat_class( CvDTreeNode* node, int vi, float init_quality
    cv::AutoBuffer<uchar> inn_buf((2*mi+3)*sizeof(double) + mi*sizeof(double*));
    if( !_ext_buf)
        inn_buf.allocate( base_size + 2*n*sizeof(int) );
-    uchar* base_buf = (uchar*)inn_buf;
+    uchar* base_buf = inn_buf.data();
    uchar* ext_buf = _ext_buf ? _ext_buf : base_buf + base_size;

    int* cat_labels_buf = (int*)ext_buf;
@ -490,7 +490,7 @@ CvBoostTree::find_split_ord_reg( CvDTreeNode* node, int vi, float init_quality,
    cv::AutoBuffer<uchar> inn_buf;
    if( !_ext_buf )
        inn_buf.allocate(2*n*(sizeof(int)+sizeof(float)));
-    uchar* ext_buf = _ext_buf ? _ext_buf : (uchar*)inn_buf;
+    uchar* ext_buf = _ext_buf ? _ext_buf : inn_buf.data();

    float* values_buf = (float*)ext_buf;
    int* indices_buf = (int*)(values_buf + n);
@ -559,7 +559,7 @@ CvBoostTree::find_split_cat_reg( CvDTreeNode* node, int vi, float init_quality,
    cv::AutoBuffer<uchar> inn_buf(base_size);
    if( !_ext_buf )
        inn_buf.allocate(base_size + n*(2*sizeof(int) + sizeof(float)));
-    uchar* base_buf = (uchar*)inn_buf;
+    uchar* base_buf = inn_buf.data();
    uchar* ext_buf = _ext_buf ? _ext_buf : base_buf + base_size;

    int* cat_labels_buf = (int*)ext_buf;
@ -652,7 +652,7 @@ CvBoostTree::find_surrogate_split_ord( CvDTreeNode* node, int vi, uchar* _ext_bu
    cv::AutoBuffer<uchar> inn_buf;
    if( !_ext_buf )
        inn_buf.allocate(n*(2*sizeof(int)+sizeof(float)));
-    uchar* ext_buf = _ext_buf ? _ext_buf : (uchar*)inn_buf;
+    uchar* ext_buf = _ext_buf ? _ext_buf : inn_buf.data();
    float* values_buf = (float*)ext_buf;
    int* indices_buf = (int*)(values_buf + n);
    int* sample_indices_buf = indices_buf + n;
@ -733,7 +733,7 @@ CvBoostTree::find_surrogate_split_cat( CvDTreeNode* node, int vi, uchar* _ext_bu
    cv::AutoBuffer<uchar> inn_buf(base_size);
    if( !_ext_buf )
        inn_buf.allocate(base_size + n*sizeof(int));
-    uchar* ext_buf = _ext_buf ? _ext_buf : (uchar*)inn_buf;
+    uchar* ext_buf = _ext_buf ? _ext_buf : inn_buf.data();
    int* cat_labels_buf = (int*)ext_buf;
    const int* cat_labels = data->get_cat_var_data(node, vi, cat_labels_buf);

@ -797,7 +797,7 @@ CvBoostTree::calc_node_value( CvDTreeNode* node )
    int i, n = node->sample_count;
    const double* weights = ensemble->get_weights()->data.db;
    cv::AutoBuffer<uchar> inn_buf(n*(sizeof(int) + ( data->is_classifier ? sizeof(int) : sizeof(int) + sizeof(float))));
-    int* labels_buf = (int*)(uchar*)inn_buf;
+    int* labels_buf = (int*)inn_buf.data();
    const int* labels = data->get_cv_labels(node, labels_buf);
    double* subtree_weights = ensemble->get_subtree_weights()->data.db;
    double rcw[2] = {0,0};
@ -1147,7 +1147,7 @@ CvBoost::update_weights( CvBoostTree* tree )
            _buf_size += data->get_length_subbuf()*(sizeof(float)+sizeof(uchar));
    }
    inn_buf.allocate(_buf_size);
-    uchar* cur_buf_pos = (uchar*)inn_buf;
+    uchar* cur_buf_pos = inn_buf.data();

    if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) )
    {
--- a/apps/traincascade/old_ml_tree.cpp
+++ b/apps/traincascade/old_ml_tree.cpp
@ -780,7 +780,7 @@ CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx )
            if( ci >= 0 || vi >= var_count )
            {
                int num_valid = 0;
-                const int* src = CvDTreeTrainData::get_cat_var_data( data_root, vi, (int*)(uchar*)inn_buf );
+                const int* src = CvDTreeTrainData::get_cat_var_data(data_root, vi, (int*)inn_buf.data());

                if (is_buf_16u)
                {
@ -810,7 +810,7 @@ CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx )
            }
            else
            {
-                int *src_idx_buf = (int*)(uchar*)inn_buf;
+                int *src_idx_buf = (int*)inn_buf.data();
                float *src_val_buf = (float*)(src_idx_buf + sample_count);
                int* sample_indices_buf = (int*)(src_val_buf + sample_count);
                const int* src_idx = 0;
@ -870,7 +870,7 @@ CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx )
            }
        }
        // sample indices subsampling
-        const int* sample_idx_src = get_sample_indices(data_root, (int*)(uchar*)inn_buf);
+        const int* sample_idx_src = get_sample_indices(data_root, (int*)inn_buf.data());
        if (is_buf_16u)
        {
            unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
@ -943,7 +943,7 @@ void CvDTreeTrainData::get_vectors( const CvMat* _subsample_idx,
        {
            float* dst = values + vi;
            uchar* m = missing ? missing + vi : 0;
-            const int* src = get_cat_var_data(data_root, vi, (int*)(uchar*)inn_buf);
+            const int* src = get_cat_var_data(data_root, vi, (int*)inn_buf.data());

            for( i = 0; i < count; i++, dst += var_count )
            {
@ -962,7 +962,7 @@ void CvDTreeTrainData::get_vectors( const CvMat* _subsample_idx,
            float* dst = values + vi;
            uchar* m = missing ? missing + vi : 0;
            int count1 = data_root->get_num_valid(vi);
-            float *src_val_buf = (float*)(uchar*)inn_buf;
+            float *src_val_buf = (float*)inn_buf.data();
            int* src_idx_buf = (int*)(src_val_buf + sample_count);
            int* sample_indices_buf = src_idx_buf + sample_count;
            const float *src_val = 0;
@ -999,7 +999,7 @@ void CvDTreeTrainData::get_vectors( const CvMat* _subsample_idx,
    {
        if( is_classifier )
        {
-            const int* src = get_class_labels(data_root, (int*)(uchar*)inn_buf);
+            const int* src = get_class_labels(data_root, (int*)inn_buf.data());
            for( i = 0; i < count; i++ )
            {
                int idx = sidx ? sidx[i] : i;
@ -1010,7 +1010,7 @@ void CvDTreeTrainData::get_vectors( const CvMat* _subsample_idx,
        }
        else
        {
-            float* val_buf = (float*)(uchar*)inn_buf;
+            float* val_buf = (float*)inn_buf.data();
            int* sample_idx_buf = (int*)(val_buf + sample_count);
            const float* _values = get_ord_responses(data_root, val_buf, sample_idx_buf);
            for( i = 0; i < count; i++ )
@ -1780,7 +1780,7 @@ double CvDTree::calc_node_dir( CvDTreeNode* node )
    if( data->get_var_type(vi) >= 0 ) // split on categorical var
    {
        cv::AutoBuffer<int> inn_buf(n*(!data->have_priors ? 1 : 2));
-        int* labels_buf = (int*)inn_buf;
+        int* labels_buf = inn_buf.data();
        const int* labels = data->get_cat_var_data( node, vi, labels_buf );
        const int* subset = node->split->subset;
        if( !data->have_priors )
@ -1824,7 +1824,7 @@ double CvDTree::calc_node_dir( CvDTreeNode* node )
        int split_point = node->split->ord.split_point;
        int n1 = node->get_num_valid(vi);
        cv::AutoBuffer<uchar> inn_buf(n*(sizeof(int)*(data->have_priors ? 3 : 2) + sizeof(float)));
-        float* val_buf = (float*)(uchar*)inn_buf;
+        float* val_buf = (float*)inn_buf.data();
        int* sorted_buf = (int*)(val_buf + n);
        int* sample_idx_buf = sorted_buf + n;
        const float* val = 0;
@ -1929,16 +1929,16 @@ void DTreeBestSplitFinder::operator()(const BlockedRange& range)
        if( data->is_classifier )
        {
            if( ci >= 0 )
-                res = tree->find_split_cat_class( node, vi, bestSplit->quality, split, (uchar*)inn_buf );
+                res = tree->find_split_cat_class( node, vi, bestSplit->quality, split, inn_buf.data() );
            else
-                res = tree->find_split_ord_class( node, vi, bestSplit->quality, split, (uchar*)inn_buf );
+                res = tree->find_split_ord_class( node, vi, bestSplit->quality, split, inn_buf.data() );
        }
        else
        {
            if( ci >= 0 )
-                res = tree->find_split_cat_reg( node, vi, bestSplit->quality, split, (uchar*)inn_buf );
+                res = tree->find_split_cat_reg( node, vi, bestSplit->quality, split, inn_buf.data() );
            else
-                res = tree->find_split_ord_reg( node, vi, bestSplit->quality, split, (uchar*)inn_buf );
+                res = tree->find_split_ord_reg( node, vi, bestSplit->quality, split, inn_buf.data() );
        }

        if( res && bestSplit->quality < split->quality )
@ -1982,7 +1982,7 @@ CvDTreeSplit* CvDTree::find_split_ord_class( CvDTreeNode* node, int vi,
    cv::AutoBuffer<uchar> inn_buf(base_size);
    if( !_ext_buf )
      inn_buf.allocate(base_size + n*(3*sizeof(int)+sizeof(float)));
-    uchar* base_buf = (uchar*)inn_buf;
+    uchar* base_buf = inn_buf.data();
    uchar* ext_buf = _ext_buf ? _ext_buf : base_buf + base_size;
    float* values_buf = (float*)ext_buf;
    int* sorted_indices_buf = (int*)(values_buf + n);
@ -2096,7 +2096,7 @@ void CvDTree::cluster_categories( const int* vectors, int n, int m,
    int iters = 0, max_iters = 100;
    int i, j, idx;
    cv::AutoBuffer<double> buf(n + k);
-    double *v_weights = buf, *c_weights = buf + n;
+    double *v_weights = buf.data(), *c_weights = buf.data() + n;
    bool modified = true;
    RNG* r = data->rng;

@ -2201,7 +2201,7 @@ CvDTreeSplit* CvDTree::find_split_cat_class( CvDTreeNode* node, int vi, float in
    cv::AutoBuffer<uchar> inn_buf(base_size);
    if( !_ext_buf )
        inn_buf.allocate(base_size + 2*n*sizeof(int));
-    uchar* base_buf = (uchar*)inn_buf;
+    uchar* base_buf = inn_buf.data();
    uchar* ext_buf = _ext_buf ? _ext_buf : base_buf + base_size;

    int* lc = (int*)base_buf;
@ -2383,7 +2383,7 @@ CvDTreeSplit* CvDTree::find_split_ord_reg( CvDTreeNode* node, int vi, float init
    cv::AutoBuffer<uchar> inn_buf;
    if( !_ext_buf )
        inn_buf.allocate(2*n*(sizeof(int) + sizeof(float)));
-    uchar* ext_buf = _ext_buf ? _ext_buf : (uchar*)inn_buf;
+    uchar* ext_buf = _ext_buf ? _ext_buf : inn_buf.data();
    float* values_buf = (float*)ext_buf;
    int* sorted_indices_buf = (int*)(values_buf + n);
    int* sample_indices_buf = sorted_indices_buf + n;
@ -2443,7 +2443,7 @@ CvDTreeSplit* CvDTree::find_split_cat_reg( CvDTreeNode* node, int vi, float init
    cv::AutoBuffer<uchar> inn_buf(base_size);
    if( !_ext_buf )
        inn_buf.allocate(base_size + n*(2*sizeof(int) + sizeof(float)));
-    uchar* base_buf = (uchar*)inn_buf;
+    uchar* base_buf = inn_buf.data();
    uchar* ext_buf = _ext_buf ? _ext_buf : base_buf + base_size;
    int* labels_buf = (int*)ext_buf;
    const int* labels = data->get_cat_var_data(node, vi, labels_buf);
@ -2534,7 +2534,7 @@ CvDTreeSplit* CvDTree::find_surrogate_split_ord( CvDTreeNode* node, int vi, ucha
    cv::AutoBuffer<uchar> inn_buf;
    if( !_ext_buf )
        inn_buf.allocate( n*(sizeof(int)*(data->have_priors ? 3 : 2) + sizeof(float)) );
-    uchar* ext_buf = _ext_buf ? _ext_buf : (uchar*)inn_buf;
+    uchar* ext_buf = _ext_buf ? _ext_buf : inn_buf.data();
    float* values_buf = (float*)ext_buf;
    int* sorted_indices_buf = (int*)(values_buf + n);
    int* sample_indices_buf = sorted_indices_buf + n;
@ -2658,7 +2658,7 @@ CvDTreeSplit* CvDTree::find_surrogate_split_cat( CvDTreeNode* node, int vi, ucha
    cv::AutoBuffer<uchar> inn_buf(base_size);
    if( !_ext_buf )
        inn_buf.allocate(base_size + n*(sizeof(int) + (data->have_priors ? sizeof(int) : 0)));
-    uchar* base_buf = (uchar*)inn_buf;
+    uchar* base_buf = inn_buf.data();
    uchar* ext_buf = _ext_buf ? _ext_buf : base_buf + base_size;

    int* labels_buf = (int*)ext_buf;
@ -2758,7 +2758,7 @@ void CvDTree::calc_node_value( CvDTreeNode* node )
    int base_size = data->is_classifier ? m*cv_n*sizeof(int) : 2*cv_n*sizeof(double)+cv_n*sizeof(int);
    int ext_size = n*(sizeof(int) + (data->is_classifier ? sizeof(int) : sizeof(int)+sizeof(float)));
    cv::AutoBuffer<uchar> inn_buf(base_size + ext_size);
-    uchar* base_buf = (uchar*)inn_buf;
+    uchar* base_buf = inn_buf.data();
    uchar* ext_buf = base_buf + base_size;

    int* cv_labels_buf = (int*)ext_buf;
@ -2961,7 +2961,7 @@ void CvDTree::complete_node_dir( CvDTreeNode* node )

            if( data->get_var_type(vi) >= 0 ) // split on categorical var
            {
-                int* labels_buf = (int*)(uchar*)inn_buf;
+                int* labels_buf = (int*)inn_buf.data();
                const int* labels = data->get_cat_var_data(node, vi, labels_buf);
                const int* subset = split->subset;

@ -2980,7 +2980,7 @@ void CvDTree::complete_node_dir( CvDTreeNode* node )
            }
            else // split on ordered var
            {
-                float* values_buf = (float*)(uchar*)inn_buf;
+                float* values_buf = (float*)inn_buf.data();
                int* sorted_indices_buf = (int*)(values_buf + n);
                int* sample_indices_buf = sorted_indices_buf + n;
                const float* values = 0;
@ -3042,7 +3042,7 @@ void CvDTree::split_node_data( CvDTreeNode* node )
    CvMat* buf = data->buf;
    size_t length_buf_row = data->get_length_subbuf();
    cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int) + sizeof(float)));
-    int* temp_buf = (int*)(uchar*)inn_buf;
+    int* temp_buf = (int*)inn_buf.data();

    complete_node_dir(node);

--- a/doc/tutorials/dnn/dnn_android/dnn_android.markdown
+++ b/doc/tutorials/dnn/dnn_android/dnn_android.markdown
@ -12,7 +12,7 @@ Tutorial was written for the following versions of corresponding software:

 - Download and install Android Studio from https://developer.android.com/studio.

- Get the latest pre-built OpenCV for Android release from https://github.com/opencv/opencv/releases and unpack it (for example, `opencv-3.4.1-android-sdk.zip`).
+- Get the latest pre-built OpenCV for Android release from https://github.com/opencv/opencv/releases and unpack it (for example, `opencv-3.4.2-android-sdk.zip`).

 - Download MobileNet object detection model from https://github.com/chuanqi305/MobileNet-SSD. We need a configuration file `MobileNetSSD_deploy.prototxt` and weights `MobileNetSSD_deploy.caffemodel`.

--- a/doc/tutorials/features2d/akaze_matching/akaze_matching.markdown
+++ b/doc/tutorials/features2d/akaze_matching/akaze_matching.markdown
@ -7,8 +7,7 @@ Introduction
 In this tutorial we will learn how to use AKAZE @cite ANB13 local features to detect and match keypoints on
 two images.
 We will find keypoints on a pair of images with given homography matrix, match them and count the
-
-number of inliers (i. e. matches that fit in the given homography).
+number of inliers (i.e. matches that fit in the given homography).

 You can find expanded version of this example here:
 <https://github.com/pablofdezalc/test_kaze_akaze_opencv>
@ -16,7 +15,7 @@ You can find expanded version of this example here:
 Data
 ----

-We are going to use images 1 and 3 from *Graffity* sequence of Oxford dataset.
+We are going to use images 1 and 3 from *Graffiti* sequence of [Oxford dataset](http://www.robots.ox.ac.uk/~vgg/data/data-aff.html).

 ![](images/graf.png)

@ -27,107 +26,148 @@ Homography is given by a 3 by 3 matrix:
 3.4663091e-04  -1.4364524e-05   1.0000000e+00
@endcode
 You can find the images (*graf1.png*, *graf3.png*) and homography (*H1to3p.xml*) in
-*opencv/samples/cpp*.
+*opencv/samples/data/*.

 ### Source Code

-@include cpp/tutorial_code/features2D/AKAZE_match.cpp
+@add_toggle_cpp
+-   **Downloadable code**: Click
+    [here](https://raw.githubusercontent.com/opencv/opencv/master/samples/cpp/tutorial_code/features2D/AKAZE_match.cpp)
+
+-   **Code at glance:**
+    @include samples/cpp/tutorial_code/features2D/AKAZE_match.cpp
+@end_toggle
+
+@add_toggle_java
+-   **Downloadable code**: Click
+    [here](https://raw.githubusercontent.com/opencv/opencv/master/samples/java/tutorial_code/features2D/akaze_matching/AKAZEMatchDemo.java)
+
+-   **Code at glance:**
+    @include samples/java/tutorial_code/features2D/akaze_matching/AKAZEMatchDemo.java
+@end_toggle
+
+@add_toggle_python
+-   **Downloadable code**: Click
+    [here](https://raw.githubusercontent.com/opencv/opencv/master/samples/python/tutorial_code/features2D/akaze_matching/AKAZE_match.py)
+
+-   **Code at glance:**
+    @include samples/python/tutorial_code/features2D/akaze_matching/AKAZE_match.py
+@end_toggle

 ### Explanation

-#  **Load images and homography**
-    @code{.cpp}
-    Mat img1 = imread("graf1.png", IMREAD_GRAYSCALE);
-    Mat img2 = imread("graf3.png", IMREAD_GRAYSCALE);
-
-    Mat homography;
-    FileStorage fs("H1to3p.xml", FileStorage::READ);
-    fs.getFirstTopLevelNode() >> homography;
-    @endcode
-    We are loading grayscale images here. Homography is stored in the xml created with FileStorage.
-
-#  **Detect keypoints and compute descriptors using AKAZE**
-    @code{.cpp}
-    vector<KeyPoint> kpts1, kpts2;
-    Mat desc1, desc2;
-
-    AKAZE akaze;
-    akaze(img1, noArray(), kpts1, desc1);
-    akaze(img2, noArray(), kpts2, desc2);
-    @endcode
-    We create AKAZE object and use it's *operator()* functionality. Since we don't need the *mask*
-    parameter, *noArray()* is used.
-
-#  **Use brute-force matcher to find 2-nn matches**
-    @code{.cpp}
-    BFMatcher matcher(NORM_HAMMING);
-    vector< vector<DMatch> > nn_matches;
-    matcher.knnMatch(desc1, desc2, nn_matches, 2);
-    @endcode
-    We use Hamming distance, because AKAZE uses binary descriptor by default.
-
-#  **Use 2-nn matches to find correct keypoint matches**
-    @code{.cpp}
-    for(size_t i = 0; i < nn_matches.size(); i++) {
-        DMatch first = nn_matches[i][0];
-        float dist1 = nn_matches[i][0].distance;
-        float dist2 = nn_matches[i][1].distance;
-
-        if(dist1 < nn_match_ratio * dist2) {
-            matched1.push_back(kpts1[first.queryIdx]);
-            matched2.push_back(kpts2[first.trainIdx]);
-        }
-    }
-    @endcode
-    If the closest match is *ratio* closer than the second closest one, then the match is correct.
-
-#  **Check if our matches fit in the homography model**
-    @code{.cpp}
-    for(int i = 0; i < matched1.size(); i++) {
-        Mat col = Mat::ones(3, 1, CV_64F);
-        col.at<double>(0) = matched1[i].pt.x;
-        col.at<double>(1) = matched1[i].pt.y;
-
-        col = homography * col;
-        col /= col.at<double>(2);
-        float dist = sqrt( pow(col.at<double>(0) - matched2[i].pt.x, 2) +
-                           pow(col.at<double>(1) - matched2[i].pt.y, 2));
-
-        if(dist < inlier_threshold) {
-            int new_i = inliers1.size();
-            inliers1.push_back(matched1[i]);
-            inliers2.push_back(matched2[i]);
-            good_matches.push_back(DMatch(new_i, new_i, 0));
-        }
-    }
-    @endcode
-    If the distance from first keypoint's projection to the second keypoint is less than threshold,
-    then it it fits in the homography.
-
-    We create a new set of matches for the inliers, because it is required by the drawing function.
-
-#  **Output results**
-    @code{.cpp}
-    Mat res;
-    drawMatches(img1, inliers1, img2, inliers2, good_matches, res);
-    imwrite("res.png", res);
-    ...
-    @endcode
-    Here we save the resulting image and print some statistics.
-
-### Results
-
-Found matches
-------------
+-   **Load images and homography**
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/features2D/AKAZE_match.cpp load
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/features2D/akaze_matching/AKAZEMatchDemo.java load
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/features2D/akaze_matching/AKAZE_match.py load
+@end_toggle
+
+We are loading grayscale images here. Homography is stored in the xml created with FileStorage.
+
+-   **Detect keypoints and compute descriptors using AKAZE**
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/features2D/AKAZE_match.cpp AKAZE
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/features2D/akaze_matching/AKAZEMatchDemo.java AKAZE
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/features2D/akaze_matching/AKAZE_match.py AKAZE
+@end_toggle
+
+We create AKAZE and detect and compute AKAZE keypoints and descriptors. Since we don't need the *mask*
+parameter, *noArray()* is used.
+
+-   **Use brute-force matcher to find 2-nn matches**
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/features2D/AKAZE_match.cpp 2-nn matching
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/features2D/akaze_matching/AKAZEMatchDemo.java 2-nn matching
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/features2D/akaze_matching/AKAZE_match.py 2-nn matching
+@end_toggle
+
+We use Hamming distance, because AKAZE uses binary descriptor by default.
+
+-   **Use 2-nn matches and ratio criterion to find correct keypoint matches**
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/features2D/AKAZE_match.cpp ratio test filtering
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/features2D/akaze_matching/AKAZEMatchDemo.java ratio test filtering
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/features2D/akaze_matching/AKAZE_match.py ratio test filtering
+@end_toggle
+
+If the closest match distance is significantly lower than the second closest one, then the match is correct (match is not ambiguous).
+
+-   **Check if our matches fit in the homography model**
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/features2D/AKAZE_match.cpp homography check
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/features2D/akaze_matching/AKAZEMatchDemo.java homography check
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/features2D/akaze_matching/AKAZE_match.py homography check
+@end_toggle
+
+If the distance from first keypoint's projection to the second keypoint is less than threshold,
+then it fits the homography model.
+
+We create a new set of matches for the inliers, because it is required by the drawing function.
+
+-   **Output results**
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/features2D/AKAZE_match.cpp draw final matches
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/features2D/akaze_matching/AKAZEMatchDemo.java draw final matches
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/features2D/akaze_matching/AKAZE_match.py draw final matches
+@end_toggle
+
+Here we save the resulting image and print some statistics.
+
+Results
+-------
+
+### Found matches

 ![](images/res.png)

-A-KAZE Matching Results
-----------------------
+Depending on your OpenCV version, you should get results coherent with:
+
@code{.none}
 Keypoints 1:   2943
 Keypoints 2:   3511
 Matches:       447
 Inliers:       308
- Inlier Ratio: 0.689038}
+ Inlier Ratio: 0.689038
@endcode
--- a/doc/tutorials/features2d/table_of_content_features2d.markdown
+++ b/doc/tutorials/features2d/table_of_content_features2d.markdown
@ -98,6 +98,8 @@ OpenCV.

 -   @subpage tutorial_akaze_matching

+    *Languages:* C++, Java, Python
+
    *Compatibility:* \> OpenCV 3.0

    *Author:* Fedor Morozov
--- a/doc/tutorials/introduction/cross_referencing/tutorial_cross_referencing.markdown
+++ b/doc/tutorials/introduction/cross_referencing/tutorial_cross_referencing.markdown
@ -36,14 +36,14 @@ Open your Doxyfile using your favorite text editor and search for the key
 `TAGFILES`. Change it as follows:

@code
-TAGFILES = ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/3.4.1
+TAGFILES = ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/3.4.2
@endcode

 If you had other definitions already, you can append the line using a `\`:

@code
 TAGFILES = ./docs/doxygen-tags/libstdc++.tag=https://gcc.gnu.org/onlinedocs/libstdc++/latest-doxygen \
-           ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/3.4.1
+           ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/3.4.2
@endcode

 Doxygen can now use the information from the tag file to link to the OpenCV
--- a/modules/calib3d/src/calib3d_c_api.cpp
+++ b/modules/calib3d/src/calib3d_c_api.cpp
@ -0,0 +1,47 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// This file contains wrappers for legacy OpenCV C API
+
+#include "precomp.hpp"
+#include "opencv2/calib3d/calib3d_c.h"
+
+using namespace cv;
+
+CV_IMPL void
+cvDrawChessboardCorners(CvArr* _image, CvSize pattern_size,
+                        CvPoint2D32f* corners, int count, int found)
+{
+    CV_Assert(corners != NULL); //CV_CheckNULL(corners, "NULL is not allowed for 'corners' parameter");
+    Mat image = cvarrToMat(_image);
+    CV_StaticAssert(sizeof(CvPoint2D32f) == sizeof(Point2f), "");
+    drawChessboardCorners(image, pattern_size, Mat(1, count, traits::Type<Point2f>::value, corners), found != 0);
+}
+
+CV_IMPL int
+cvFindChessboardCorners(const void* arr, CvSize pattern_size,
+                        CvPoint2D32f* out_corners_, int* out_corner_count,
+                        int flags)
+{
+    if (!out_corners_)
+        CV_Error( CV_StsNullPtr, "Null pointer to corners" );
+
+    Mat image = cvarrToMat(arr);
+    std::vector<Point2f> out_corners;
+
+    if (out_corner_count)
+        *out_corner_count = 0;
+
+    bool res = cv::findChessboardCorners(image, pattern_size, out_corners, flags);
+
+    int corner_count = (int)out_corners.size();
+    if (out_corner_count)
+        *out_corner_count = corner_count;
+    CV_CheckLE(corner_count, Size(pattern_size).area(), "Unexpected number of corners");
+    for (int i = 0; i < corner_count; ++i)
+    {
+        out_corners_[i] = cvPoint2D32f(out_corners[i]);
+    }
+    return res ? 1 : 0;
+}
--- a/modules/calib3d/src/calibinit.cpp
+++ b/modules/calib3d/src/calibinit.cpp
--- a/modules/calib3d/src/precomp.hpp
+++ b/modules/calib3d/src/precomp.hpp
@ -42,12 +42,14 @@
 #ifndef __OPENCV_PRECOMP_H__
 #define __OPENCV_PRECOMP_H__

+#include "opencv2/core/utility.hpp"
+
+#include "opencv2/core/private.hpp"
+
 #include "opencv2/calib3d.hpp"
 #include "opencv2/imgproc.hpp"
 #include "opencv2/features2d.hpp"
-#include "opencv2/core/utility.hpp"

-#include "opencv2/core/private.hpp"

 #include "opencv2/core/ocl.hpp"

--- a/modules/calib3d/src/ptsetreg.cpp
+++ b/modules/calib3d/src/ptsetreg.cpp
@ -104,7 +104,7 @@ public:
                    int maxAttempts=1000 ) const
    {
        cv::AutoBuffer<int> _idx(modelPoints);
-        int* idx = _idx;
+        int* idx = _idx.data();
        int i = 0, j, k, iters = 0;
        int d1 = m1.channels() > 1 ? m1.channels() : m1.cols;
        int d2 = m2.channels() > 1 ? m2.channels() : m2.cols;
--- a/modules/calib3d/src/stereosgbm.cpp
+++ b/modules/calib3d/src/stereosgbm.cpp
@ -2451,7 +2451,7 @@ void cv::validateDisparity( InputOutputArray _disp, InputArray _cost, int minDis
    int minD = minDisparity, maxD = minDisparity + numberOfDisparities;
    int x, minX1 = std::max(maxD, 0), maxX1 = cols + std::min(minD, 0);
    AutoBuffer<int> _disp2buf(cols*2);
-    int* disp2buf = _disp2buf;
+    int* disp2buf = _disp2buf.data();
    int* disp2cost = disp2buf + cols;
    const int DISP_SHIFT = 4, DISP_SCALE = 1 << DISP_SHIFT;
    int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
--- a/modules/calib3d/test/test_cameracalibration.cpp
+++ b/modules/calib3d/test/test_cameracalibration.cpp
@ -1618,7 +1618,8 @@ void CV_StereoCalibrationTest::run( int )
            bool found2 = findChessboardCorners(right, patternSize, imgpt2[i]);
            if(!found1 || !found2)
            {
-                ts->printf( cvtest::TS::LOG, "The function could not detect boards on the images %s and %s, testcase %d\n",
+                ts->printf( cvtest::TS::LOG, "The function could not detect boards (%d x %d) on the images %s and %s, testcase %d\n",
+                    patternSize.width, patternSize.height,
                    imglist[i*2].c_str(), imglist[i*2+1].c_str(), testcase );
                ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
                return;
--- a/modules/core/include/opencv2/core/check.hpp
+++ b/modules/core/include/opencv2/core/check.hpp
@ -66,6 +66,7 @@ struct CheckContext {
            { CV__CHECK_FUNCTION, CV__CHECK_FILENAME, __LINE__, testOp, message, p1_str, p2_str }

 CV_EXPORTS void CV_NORETURN check_failed_auto(const int v1, const int v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v1, const size_t v2, const CheckContext& ctx);
 CV_EXPORTS void CV_NORETURN check_failed_auto(const float v1, const float v2, const CheckContext& ctx);
 CV_EXPORTS void CV_NORETURN check_failed_auto(const double v1, const double v2, const CheckContext& ctx);
 CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v1, const int v2, const CheckContext& ctx);
@ -73,6 +74,7 @@ CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v1, const int v2, con
 CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v1, const int v2, const CheckContext& ctx);

 CV_EXPORTS void CV_NORETURN check_failed_auto(const int v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v, const CheckContext& ctx);
 CV_EXPORTS void CV_NORETURN check_failed_auto(const float v, const CheckContext& ctx);
 CV_EXPORTS void CV_NORETURN check_failed_auto(const double v, const CheckContext& ctx);
 CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v, const CheckContext& ctx);
@ -120,15 +122,35 @@ CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v, const CheckCon

 #define CV_CheckChannelsEQ(c1, c2, msg)  CV__CHECK(_, EQ, MatChannels, c1, c2, #c1, #c2, msg)

-
 /// Example: type == CV_8UC1 || type == CV_8UC3
 #define CV_CheckType(t, test_expr, msg)  CV__CHECK_CUSTOM_TEST(_, MatType, t, (test_expr), #t, #test_expr, msg)

 /// Example: depth == CV_32F || depth == CV_64F
 #define CV_CheckDepth(t, test_expr, msg)  CV__CHECK_CUSTOM_TEST(_, MatDepth, t, (test_expr), #t, #test_expr, msg)

+/// Example: v == A || v == B
+#define CV_Check(v, test_expr, msg)  CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg)
+
 /// Some complex conditions: CV_Check(src2, src2.empty() || (src2.type() == src1.type() && src2.size() == src1.size()), "src2 should have same size/type as src1")
-// TODO define pretty-printers: #define CV_Check(v, test_expr, msg)  CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg)
+// TODO define pretty-printers
+
+#ifndef NDEBUG
+#define CV_DbgCheck(v, test_expr, msg)  CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg)
+#define CV_DbgCheckEQ(v1, v2, msg)  CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg)
+#define CV_DbgCheckNE(v1, v2, msg)  CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg)
+#define CV_DbgCheckLE(v1, v2, msg)  CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg)
+#define CV_DbgCheckLT(v1, v2, msg)  CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg)
+#define CV_DbgCheckGE(v1, v2, msg)  CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg)
+#define CV_DbgCheckGT(v1, v2, msg)  CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg)
+#else
+#define CV_DbgCheck(v, test_expr, msg)  do { } while (0)
+#define CV_DbgCheckEQ(v1, v2, msg)  do { } while (0)
+#define CV_DbgCheckNE(v1, v2, msg)  do { } while (0)
+#define CV_DbgCheckLE(v1, v2, msg)  do { } while (0)
+#define CV_DbgCheckLT(v1, v2, msg)  do { } while (0)
+#define CV_DbgCheckGE(v1, v2, msg)  do { } while (0)
+#define CV_DbgCheckGT(v1, v2, msg)  do { } while (0)
+#endif

 } // namespace

--- a/modules/core/include/opencv2/core/cvdef.h
+++ b/modules/core/include/opencv2/core/cvdef.h
@ -255,6 +255,7 @@ Cv64suf;

 #ifdef __OPENCV_BUILD
 #  define DISABLE_OPENCV_3_COMPATIBILITY
+#  define OPENCV_DISABLE_DEPRECATED_COMPATIBILITY
 #endif

 #ifdef CVAPI_EXPORTS
--- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp
@ -296,15 +296,15 @@ namespace hal_sse_internal
    to_sse_type v_sse_reinterpret_as(const from_sse_type& a) \
    { return sse_cast_intrin(a); }

-    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128i, OPENCV_HAL_NOP);
-    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128, _mm_castps_si128);
-    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128d, _mm_castpd_si128);
-    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128i, _mm_castsi128_ps);
-    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128, OPENCV_HAL_NOP);
-    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128d, _mm_castpd_ps);
-    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128i, _mm_castsi128_pd);
-    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128, _mm_castps_pd);
-    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128d, OPENCV_HAL_NOP);
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128i, OPENCV_HAL_NOP)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128, _mm_castps_si128)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128d, _mm_castpd_si128)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128i, _mm_castsi128_ps)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128, OPENCV_HAL_NOP)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128d, _mm_castpd_ps)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128i, _mm_castsi128_pd)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128, _mm_castps_pd)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128d, OPENCV_HAL_NOP)
 }

 #define OPENCV_HAL_IMPL_SSE_INITVEC(_Tpvec, _Tp, suffix, zsuffix, ssuffix, _Tps, cast) \
@ -988,8 +988,8 @@ inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
 inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
 { return cast(v_reinterpret_as_f64(a) != v_reinterpret_as_f64(b)); }

-OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64);
-OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64);
+OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64)
+OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64)

 OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_add_wrap, _mm_add_epi8)
 OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_add_wrap, _mm_add_epi8)
--- a/modules/core/include/opencv2/core/types_c.h
+++ b/modules/core/include/opencv2/core/types_c.h
@ -409,6 +409,11 @@ IplConvKernelFP;
 #define CV_MAT_MAGIC_VAL    0x42420000
 #define CV_TYPE_NAME_MAT    "opencv-matrix"

+#ifdef __cplusplus
+typedef struct CvMat CvMat;
+CV_INLINE CvMat cvMat(const cv::Mat& m);
+#endif
+
 /** Matrix elements are stored row by row. Element (i, j) (i - 0-based row index, j - 0-based column
 index) of a matrix can be retrieved or modified using CV_MAT_ELEM macro:

@ -531,6 +536,16 @@ inline CvMat::CvMat(const cv::Mat& m)
    step = (int)m.step[0];
    type = (type & ~cv::Mat::CONTINUOUS_FLAG) | (m.flags & cv::Mat::CONTINUOUS_FLAG);
 }
+
+inline CvMat cvMat(const cv::Mat& m)
+{
+    CvMat self;
+    CV_DbgAssert(m.dims <= 2);
+    self = cvMat(m.rows, m.dims == 1 ? 1 : m.cols, m.type(), m.data);
+    self.step = (int)m.step[0];
+    self.type = (self.type & ~cv::Mat::CONTINUOUS_FLAG) | (m.flags & cv::Mat::CONTINUOUS_FLAG);
+    return self;
+}
 #endif


@ -916,6 +931,15 @@ CV_INLINE  CvPoint2D32f  cvPoint2D32f( double x, double y )
    return p;
 }

+#ifdef __cplusplus
+template<typename _Tp>
+CvPoint2D32f cvPoint2D32f(const cv::Point_<_Tp>& pt)
+{
+    CvPoint2D32f p((float)pt.x, (float)pt.y);
+    return p;
+}
+#endif
+
 /** converts CvPoint to CvPoint2D32f. */
 CV_INLINE  CvPoint2D32f  cvPointTo32f( CvPoint point )
 {
--- a/modules/core/include/opencv2/core/utility.hpp
+++ b/modules/core/include/opencv2/core/utility.hpp
@ -143,9 +143,21 @@ public:
    //! returns the current buffer size
    size_t size() const;
    //! returns pointer to the real buffer, stack-allocated or heap-allocated
-    operator _Tp* ();
+    inline _Tp* data() { return ptr; }
    //! returns read-only pointer to the real buffer, stack-allocated or heap-allocated
-    operator const _Tp* () const;
+    inline const _Tp* data() const { return ptr; }
+
+#if !defined(OPENCV_DISABLE_DEPRECATED_COMPATIBILITY) // use to .data() calls instead
+    //! returns pointer to the real buffer, stack-allocated or heap-allocated
+    operator _Tp* () { return ptr; }
+    //! returns read-only pointer to the real buffer, stack-allocated or heap-allocated
+    operator const _Tp* () const { return ptr; }
+#else
+    //! returns a reference to the element at specified location. No bounds checking is performed in Release builds.
+    inline _Tp& operator[] (size_t i) { CV_DbgCheckLT(i, sz, "out of range"); return ptr[i]; }
+    //! returns a reference to the element at specified location. No bounds checking is performed in Release builds.
+    inline const _Tp& operator[] (size_t i) const { CV_DbgCheckLT(i, sz, "out of range"); return ptr[i]; }
+#endif

 protected:
    //! pointer to the real buffer, can point to buf if the buffer is small enough
@ -1029,14 +1041,6 @@ template<typename _Tp, size_t fixed_size> inline size_t
 AutoBuffer<_Tp, fixed_size>::size() const
 { return sz; }

-template<typename _Tp, size_t fixed_size> inline
-AutoBuffer<_Tp, fixed_size>::operator _Tp* ()
-{ return ptr; }
-
-template<typename _Tp, size_t fixed_size> inline
-AutoBuffer<_Tp, fixed_size>::operator const _Tp* () const
-{ return ptr; }
-
 template<> inline std::string CommandLineParser::get<std::string>(int index, bool space_delete) const
 {
    return get<String>(index, space_delete);
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@ -282,7 +282,7 @@ static void binary_op( InputArray _src1, InputArray _src2, OutputArray _dst,
        {
            blocksize = std::min(blocksize, blocksize0);
            _buf.allocate(blocksize*esz);
-            maskbuf = _buf;
+            maskbuf = _buf.data();
        }

        for( size_t i = 0; i < it.nplanes; i++, ++it )
@ -312,7 +312,7 @@ static void binary_op( InputArray _src1, InputArray _src2, OutputArray _dst,
        size_t total = it.size, blocksize = std::min(total, blocksize0);

        _buf.allocate(blocksize*(haveMask ? 2 : 1)*esz + 32);
-        scbuf = _buf;
+        scbuf = _buf.data();
        maskbuf = alignPtr(scbuf + blocksize*esz, 16);

        convertAndUnrollScalar( src2, src1.type(), scbuf, blocksize);
@ -754,7 +754,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
            blocksize = std::min(blocksize, blocksize0);

        _buf.allocate(bufesz*blocksize + 64);
-        buf = _buf;
+        buf = _buf.data();
        if( cvtsrc1 )
            buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
        if( cvtsrc2 )
@ -818,7 +818,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
        size_t total = it.size, blocksize = std::min(total, blocksize0);

        _buf.allocate(bufesz*blocksize + 64);
-        buf = _buf;
+        buf = _buf.data();
        if( cvtsrc1 )
            buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
        buf2 = buf; buf = alignPtr(buf + blocksize*wsz, 16);
@ -1256,7 +1256,7 @@ void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op)
        size_t total = it.size, blocksize = std::min(total, blocksize0);

        AutoBuffer<uchar> _buf(blocksize*esz);
-        uchar *buf = _buf;
+        uchar *buf = _buf.data();

        if( depth1 > CV_32S )
            convertAndUnrollScalar( src2, depth1, buf, blocksize );
@ -1647,7 +1647,7 @@ static bool ocl_inRange( InputArray _src, InputArray _lowerb,
        size_t blocksize = 36;

        AutoBuffer<uchar> _buf(blocksize*(((int)lbScalar + (int)ubScalar)*esz + cn) + 2*cn*sizeof(int) + 128);
-        uchar *buf = alignPtr(_buf + blocksize*cn, 16);
+        uchar *buf = alignPtr(_buf.data() + blocksize*cn, 16);

        if( ldepth != sdepth && sdepth < CV_32S )
        {
@ -1753,7 +1753,7 @@ void cv::inRange(InputArray _src, InputArray _lowerb,
    size_t total = it.size, blocksize = std::min(total, blocksize0);

    AutoBuffer<uchar> _buf(blocksize*(((int)lbScalar + (int)ubScalar)*esz + cn) + 2*cn*sizeof(int) + 128);
-    uchar *buf = _buf, *mbuf = buf, *lbuf = 0, *ubuf = 0;
+    uchar *buf = _buf.data(), *mbuf = buf, *lbuf = 0, *ubuf = 0;
    buf = alignPtr(buf + blocksize*cn, 16);

    if( lbScalar && ubScalar )
--- a/modules/core/src/batch_distance.cpp
+++ b/modules/core/src/batch_distance.cpp
@ -179,7 +179,7 @@ struct BatchDistInvoker : public ParallelLoopBody
    void operator()(const Range& range) const CV_OVERRIDE
    {
        AutoBuffer<int> buf(src2->rows);
-        int* bufptr = buf;
+        int* bufptr = buf.data();

        for( int i = range.start; i < range.end; i++ )
        {
--- a/modules/core/src/channels.cpp
+++ b/modules/core/src/channels.cpp
@ -104,7 +104,7 @@ void cv::mixChannels( const Mat* src, size_t nsrcs, Mat* dst, size_t ndsts, cons
    int depth = dst[0].depth();

    AutoBuffer<uchar> buf((nsrcs + ndsts + 1)*(sizeof(Mat*) + sizeof(uchar*)) + npairs*(sizeof(uchar*)*2 + sizeof(int)*6));
-    const Mat** arrays = (const Mat**)(uchar*)buf;
+    const Mat** arrays = (const Mat**)(uchar*)buf.data();
    uchar** ptrs = (uchar**)(arrays + nsrcs + ndsts);
    const uchar** srcs = (const uchar**)(ptrs + nsrcs + ndsts + 1);
    uchar** dsts = (uchar**)(srcs + npairs);
@ -294,7 +294,7 @@ void cv::mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst,

    CV_Assert(nsrc > 0 && ndst > 0);
    cv::AutoBuffer<Mat> _buf(nsrc + ndst);
-    Mat* buf = _buf;
+    Mat* buf = _buf.data();
    for( i = 0; i < nsrc; i++ )
        buf[i] = src.getMat(src_is_mat ? -1 : i);
    for( i = 0; i < ndst; i++ )
@ -327,7 +327,7 @@ void cv::mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst,

    CV_Assert(fromTo.size()%2 == 0 && nsrc > 0 && ndst > 0);
    cv::AutoBuffer<Mat> _buf(nsrc + ndst);
-    Mat* buf = _buf;
+    Mat* buf = _buf.data();
    for( i = 0; i < nsrc; i++ )
        buf[i] = src.getMat(src_is_mat ? -1 : i);
    for( i = 0; i < ndst; i++ )
--- a/modules/core/src/check.cpp
+++ b/modules/core/src/check.cpp
@ -101,6 +101,10 @@ void check_failed_auto(const int v1, const int v2, const CheckContext& ctx)
 {
    check_failed_auto_<int>(v1, v2, ctx);
 }
+void check_failed_auto(const size_t v1, const size_t v2, const CheckContext& ctx)
+{
+    check_failed_auto_<size_t>(v1, v2, ctx);
+}
 void check_failed_auto(const float v1, const float v2, const CheckContext& ctx)
 {
    check_failed_auto_<float>(v1, v2, ctx);
@ -147,6 +151,10 @@ void check_failed_auto(const int v, const CheckContext& ctx)
 {
    check_failed_auto_<int>(v, ctx);
 }
+void check_failed_auto(const size_t v, const CheckContext& ctx)
+{
+    check_failed_auto_<size_t>(v, ctx);
+}
 void check_failed_auto(const float v, const CheckContext& ctx)
 {
    check_failed_auto_<float>(v, ctx);
--- a/modules/core/src/conjugate_gradient.cpp
+++ b/modules/core/src/conjugate_gradient.cpp
@ -52,7 +52,7 @@ namespace cv
        double eps = getGradientEps();
        int i, n = getDims();
        AutoBuffer<double> x_buf(n);
-        double* x_ = x_buf;
+        double* x_ = x_buf.data();
        for( i = 0; i < n; i++ )
            x_[i] = x[i];
        for( i = 0; i < n; i++ )
--- a/modules/core/src/copy.cpp
+++ b/modules/core/src/copy.cpp
@ -531,7 +531,7 @@ Mat& Mat::setTo(InputArray _value, InputArray _mask)
    int blockSize0 = std::min(totalsz, (int)((BLOCK_SIZE + esz-1)/esz));
    blockSize0 -= blockSize0 % mcn;    // must be divisible without remainder for unrolling and advancing
    AutoBuffer<uchar> _scbuf(blockSize0*esz + 32);
-    uchar* scbuf = alignPtr((uchar*)_scbuf, (int)sizeof(double));
+    uchar* scbuf = alignPtr((uchar*)_scbuf.data(), (int)sizeof(double));
    convertAndUnrollScalar( value, type(), scbuf, blockSize0/mcn );

    for( size_t i = 0; i < it.nplanes; i++, ++it )
@ -559,7 +559,7 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size,
 {
    int i, j, limit = (int)(((size.width + 1)/2)*esz);
    AutoBuffer<int> _tab(size.width*esz);
-    int* tab = _tab;
+    int* tab = _tab.data();

    for( i = 0; i < size.width; i++ )
        for( size_t k = 0; k < esz; k++ )
@ -960,7 +960,7 @@ void copyMakeBorder_8u( const uchar* src, size_t srcstep, cv::Size srcroi,
    }

    cv::AutoBuffer<int> _tab((dstroi.width - srcroi.width)*cn);
-    int* tab = _tab;
+    int* tab = _tab.data();
    int right = dstroi.width - srcroi.width - left;
    int bottom = dstroi.height - srcroi.height - top;

@ -1031,7 +1031,7 @@ void copyMakeConstBorder_8u( const uchar* src, size_t srcstep, cv::Size srcroi,
 {
    int i, j;
    cv::AutoBuffer<uchar> _constBuf(dstroi.width*cn);
-    uchar* constBuf = _constBuf;
+    uchar* constBuf = _constBuf.data();
    int right = dstroi.width - srcroi.width - left;
    int bottom = dstroi.height - srcroi.height - top;

@ -1224,10 +1224,10 @@ void cv::copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom,
            CV_Assert( value[0] == value[1] && value[0] == value[2] && value[0] == value[3] );
            cn1 = 1;
        }
-        scalarToRawData(value, buf, CV_MAKETYPE(src.depth(), cn1), cn);
+        scalarToRawData(value, buf.data(), CV_MAKETYPE(src.depth(), cn1), cn);
        copyMakeConstBorder_8u( src.ptr(), src.step, src.size(),
                                dst.ptr(), dst.step, dst.size(),
-                                top, left, (int)src.elemSize(), (uchar*)(double*)buf );
+                                top, left, (int)src.elemSize(), (uchar*)buf.data() );
    }
 }

--- a/modules/core/src/count_non_zero.cpp
+++ b/modules/core/src/count_non_zero.cpp
@ -399,7 +399,7 @@ void cv::findNonZero( InputArray _src, OutputArray _idx )
    std::vector<Point> idxvec;
    int rows = src.rows, cols = src.cols;
    AutoBuffer<int> buf_(cols + 1);
-    int* buf = buf_;
+    int* buf = buf_.data();

    for( int i = 0; i < rows; i++ )
    {
--- a/modules/core/src/dxt.cpp
+++ b/modules/core/src/dxt.cpp
@ -908,7 +908,7 @@ DFT(const OcvDftOptions & c, const Complex<T>* src, Complex<T>* dst)
            int p, q, factor2 = (factor - 1)/2;
            int d, dd, dw_f = c.tab_size/factor;
            AutoBuffer<Complex<T> > buf(factor2 * 2);
-            Complex<T>* a = buf;
+            Complex<T>* a = buf.data();
            Complex<T>* b = a + factor2;

            for( i = 0; i < c.n; i += n )
@ -2895,7 +2895,7 @@ protected:
            uchar* dptr = dptr0;

            if( needBufferA )
-                dptr = tmp_bufA;
+                dptr = tmp_bufA.data();

            contextA->apply(sptr, dptr);

@ -2921,12 +2921,12 @@ protected:
        const uchar* sptr0 = src_data;
        uchar* dptr0 = dst_data;

-        dbuf0 = buf0, dbuf1 = buf1;
+        dbuf0 = buf0.data(), dbuf1 = buf1.data();

        if( needBufferB )
        {
-            dbuf1 = tmp_bufB;
-            dbuf0 = buf1;
+            dbuf1 = tmp_bufB.data();
+            dbuf0 = buf1.data();
        }

        if( real_transform )
@ -2937,42 +2937,42 @@ protected:
            b = (count+1)/2;
            if( !inv )
            {
-                memset( buf0, 0, len*complex_elem_size );
-                CopyColumn( sptr0, src_step, buf0, complex_elem_size, len, elem_size );
+                memset( buf0.data(), 0, len*complex_elem_size );
+                CopyColumn( sptr0, src_step, buf0.data(), complex_elem_size, len, elem_size );
                sptr0 += stage_dst_channels*elem_size;
                if( even )
                {
-                    memset( buf1, 0, len*complex_elem_size );
+                    memset( buf1.data(), 0, len*complex_elem_size );
                    CopyColumn( sptr0 + (count-2)*elem_size, src_step,
-                                buf1, complex_elem_size, len, elem_size );
+                                buf1.data(), complex_elem_size, len, elem_size );
                }
            }
            else if( stage_src_channels == 1 )
            {
-                CopyColumn( sptr0, src_step, buf0, elem_size, len, elem_size );
-                ExpandCCS( buf0, len, elem_size );
+                CopyColumn( sptr0, src_step, buf0.data(), elem_size, len, elem_size );
+                ExpandCCS( buf0.data(), len, elem_size );
                if( even )
                {
                    CopyColumn( sptr0 + (count-1)*elem_size, src_step,
-                                buf1, elem_size, len, elem_size );
-                    ExpandCCS( buf1, len, elem_size );
+                                buf1.data(), elem_size, len, elem_size );
+                    ExpandCCS( buf1.data(), len, elem_size );
                }
                sptr0 += elem_size;
            }
            else
            {
-                CopyColumn( sptr0, src_step, buf0, complex_elem_size, len, complex_elem_size );
+                CopyColumn( sptr0, src_step, buf0.data(), complex_elem_size, len, complex_elem_size );
                if( even )
                {
                    CopyColumn( sptr0 + b*complex_elem_size, src_step,
-                                   buf1, complex_elem_size, len, complex_elem_size );
+                                   buf1.data(), complex_elem_size, len, complex_elem_size );
                }
                sptr0 += complex_elem_size;
            }

            if( even )
-                contextB->apply(buf1, dbuf1);
-            contextB->apply(buf0, dbuf0);
+                contextB->apply(buf1.data(), dbuf1);
+            contextB->apply(buf0.data(), dbuf0);

            if( stage_dst_channels == 1 )
            {
@ -3019,13 +3019,13 @@ protected:
        {
            if( i+1 < b )
            {
-                CopyFrom2Columns( sptr0, src_step, buf0, buf1, len, complex_elem_size );
-                contextB->apply(buf1, dbuf1);
+                CopyFrom2Columns( sptr0, src_step, buf0.data(), buf1.data(), len, complex_elem_size );
+                contextB->apply(buf1.data(), dbuf1);
            }
            else
-                CopyColumn( sptr0, src_step, buf0, complex_elem_size, len, complex_elem_size );
+                CopyColumn( sptr0, src_step, buf0.data(), complex_elem_size, len, complex_elem_size );

-            contextB->apply(buf0, dbuf0);
+            contextB->apply(buf0.data(), dbuf0);

            if( i+1 < b )
                CopyTo2Columns( dbuf0, dbuf1, dptr0, dst_step, len, complex_elem_size );
@ -3134,9 +3134,9 @@ public:
            if (len != prev_len || (!inplace_transform && opt.isInverse && real_transform))
            {
                wave_buf.allocate(opt.n*complex_elem_size);
-                opt.wave = wave_buf;
+                opt.wave = wave_buf.data();
                itab_buf.allocate(opt.n);
-                opt.itab = itab_buf;
+                opt.itab = itab_buf.data();
                DFTInit( opt.n, opt.nf, opt.factors, opt.itab, complex_elem_size,
                         opt.wave, stage == 0 && opt.isInverse && real_transform );
            }
@ -4152,31 +4152,31 @@ public:
                bool inplace_transform = opt.factors[0] == opt.factors[opt.nf-1];

                wave_buf.allocate(len*complex_elem_size);
-                opt.wave = wave_buf;
+                opt.wave = wave_buf.data();
                itab_buf.allocate(len);
-                opt.itab = itab_buf;
+                opt.itab = itab_buf.data();
                DFTInit( len, opt.nf, opt.factors, opt.itab, complex_elem_size, opt.wave, isInverse );

                dct_wave.allocate((len/2 + 1)*complex_elem_size);
                src_buf.allocate(len*elem_size);
-                src_dft_buf = src_buf;
+                src_dft_buf = src_buf.data();
                if(!inplace_transform)
                {
                    dst_buf.allocate(len*elem_size);
-                    dst_dft_buf = dst_buf;
+                    dst_dft_buf = dst_buf.data();
                }
                else
                {
-                    dst_dft_buf = src_buf;
+                    dst_dft_buf = src_buf.data();
                }
-                DCTInit( len, complex_elem_size, dct_wave, isInverse);
+                DCTInit( len, complex_elem_size, dct_wave.data(), isInverse);
                prev_len = len;
            }
            // otherwise reuse the tables calculated on the previous stage
            for(unsigned i = 0; i < static_cast<unsigned>(count); i++ )
            {
                dct_func( opt, sptr + i*sstep0, sstep1, src_dft_buf, dst_dft_buf,
-                          dptr + i*dstep0, dstep1, dct_wave);
+                          dptr + i*dstep0, dstep1, dct_wave.data());
            }
            src = dst;
            src_step = dst_step;
--- a/modules/core/src/kmeans.cpp
+++ b/modules/core/src/kmeans.cpp
@ -330,7 +330,7 @@ double cv::kmeans( InputArray _data, int K,
                else
                {
                    for (int k = 0; k < K; k++)
-                        generateRandomCenter(dims, box, centers.ptr<float>(k), rng);
+                        generateRandomCenter(dims, box.data(), centers.ptr<float>(k), rng);
                }
            }
            else
@ -429,14 +429,14 @@ double cv::kmeans( InputArray _data, int K,
            if (isLastIter)
            {
                // don't re-assign labels to avoid creation of empty clusters
-                parallel_for_(Range(0, N), KMeansDistanceComputer<true>(dists, labels, data, centers), (double)divUp((size_t)(dims * N), CV_KMEANS_PARALLEL_GRANULARITY));
+                parallel_for_(Range(0, N), KMeansDistanceComputer<true>(dists.data(), labels, data, centers), (double)divUp((size_t)(dims * N), CV_KMEANS_PARALLEL_GRANULARITY));
                compactness = sum(Mat(Size(N, 1), CV_64F, &dists[0]))[0];
                break;
            }
            else
            {
                // assign labels
-                parallel_for_(Range(0, N), KMeansDistanceComputer<false>(dists, labels, data, centers), (double)divUp((size_t)(dims * N * K), CV_KMEANS_PARALLEL_GRANULARITY));
+                parallel_for_(Range(0, N), KMeansDistanceComputer<false>(dists.data(), labels, data, centers), (double)divUp((size_t)(dims * N * K), CV_KMEANS_PARALLEL_GRANULARITY));
            }
        }

--- a/modules/core/src/lapack.cpp
+++ b/modules/core/src/lapack.cpp
@ -401,7 +401,7 @@ JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep,
 {
    VBLAS<_Tp> vblas;
    AutoBuffer<double> Wbuf(n);
-    double* W = Wbuf;
+    double* W = Wbuf.data();
    int i, j, k, iter, max_iter = std::max(m, 30);
    _Tp c, s;
    double sd;
@ -778,7 +778,7 @@ double cv::determinant( InputArray _mat )
        {
            size_t bufSize = rows*rows*sizeof(float);
            AutoBuffer<uchar> buffer(bufSize);
-            Mat a(rows, rows, CV_32F, (uchar*)buffer);
+            Mat a(rows, rows, CV_32F, buffer.data());
            mat.copyTo(a);

            result = hal::LU32f(a.ptr<float>(), a.step, rows, 0, 0, 0);
@ -801,7 +801,7 @@ double cv::determinant( InputArray _mat )
        {
            size_t bufSize = rows*rows*sizeof(double);
            AutoBuffer<uchar> buffer(bufSize);
-            Mat a(rows, rows, CV_64F, (uchar*)buffer);
+            Mat a(rows, rows, CV_64F, buffer.data());
            mat.copyTo(a);

            result = hal::LU64f(a.ptr<double>(), a.step, rows, 0, 0, 0);
@ -846,7 +846,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
        int nm = std::min(m, n);

        AutoBuffer<uchar> _buf((m*nm + nm + nm*n)*esz + sizeof(double));
-        uchar* buf = alignPtr((uchar*)_buf, (int)esz);
+        uchar* buf = alignPtr((uchar*)_buf.data(), (int)esz);
        Mat u(m, nm, type, buf);
        Mat w(nm, 1, type, u.ptr() + m*nm*esz);
        Mat vt(nm, n, type, w.ptr() + nm*esz);
@ -865,7 +865,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
    if( method == DECOMP_EIG )
    {
        AutoBuffer<uchar> _buf((n*n*2 + n)*esz + sizeof(double));
-        uchar* buf = alignPtr((uchar*)_buf, (int)esz);
+        uchar* buf = alignPtr((uchar*)_buf.data(), (int)esz);
        Mat u(n, n, type, buf);
        Mat w(n, 1, type, u.ptr() + n*n*esz);
        Mat vt(n, n, type, w.ptr() + n*esz);
@ -1063,7 +1063,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )

   int elem_size = CV_ELEM_SIZE(type);
    AutoBuffer<uchar> buf(n*n*elem_size);
-    Mat src1(n, n, type, (uchar*)buf);
+    Mat src1(n, n, type, buf.data());
    src.copyTo(src1);
    setIdentity(dst);

@ -1267,7 +1267,7 @@ bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int meth
        bufsize += n*5*esz + n*vstep + nb*sizeof(double) + 32;

    buffer.allocate(bufsize);
-    uchar* ptr = alignPtr((uchar*)buffer, 16);
+    uchar* ptr = alignPtr(buffer.data(), 16);

    Mat a(m_, n, type, ptr, astep);

@ -1445,7 +1445,7 @@ bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects )

    size_t elemSize = src.elemSize(), astep = alignSize(n*elemSize, 16);
    AutoBuffer<uchar> buf(n*astep + n*5*elemSize + 32);
-    uchar* ptr = alignPtr((uchar*)buf, 16);
+    uchar* ptr = alignPtr(buf.data(), 16);
    Mat a(n, n, type, ptr, astep), w(n, 1, type, ptr + astep*n);
    ptr += astep*n + elemSize*n;
    src.copyTo(a);
@ -1489,7 +1489,7 @@ static void _SVDcompute( InputArray _aarr, OutputArray _w,
    int urows = full_uv ? m : n;
    size_t esz = src.elemSize(), astep = alignSize(m*esz, 16), vstep = alignSize(n*esz, 16);
    AutoBuffer<uchar> _buf(urows*astep + n*vstep + n*esz + 32);
-    uchar* buf = alignPtr((uchar*)_buf, 16);
+    uchar* buf = alignPtr(_buf.data(), 16);
    Mat temp_a(n, m, type, buf, astep);
    Mat temp_w(n, 1, type, buf + urows*astep);
    Mat temp_u(urows, m, type, buf, astep), temp_v;
@ -1568,11 +1568,11 @@ void SVD::backSubst( InputArray _w, InputArray _u, InputArray _vt,
    if( type == CV_32F )
        SVBkSb(m, n, w.ptr<float>(), wstep, u.ptr<float>(), u.step, false,
               vt.ptr<float>(), vt.step, true, rhs.ptr<float>(), rhs.step, nb,
-               dst.ptr<float>(), dst.step, buffer);
+               dst.ptr<float>(), dst.step, buffer.data());
    else if( type == CV_64F )
        SVBkSb(m, n, w.ptr<double>(), wstep, u.ptr<double>(), u.step, false,
               vt.ptr<double>(), vt.step, true, rhs.ptr<double>(), rhs.step, nb,
-               dst.ptr<double>(), dst.step, buffer);
+               dst.ptr<double>(), dst.step, buffer.data());
    else
        CV_Error( CV_StsUnsupportedFormat, "" );
 }
--- a/modules/core/src/mathfuncs.cpp
+++ b/modules/core/src/mathfuncs.cpp
@ -586,7 +586,7 @@ void polarToCart( InputArray src1, InputArray src2,
    if( depth == CV_64F )
    {
        _buf.allocate(blockSize*2);
-        buf[0] = _buf;
+        buf[0] = _buf.data();
        buf[1] = buf[0] + blockSize;
    }

@ -1278,8 +1278,8 @@ void pow( InputArray _src, double power, OutputArray _dst )
        if( src.ptr() == dst.ptr() )
        {
            buf.allocate(blockSize*esz1);
-            fbuf = (float*)(uchar*)buf;
-            dbuf = (double*)(uchar*)buf;
+            fbuf = (float*)buf.data();
+            dbuf = (double*)buf.data();
        }

        for( size_t i = 0; i < it.nplanes; i++, ++it )
@ -1901,7 +1901,7 @@ double cv::solvePoly( InputArray _coeffs0, OutputArray _roots0, int maxIters )
    Mat roots0 = _roots0.getMat();

    AutoBuffer<C> buf(n*2+2);
-    C *coeffs = buf, *roots = coeffs + n + 1;
+    C *coeffs = buf.data(), *roots = coeffs + n + 1;
    Mat coeffs1(coeffs0.size(), CV_MAKETYPE(CV_64F, coeffs0.channels()), coeffs0.channels() == 2 ? coeffs : roots);
    coeffs0.convertTo(coeffs1, coeffs1.type());
    if( coeffs0.channels() == 1 )
--- a/modules/core/src/matmul.cpp
+++ b/modules/core/src/matmul.cpp
@ -165,7 +165,7 @@ GEMMSingleMul( const T* a_data, size_t a_step,
        if( a_step > 1 && n > 1 )
        {
            _a_buf.allocate(n);
-            a_buf = _a_buf;
+            a_buf = _a_buf.data();
        }
    }

@ -177,7 +177,7 @@ GEMMSingleMul( const T* a_data, size_t a_step,
        if( a_step > 1 && a_size.height > 1 )
        {
            _a_buf.allocate(drows);
-            a_buf = _a_buf;
+            a_buf = _a_buf.data();
            for( k = 0; k < drows; k++ )
                a_buf[k] = a_data[a_step*k];
            a_data = a_buf;
@ -186,7 +186,7 @@ GEMMSingleMul( const T* a_data, size_t a_step,
        if( b_step > 1 )
        {
            _b_buf.allocate(d_size.width);
-            b_buf = _b_buf;
+            b_buf = _b_buf.data();
            for( j = 0; j < d_size.width; j++ )
                b_buf[j] = b_data[j*b_step];
            b_data = b_buf;
@ -326,7 +326,7 @@ GEMMSingleMul( const T* a_data, size_t a_step,
    else
    {
        cv::AutoBuffer<WT> _d_buf(m);
-        WT* d_buf = _d_buf;
+        WT* d_buf = _d_buf.data();

        for( i = 0; i < drows; i++, _a_data += a_step0, _c_data += c_step0, d_data += d_step )
        {
@ -404,7 +404,7 @@ GEMMBlockMul( const T* a_data, size_t a_step,
        CV_SWAP( a_step0, a_step1, t_step );
        n = a_size.height;
        _a_buf.allocate(n);
-        a_buf = _a_buf;
+        a_buf = _a_buf.data();
    }

    if( flags & GEMM_2_T )
@ -1354,7 +1354,7 @@ static void gemmImpl( Mat A, Mat B, double alpha,
        }

        buf.allocate(d_buf_size + b_buf_size + a_buf_size);
-        d_buf = (uchar*)buf;
+        d_buf = buf.data();
        b_buf = d_buf + d_buf_size;

        if( is_a_t )
@ -2098,7 +2098,7 @@ void cv::transform( InputArray _src, OutputArray _dst, InputArray _mtx )
    if( !m.isContinuous() || m.type() != mtype || m.cols != scn + 1 )
    {
        _mbuf.allocate(dcn*(scn+1));
-        mbuf = (double*)_mbuf;
+        mbuf = _mbuf.data();
        Mat tmp(dcn, scn+1, mtype, mbuf);
        memset(tmp.ptr(), 0, tmp.total()*tmp.elemSize());
        if( m.cols == scn+1 )
@ -2273,17 +2273,16 @@ void cv::perspectiveTransform( InputArray _src, OutputArray _dst, InputArray _mt

    const int mtype = CV_64F;
    AutoBuffer<double> _mbuf;
-    double* mbuf = _mbuf;
+    double* mbuf = m.ptr<double>();

    if( !m.isContinuous() || m.type() != mtype )
    {
        _mbuf.allocate((dcn+1)*(scn+1));
-        Mat tmp(dcn+1, scn+1, mtype, (double*)_mbuf);
+        mbuf = _mbuf.data();
+        Mat tmp(dcn+1, scn+1, mtype, mbuf);
        m.convertTo(tmp, mtype);
        m = tmp;
    }
-    else
-        mbuf = m.ptr<double>();

    TransformFunc func = depth == CV_32F ?
        (TransformFunc)perspectiveTransform_32f :
@ -2612,7 +2611,7 @@ double cv::Mahalanobis( InputArray _v1, InputArray _v2, InputArray _icovar )
        const float* src2 = v2.ptr<float>();
        size_t step1 = v1.step/sizeof(src1[0]);
        size_t step2 = v2.step/sizeof(src2[0]);
-        double* diff = buf;
+        double* diff = buf.data();
        const float* mat = icovar.ptr<float>();
        size_t matstep = icovar.step/sizeof(mat[0]);

@ -2622,7 +2621,7 @@ double cv::Mahalanobis( InputArray _v1, InputArray _v2, InputArray _icovar )
                diff[i] = src1[i] - src2[i];
        }

-        diff = buf;
+        diff = buf.data();
        for( i = 0; i < len; i++, mat += matstep )
        {
            double row_sum = 0;
@ -2643,7 +2642,7 @@ double cv::Mahalanobis( InputArray _v1, InputArray _v2, InputArray _icovar )
        const double* src2 = v2.ptr<double>();
        size_t step1 = v1.step/sizeof(src1[0]);
        size_t step2 = v2.step/sizeof(src2[0]);
-        double* diff = buf;
+        double* diff = buf.data();
        const double* mat = icovar.ptr<double>();
        size_t matstep = icovar.step/sizeof(mat[0]);

@ -2653,7 +2652,7 @@ double cv::Mahalanobis( InputArray _v1, InputArray _v2, InputArray _icovar )
                diff[i] = src1[i] - src2[i];
        }

-        diff = buf;
+        diff = buf.data();
        for( i = 0; i < len; i++, mat += matstep )
        {
            double row_sum = 0;
@ -2705,7 +2704,7 @@ MulTransposedR( const Mat& srcmat, Mat& dstmat, const Mat& deltamat, double scal
        buf_size *= 5;
    }
    buf.allocate(buf_size);
-    col_buf = (dT*)(uchar*)buf;
+    col_buf = (dT*)buf.data();

    if( delta && delta_cols < size.width )
    {
@ -2834,7 +2833,7 @@ MulTransposedL( const Mat& srcmat, Mat& dstmat, const Mat& deltamat, double scal
        dT delta_buf[4];
        int delta_shift = delta_cols == size.width ? 4 : 0;
        AutoBuffer<uchar> buf(size.width*sizeof(dT));
-        dT* row_buf = (dT*)(uchar*)buf;
+        dT* row_buf = (dT*)buf.data();

        for( i = 0; i < size.height; i++, tdst += dststep )
        {
--- a/modules/core/src/matrix.cpp
+++ b/modules/core/src/matrix.cpp
@ -410,7 +410,7 @@ Mat::Mat(const Mat& m, const Range& _rowRange, const Range& _colRange)
        rs[1] = _colRange;
        for( int i = 2; i < m.dims; i++ )
            rs[i] = Range::all();
-        *this = m(rs);
+        *this = m(rs.data());
        return;
    }

@ -897,7 +897,7 @@ Mat Mat::reshape(int _cn, int _newndims, const int* _newsz) const

        Mat hdr = *this;
        hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((_cn-1) << CV_CN_SHIFT);
-        setSize(hdr, _newndims, (int*)newsz_buf, NULL, true);
+        setSize(hdr, _newndims, newsz_buf.data(), NULL, true);

        return hdr;
    }
--- a/modules/core/src/matrix_c.cpp
+++ b/modules/core/src/matrix_c.cpp
@ -169,7 +169,7 @@ Mat cvarrToMat(const CvArr* arr, bool copyData,
        if( abuf )
        {
            abuf->allocate(((size_t)total*esz + sizeof(double)-1)/sizeof(double));
-            double* bufdata = *abuf;
+            double* bufdata = abuf->data();
            cvCvtSeqToArray(seq, bufdata, CV_WHOLE_SEQ);
            return Mat(total, 1, type, bufdata);
        }
--- a/modules/core/src/matrix_decomp.cpp
+++ b/modules/core/src/matrix_decomp.cpp
@ -206,7 +206,7 @@ QRImpl(_Tp* A, size_t astep, int m, int n, int k, _Tp* b, size_t bstep, _Tp* hFa
    cv::AutoBuffer<_Tp> buffer;
    size_t buf_size = m ? m + n : hFactors != NULL;
    buffer.allocate(buf_size);
-    _Tp* vl = buffer;
+    _Tp* vl = buffer.data();
    if (hFactors == NULL)
        hFactors = vl + m;

--- a/modules/core/src/matrix_operations.cpp
+++ b/modules/core/src/matrix_operations.cpp
@ -606,7 +606,7 @@ reduceR_( const Mat& srcmat, Mat& dstmat )
    Size size = srcmat.size();
    size.width *= srcmat.channels();
    AutoBuffer<WT> buffer(size.width);
-    WT* buf = buffer;
+    WT* buf = buffer.data();
    ST* dst = dstmat.ptr<ST>();
    const T* src = srcmat.ptr<T>();
    size_t srcstep = srcmat.step/sizeof(src[0]);
@ -1125,7 +1125,6 @@ namespace cv
 template<typename T> static void sort_( const Mat& src, Mat& dst, int flags )
 {
    AutoBuffer<T> buf;
-    T* bptr;
    int n, len;
    bool sortRows = (flags & 1) == CV_SORT_EVERY_ROW;
    bool inplace = src.data == dst.data;
@ -1138,7 +1137,7 @@ template<typename T> static void sort_( const Mat& src, Mat& dst, int flags )
        n = src.cols, len = src.rows;
        buf.allocate(len);
    }
-    bptr = (T*)buf;
+    T* bptr = buf.data();

    for( int i = 0; i < n; i++ )
    {
@ -1223,7 +1222,7 @@ static bool ipp_sort(const Mat& src, Mat& dst, int flags)

        for(int i = 0; i < dst.rows; i++)
        {
-            if(CV_INSTRUMENT_FUN_IPP(ippsSortRadix_I, (void*)dst.ptr(i), dst.cols, buffer) < 0)
+            if(CV_INSTRUMENT_FUN_IPP(ippsSortRadix_I, (void*)dst.ptr(i), dst.cols, buffer.data()) < 0)
                return false;
        }
    }
@ -1248,7 +1247,7 @@ static bool ipp_sort(const Mat& src, Mat& dst, int flags)
            dstSub = Mat(dst, subRect);
            srcSub.copyTo(row);

-            if(CV_INSTRUMENT_FUN_IPP(ippsSortRadix_I, (void*)row.ptr(), dst.rows, buffer) < 0)
+            if(CV_INSTRUMENT_FUN_IPP(ippsSortRadix_I, (void*)row.ptr(), dst.rows, buffer.data()) < 0)
                return false;

            row = row.reshape(1, dstSub.rows);
@ -1286,8 +1285,8 @@ template<typename T> static void sortIdx_( const Mat& src, Mat& dst, int flags )
        buf.allocate(len);
        ibuf.allocate(len);
    }
-    T* bptr = (T*)buf;
-    int* _iptr = (int*)ibuf;
+    T* bptr = buf.data();
+    int* _iptr = ibuf.data();

    for( int i = 0; i < n; i++ )
    {
@ -1365,7 +1364,7 @@ static bool ipp_sortIdx( const Mat& src, Mat& dst, int flags )

        for(int i = 0; i < src.rows; i++)
        {
-            if(CV_INSTRUMENT_FUN_IPP(ippsSortRadixIndex, (const void*)src.ptr(i), (Ipp32s)src.step[1], (Ipp32s*)dst.ptr(i), src.cols, buffer) < 0)
+            if(CV_INSTRUMENT_FUN_IPP(ippsSortRadixIndex, (const void*)src.ptr(i), (Ipp32s)src.step[1], (Ipp32s*)dst.ptr(i), src.cols, buffer.data()) < 0)
                return false;
        }
    }
@ -1388,7 +1387,7 @@ static bool ipp_sortIdx( const Mat& src, Mat& dst, int flags )
            subRect.x = i;
            dstSub = Mat(dst, subRect);

-            if(CV_INSTRUMENT_FUN_IPP(ippsSortRadixIndex, (const void*)src.ptr(0, i), srcStep, (Ipp32s*)dstRow.ptr(), src.rows, buffer) < 0)
+            if(CV_INSTRUMENT_FUN_IPP(ippsSortRadixIndex, (const void*)src.ptr(0, i), srcStep, (Ipp32s*)dstRow.ptr(), src.rows, buffer.data()) < 0)
                return false;

            dstRow = dstRow.reshape(1, dstSub.rows);
--- a/modules/core/src/mean.cpp
+++ b/modules/core/src/mean.cpp
@ -135,7 +135,7 @@ cv::Scalar cv::mean( InputArray _src, InputArray _mask )
        intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
        blockSize = std::min(blockSize, intSumBlockSize);
        _buf.allocate(cn);
-        buf = _buf;
+        buf = _buf.data();

        for( k = 0; k < cn; k++ )
            buf[k] = 0;
@ -789,7 +789,7 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input
    int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
    int j, count = 0, nz0 = 0;
    AutoBuffer<double> _buf(cn*4);
-    double *s = (double*)_buf, *sq = s + cn;
+    double *s = (double*)_buf.data(), *sq = s + cn;
    int *sbuf = (int*)s, *sqbuf = (int*)sq;
    bool blockSum = depth <= CV_16S, blockSqSum = depth <= CV_8S;
    size_t esz = 0;
--- a/modules/core/src/merge.cpp
+++ b/modules/core/src/merge.cpp
@ -496,7 +496,7 @@ void cv::merge(const Mat* mv, size_t n, OutputArray _dst)
    size_t esz = dst.elemSize(), esz1 = dst.elemSize1();
    size_t blocksize0 = (int)((BLOCK_SIZE + esz-1)/esz);
    AutoBuffer<uchar> _buf((cn+1)*(sizeof(Mat*) + sizeof(uchar*)) + 16);
-    const Mat** arrays = (const Mat**)(uchar*)_buf;
+    const Mat** arrays = (const Mat**)_buf.data();
    uchar** ptrs = (uchar**)alignPtr(arrays + cn + 1, 16);

    arrays[0] = &dst;
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@ -617,12 +617,12 @@ public:
                if (fileSourceSignatureSize == sourceSignatureSize_)
                {
                    cv::AutoBuffer<char> fileSourceSignature(fileSourceSignatureSize + 1);
-                    f.read((char*)fileSourceSignature, fileSourceSignatureSize);
+                    f.read(fileSourceSignature.data(), fileSourceSignatureSize);
                    if (f.eof())
                    {
                        CV_LOG_ERROR(NULL, "Unexpected EOF");
                    }
-                    else if (memcmp(sourceSignature, (const char*)fileSourceSignature, fileSourceSignatureSize) == 0)
+                    else if (memcmp(sourceSignature, fileSourceSignature.data(), fileSourceSignatureSize) == 0)
                    {
                        isValid = true;
                    }
@ -696,10 +696,10 @@ public:
            {
                if (entry.keySize > 0)
                {
-                    f.read((char*)fileKey, entry.keySize);
+                    f.read(fileKey.data(), entry.keySize);
                    CV_Assert(!f.fail());
                }
-                if (memcmp((const char*)fileKey, key.c_str(), entry.keySize) == 0)
+                if (memcmp(fileKey.data(), key.c_str(), entry.keySize) == 0)
                {
                    buf.resize(entry.dataSize);
                    f.read(&buf[0], entry.dataSize);
@ -786,10 +786,10 @@ public:
            {
                if (entry.keySize > 0)
                {
-                    f.read((char*)fileKey, entry.keySize);
+                    f.read(fileKey.data(), entry.keySize);
                    CV_Assert(!f.fail());
                }
-                if (0 == memcmp((const char*)fileKey, key.c_str(), entry.keySize))
+                if (0 == memcmp(fileKey.data(), key.c_str(), entry.keySize))
                {
                    // duplicate
                    CV_LOG_VERBOSE(NULL, 0, "Duplicate key ignored: " << fileName_);
@ -1634,7 +1634,7 @@ inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string
    if (required > 0)
    {
        AutoBuffer<char> buf(required + 1);
-        char* ptr = (char*)buf; // cleanup is not needed
+        char* ptr = buf.data(); // cleanup is not needed
        err = f(obj, name, required, ptr, NULL);
        if (err != CL_SUCCESS)
            return err;
@ -2002,7 +2002,7 @@ struct Context::Impl
        CV_OCL_DBG_CHECK(clGetDeviceIDs(pl, dtype, 0, 0, &nd0));

        AutoBuffer<void*> dlistbuf(nd0*2+1);
-        cl_device_id* dlist = (cl_device_id*)(void**)dlistbuf;
+        cl_device_id* dlist = (cl_device_id*)dlistbuf.data();
        cl_device_id* dlist_new = dlist + nd0;
        CV_OCL_DBG_CHECK(clGetDeviceIDs(pl, dtype, nd0, dlist, &nd0));
        String name0;
@ -2465,12 +2465,12 @@ static void get_platform_name(cl_platform_id id, String& name)

    // get platform name string
    AutoBuffer<char> buf(sz + 1);
-    CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, sz, buf, 0));
+    CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, sz, buf.data(), 0));

    // just in case, ensure trailing zero for ASCIIZ string
    buf[sz] = 0;

-    name = (const char*)buf;
+    name = buf.data();
 }

 /*
@ -3654,7 +3654,7 @@ struct Program::Impl
        {
            buffer.resize(retsz + 16);
            log_retval = clGetProgramBuildInfo(handle, deviceList[0],
-                                               CL_PROGRAM_BUILD_LOG, retsz+1, (char*)buffer, &retsz);
+                                               CL_PROGRAM_BUILD_LOG, retsz+1, buffer.data(), &retsz);
            if (log_retval == CL_SUCCESS)
            {
                if (retsz < buffer.size())
@ -3668,7 +3668,7 @@ struct Program::Impl
            }
        }

-        errmsg = String(buffer);
+        errmsg = String(buffer.data());
        printf("OpenCL program build log: %s/%s\nStatus %d: %s\n%s\n%s\n",
                sourceModule_.c_str(), sourceName_.c_str(),
                result, getOpenCLErrorString(result),
@ -3701,7 +3701,7 @@ struct Program::Impl
        {
            size_t n = ctx.ndevices();
            AutoBuffer<cl_device_id, 4> deviceListBuf(n + 1);
-            cl_device_id* deviceList = deviceListBuf;
+            cl_device_id* deviceList = deviceListBuf.data();
            for (size_t i = 0; i < n; i++)
            {
                deviceList[i] = (cl_device_id)(ctx.device(i).ptr());
@ -3770,9 +3770,9 @@ struct Program::Impl
        AutoBuffer<const uchar*> binaryPtrs_(ndevices);
        AutoBuffer<size_t> binarySizes_(ndevices);

-        cl_device_id* devices = devices_;
-        const uchar** binaryPtrs = binaryPtrs_;
-        size_t* binarySizes = binarySizes_;
+        cl_device_id* devices = devices_.data();
+        const uchar** binaryPtrs = binaryPtrs_.data();
+        size_t* binarySizes = binarySizes_.data();
        for (size_t i = 0; i < ndevices; i++)
        {
            devices[i] = (cl_device_id)ctx.device(i).ptr();
@ -3781,7 +3781,7 @@ struct Program::Impl
        }

        cl_int result = 0;
-        handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), (cl_uint)ndevices, (cl_device_id*)devices_,
+        handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), (cl_uint)ndevices, devices_.data(),
                                           binarySizes, binaryPtrs, NULL, &result);
        if (result != CL_SUCCESS)
        {
@ -3798,7 +3798,7 @@ struct Program::Impl
        }
        // call clBuildProgram()
        {
-            result = clBuildProgram(handle, (cl_uint)ndevices, (cl_device_id*)devices_, buildflags.c_str(), 0, 0);
+            result = clBuildProgram(handle, (cl_uint)ndevices, devices_.data(), buildflags.c_str(), 0, 0);
            CV_OCL_DBG_CHECK_RESULT(result, cv::format("clBuildProgram(binary: %s/%s)", sourceModule_.c_str(), sourceName_.c_str()).c_str());
            if (result != CL_SUCCESS)
            {
@ -6318,7 +6318,7 @@ struct Image2D::Impl
        AutoBuffer<cl_image_format> formats(numFormats);
        err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
                                         CL_MEM_OBJECT_IMAGE2D, numFormats,
-                                         formats, NULL);
+                                         formats.data(), NULL);
        CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, formats)");
        for (cl_uint i = 0; i < numFormats; ++i)
        {
--- a/modules/core/src/opengl.cpp
+++ b/modules/core/src/opengl.cpp
@ -1624,12 +1624,12 @@ Context& initializeContextFromGL()
            if (status == CL_SUCCESS)
            {
                extensionStr.allocate(extensionSize+1);
-                status = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, extensionSize, (char*)extensionStr, NULL);
+                status = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, extensionSize, (char*)extensionStr.data(), NULL);
            }
            if (status != CL_SUCCESS)
                CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get platform extension string");

-            if (!strstr((const char*)extensionStr, "cl_khr_gl_sharing"))
+            if (!strstr((const char*)extensionStr.data(), "cl_khr_gl_sharing"))
                continue;
        }

--- a/modules/core/src/persistence_cpp.cpp
+++ b/modules/core/src/persistence_cpp.cpp
@ -222,7 +222,7 @@ String FileStorage::getDefaultObjectName(const String& _filename)
    if( ptr == ptr2 )
        CV_Error( CV_StsBadArg, "Invalid filename" );

-    char* name = name_buf;
+    char* name = name_buf.data();

    // name must start with letter or '_'
    if( !cv_isalpha(*ptr) && *ptr!= '_' ){
@ -237,7 +237,7 @@ String FileStorage::getDefaultObjectName(const String& _filename)
        *name++ = c;
    }
    *name = '\0';
-    name = name_buf;
+    name = name_buf.data();
    if( strcmp( name, "_" ) == 0 )
        strcpy( name, stubname );
    return String(name);
--- a/modules/core/src/rand.cpp
+++ b/modules/core/src/rand.cpp
@ -542,7 +542,7 @@ void RNG::fill( InputOutputArray _mat, int disttype,
    if( disttype == UNIFORM )
    {
        _parambuf.allocate(cn*8 + n1 + n2);
-        double* parambuf = _parambuf;
+        double* parambuf = _parambuf.data();
        double* p1 = _param1.ptr<double>();
        double* p2 = _param2.ptr<double>();

@ -651,7 +651,7 @@ void RNG::fill( InputOutputArray _mat, int disttype,
    else if( disttype == CV_RAND_NORMAL )
    {
        _parambuf.allocate(MAX(n1, cn) + MAX(n2, cn));
-        double* parambuf = _parambuf;
+        double* parambuf = _parambuf.data();

        int ptype = depth == CV_64F ? CV_64F : CV_32F;
        int esz = (int)CV_ELEM_SIZE(ptype);
@ -701,7 +701,7 @@ void RNG::fill( InputOutputArray _mat, int disttype,
    if( disttype == UNIFORM )
    {
        buf.allocate(blockSize*cn*4);
-        param = (uchar*)(double*)buf;
+        param = (uchar*)(double*)buf.data();

        if( depth <= CV_32S )
        {
@ -738,7 +738,7 @@ void RNG::fill( InputOutputArray _mat, int disttype,
    else
    {
        buf.allocate((blockSize*cn+1)/2);
-        nbuf = (float*)(double*)buf;
+        nbuf = (float*)(double*)buf.data();
    }

    for( size_t i = 0; i < it.nplanes; i++, ++it )
--- a/modules/core/src/split.cpp
+++ b/modules/core/src/split.cpp
@ -485,7 +485,7 @@ void cv::split(const Mat& src, Mat* mv)
    size_t esz = src.elemSize(), esz1 = src.elemSize1();
    size_t blocksize0 = (BLOCK_SIZE + esz-1)/esz;
    AutoBuffer<uchar> _buf((cn+1)*(sizeof(Mat*) + sizeof(uchar*)) + 16);
-    const Mat** arrays = (const Mat**)(uchar*)_buf;
+    const Mat** arrays = (const Mat**)_buf.data();
    uchar** ptrs = (uchar**)alignPtr(arrays + cn + 1, 16);

    arrays[0] = &src;
--- a/modules/core/src/sum.cpp
+++ b/modules/core/src/sum.cpp
@ -617,7 +617,7 @@ cv::Scalar cv::sum( InputArray _src )
        intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
        blockSize = std::min(blockSize, intSumBlockSize);
        _buf.allocate(cn);
-        buf = _buf;
+        buf = _buf.data();

        for( k = 0; k < cn; k++ )
            buf[k] = 0;
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@ -804,7 +804,7 @@ String format( const char* fmt, ... )
        va_list va;
        va_start(va, fmt);
        int bsize = static_cast<int>(buf.size());
-        int len = cv_vsnprintf((char *)buf, bsize, fmt, va);
+        int len = cv_vsnprintf(buf.data(), bsize, fmt, va);
        va_end(va);

        CV_Assert(len >= 0 && "Check format string for errors");
@ -814,7 +814,7 @@ String format( const char* fmt, ... )
            continue;
        }
        buf[bsize - 1] = 0;
-        return String((char *)buf, len);
+        return String(buf.data(), len);
    }
 }

--- a/modules/core/src/umatrix.cpp
+++ b/modules/core/src/umatrix.cpp
@ -502,7 +502,7 @@ UMat::UMat(const UMat& m, const Range& _rowRange, const Range& _colRange)
        rs[1] = _colRange;
        for( int i = 2; i < m.dims; i++ )
            rs[i] = Range::all();
-        *this = m(rs);
+        *this = m(rs.data());
        return;
    }

@ -805,7 +805,7 @@ UMat UMat::reshape(int _cn, int _newndims, const int* _newsz) const

        UMat hdr = *this;
        hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((_cn-1) << CV_CN_SHIFT);
-        setSize(hdr, _newndims, (int*)newsz_buf, NULL, true);
+        setSize(hdr, _newndims, newsz_buf.data(), NULL, true);

        return hdr;
    }
--- a/modules/core/src/utils/filesystem.cpp
+++ b/modules/core/src/utils/filesystem.cpp
@ -158,13 +158,13 @@ cv::String getcwd()
 #else
    DWORD sz = GetCurrentDirectoryA(0, NULL);
    buf.allocate((size_t)sz);
-    sz = GetCurrentDirectoryA((DWORD)buf.size(), (char*)buf);
-    return cv::String((char*)buf, (size_t)sz);
+    sz = GetCurrentDirectoryA((DWORD)buf.size(), buf.data());
+    return cv::String(buf.data(), (size_t)sz);
 #endif
 #elif defined __linux__ || defined __APPLE__ || defined __HAIKU__
    for(;;)
    {
-        char* p = ::getcwd((char*)buf, buf.size());
+        char* p = ::getcwd(buf.data(), buf.size());
        if (p == NULL)
        {
            if (errno == ERANGE)
@ -176,7 +176,7 @@ cv::String getcwd()
        }
        break;
    }
-    return cv::String((char*)buf, (size_t)strlen((char*)buf));
+    return cv::String(buf.data(), (size_t)strlen(buf.data()));
 #else
    return cv::String();
 #endif
--- a/modules/core/test/test_rand.cpp
+++ b/modules/core/test/test_rand.cpp
@ -374,9 +374,9 @@ TEST(Core_Rand, Regression_Stack_Corruption)
    int bufsz = 128; //enough for 14 doubles
    AutoBuffer<uchar> buffer(bufsz);
    size_t offset = 0;
-    cv::Mat_<cv::Point2d> x(2, 3, (cv::Point2d*)(buffer+offset)); offset += x.total()*x.elemSize();
-    double& param1 = *(double*)(buffer+offset); offset += sizeof(double);
-    double& param2 = *(double*)(buffer+offset); offset += sizeof(double);
+    cv::Mat_<cv::Point2d> x(2, 3, (cv::Point2d*)(buffer.data()+offset)); offset += x.total()*x.elemSize();
+    double& param1 = *(double*)(buffer.data()+offset); offset += sizeof(double);
+    double& param2 = *(double*)(buffer.data()+offset); offset += sizeof(double);
    param1 = -9; param2 = 2;

    cv::theRNG().fill(x, cv::RNG::NORMAL, param1, param2);
--- a/modules/cudaimgproc/src/hough_circles.cpp
+++ b/modules/cudaimgproc/src/hough_circles.cpp
@ -215,8 +215,8 @@ namespace
            AutoBuffer<ushort2> newBuf_(centersCount);
            int newCount = 0;

-            ushort2* oldBuf = oldBuf_;
-            ushort2* newBuf = newBuf_;
+            ushort2* oldBuf = oldBuf_.data();
+            ushort2* newBuf = newBuf_.data();

            cudaSafeCall( cudaMemcpy(oldBuf, centers, centersCount * sizeof(ushort2), cudaMemcpyDeviceToHost) );

--- a/modules/cudastereo/src/stereocsbp.cpp
+++ b/modules/cudastereo/src/stereocsbp.cpp
@ -172,7 +172,7 @@ namespace

        // compute sizes
        AutoBuffer<int> buf(levels_ * 3);
-        int* cols_pyr = buf;
+        int* cols_pyr = buf.data();
        int* rows_pyr = cols_pyr + levels_;
        int* nr_plane_pyr = rows_pyr + levels_;

--- a/modules/dnn/CMakeLists.txt
+++ b/modules/dnn/CMakeLists.txt
@ -48,6 +48,10 @@ if(ANDROID)
  add_definitions(-DDISABLE_POSIX_MEMALIGN -DTH_DISABLE_HEAP_TRACKING)
 endif()

+if(NOT BUILD_PROTOBUF)
+  add_definitions(-DOPENCV_DNN_EXTERNAL_PROTOBUF=1)
+endif()
+
 add_definitions(-DHAVE_PROTOBUF=1)

 #suppress warnings in autogenerated caffe.pb.* files
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@ -46,9 +46,9 @@
 #include <opencv2/core.hpp>

 #if !defined CV_DOXYGEN && !defined CV_DNN_DONT_ADD_EXPERIMENTAL_NS
-#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_v4 {
+#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_v5 {
 #define CV__DNN_EXPERIMENTAL_NS_END }
-namespace cv { namespace dnn { namespace experimental_dnn_v4 { } using namespace experimental_dnn_v4; }}
+namespace cv { namespace dnn { namespace experimental_dnn_v5 { } using namespace experimental_dnn_v5; }}
 #else
 #define CV__DNN_EXPERIMENTAL_NS_BEGIN
 #define CV__DNN_EXPERIMENTAL_NS_END
--- a/modules/dnn/src/caffe/caffe_io.cpp
+++ b/modules/dnn/src/caffe/caffe_io.cpp
@ -1120,7 +1120,11 @@ bool ReadProtoFromTextFile(const char* filename, Message* proto) {
    std::ifstream fs(filename, std::ifstream::in);
    CHECK(fs.is_open()) << "Can't open \"" << filename << "\"";
    IstreamInputStream input(&fs);
+#ifndef OPENCV_DNN_EXTERNAL_PROTOBUF
    return google::protobuf::TextFormat::Parser(true).Parse(&input, proto);
+#else
+    return google::protobuf::TextFormat::Parser().Parse(&input, proto);
+#endif
 }

 bool ReadProtoFromBinaryFile(const char* filename, Message* proto) {
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@ -409,8 +409,44 @@ struct LayerData
 struct DataLayer : public Layer
 {
    void finalize(const std::vector<Mat*>&, std::vector<Mat>&) CV_OVERRIDE {}
-    void forward(std::vector<Mat*>&, std::vector<Mat>&, std::vector<Mat> &) CV_OVERRIDE {}
-    void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) CV_OVERRIDE {}
+
+    void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) CV_OVERRIDE
+    {
+        CV_TRACE_FUNCTION();
+        CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
+                   forward_ocl(inputs, outputs, internals));
+
+        Layer::forward_fallback(inputs, outputs, internals);
+    }
+
+    void forward(std::vector<Mat*>&, std::vector<Mat>& outputs, std::vector<Mat> &) CV_OVERRIDE
+    {
+        for (int i = 0; i < inputsData.size(); ++i)
+        {
+            if (inputsData[i].type() == CV_32F && outputs[i].type() == CV_16S)
+            {
+                convertFp16(inputsData[i], outputs[i]);
+            }
+        }
+    }
+
+#ifdef HAVE_OPENCL
+    bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
+    {
+        if (outputs_.depth() == CV_16S)
+        {
+            std::vector<UMat> outputs;
+            outputs_.getUMatVector(outputs);
+            for (int i = 0; i < inputsData.size(); ++i)
+            {
+                convertFp16(inputsData[i], outputs[i]);
+            }
+        }
+        return true;
+    }
+#endif

    int outputNameToIndex(const String& tgtName) CV_OVERRIDE
    {
@ -434,6 +470,7 @@ struct DataLayer : public Layer
    }

    std::vector<String> outNames;
+    std::vector<Mat> inputsData;
 };

 struct BlobManager
@ -848,9 +885,6 @@ struct Net::Impl
                poolingLayer->computeMaxIdx = true;
            }
        }
-        it = layers.find(0);
-        CV_Assert(it != layers.end());
-        it->second.skip = true;

        layersTimings.clear();
    }
@ -1355,15 +1389,27 @@ struct Net::Impl
            allocateLayer(*i, layersShapes);

        //bind inputs
-        ld.inputBlobs.resize(ninputs);
-        ld.inputBlobsWrappers.resize(ninputs);
-        for (size_t i = 0; i < ninputs; i++)
+        if (ld.id == 0)  // DataLayer
+        {
+            ninputs = netInputLayer->inputsData.size();
+            ld.inputBlobsWrappers.resize(ninputs);
+            for (size_t i = 0; i < ninputs; i++)
+            {
+                ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]);
+            }
+        }
+        else
        {
-            LayerPin from = ld.inputBlobsId[i];
-            CV_Assert(from.valid());
-            CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
-            ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
-            ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
+            ld.inputBlobs.resize(ninputs);
+            ld.inputBlobsWrappers.resize(ninputs);
+            for (size_t i = 0; i < ninputs; i++)
+            {
+                LayerPin from = ld.inputBlobsId[i];
+                CV_Assert(from.valid());
+                CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
+                ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
+                ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
+            }
        }

        LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid);
@ -1731,15 +1777,14 @@ struct Net::Impl
        ShapesVec inputShapes;
        for(int i = 0; i < layers[0].outputBlobs.size(); i++)
        {
-            CV_Assert(layers[0].outputBlobs[i].total());
-            if (layers[0].outputBlobs[i].depth() == CV_32F &&
-                preferableBackend == DNN_BACKEND_OPENCV &&
+            Mat& inp = layers[0].outputBlobs[i];
+            CV_Assert(inp.total());
+            if (preferableBackend == DNN_BACKEND_OPENCV &&
                preferableTarget == DNN_TARGET_OPENCL_FP16)
            {
-                Mat mat = layers[0].outputBlobs[i].clone();
-                convertFp16(mat, layers[0].outputBlobs[i]);
+                layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
            }
-            inputShapes.push_back(shape(layers[0].outputBlobs[i]));
+            inputShapes.push_back(shape(inp));
        }
        LayersShapesMap layersShapes;
        getLayersShapes(inputShapes, layersShapes);
@ -2075,7 +2120,8 @@ Mat Net::forward(const String& outputName)
    if (layerName.empty())
        layerName = getLayerNames().back();

-    impl->setUpNet();
+    std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
+    impl->setUpNet(pins);
    impl->forwardToLayer(impl->getLayerData(layerName));

    return impl->getBlob(layerName);
@ -2085,13 +2131,13 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
 {
    CV_TRACE_FUNCTION();

-    impl->setUpNet();
-
    String layerName = outputName;

    if (layerName.empty())
        layerName = getLayerNames().back();

+    std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
+    impl->setUpNet(pins);
    impl->forwardToLayer(impl->getLayerData(layerName));

    LayerPin pin = impl->getPinByAlias(layerName);
@ -2270,28 +2316,22 @@ void Net::setInput(InputArray blob, const String& name)
        CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found");

    LayerData &ld = impl->layers[pin.lid];
-    ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
-    ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
-    MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
-    Mat blob_;
-    if (impl->preferableBackend == DNN_BACKEND_OPENCV &&
-        impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
-    {
-        Mat blob_mat = blob.getMat();
-        convertFp16(blob_mat, blob_);
-    }
-    else
-    {
-        blob_ = blob.getMat();
-    }
+    const int numInputs = std::max(pin.oid+1, (int)ld.requiredOutputs.size());
+    ld.outputBlobs.resize(numInputs);
+    ld.outputBlobsWrappers.resize(numInputs);
+    impl->netInputLayer->inputsData.resize(numInputs);
+
+    MatShape prevShape = shape(impl->netInputLayer->inputsData[pin.oid]);
+    Mat blob_ = blob.getMat();
    bool oldShape = prevShape == shape(blob_);
    if (oldShape)
    {
-        blob_.copyTo(ld.outputBlobs[pin.oid]);
+        blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
    }
    else
    {
        ld.outputBlobs[pin.oid] = blob_.clone();
+        impl->netInputLayer->inputsData[pin.oid] = ld.outputBlobs[pin.oid];
    }

    if (!ld.outputBlobsWrappers[pin.oid].empty())
@ -2729,9 +2769,9 @@ void Layer::applyHalideScheduler(Ptr<BackendNode>& node, const std::vector<Mat*>
    }
    else if (targetId == DNN_TARGET_OPENCL)
    {
-        int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC;
        if (outW == 1 && outH == 1)
        {
+            int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC;
            top.split(c, co, ci, c_split)
               .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
               .gpu_blocks(tile)
@ -2741,6 +2781,8 @@ void Layer::applyHalideScheduler(Ptr<BackendNode>& node, const std::vector<Mat*>
        {
            int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW;
            int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH;
+            // Supported vectorization widths: 2, 3, 4, 8, 16
+            int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : std::min(4, outC);
            top.split(x, xo, xi, x_split).split(y, yo, yi, y_split)
               .split(c, co, ci, c_split)
               .gpu_blocks(xo, yo, co)
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@ -82,7 +82,21 @@ public:
    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
-            return preferableTarget != DNN_TARGET_MYRIAD || type != "Deconvolution" || adjustPad == Size();
+        {
+            if (type == "Convolution")
+                return preferableTarget != DNN_TARGET_MYRIAD || dilation.width == dilation.height;
+            else
+            {
+                CV_Assert(type == "Deconvolution");
+                const int outGroupCn = blobs[0].size[1];  // Weights are in IOHW layout
+                const int group = numOutput / outGroupCn;
+                if (group != 1)
+                    return false;
+                if (preferableTarget == DNN_TARGET_OPENCL || preferableTarget == DNN_TARGET_OPENCL_FP16)
+                    return dilation.width == 1 && dilation.height == 1;
+                return true;
+            }
+        }
        else
            return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
    }
@ -586,7 +600,7 @@ public:
            float* data_out0_ = output_->ptr<float>();
            size_t rowbufsz = (size_t)karea*BLK_SIZE_CN*BLK_SIZE;
            AutoBuffer<float> rowbuf0_(rowbufsz + valign);
-            float* rowbuf0 = alignPtr((float*)rowbuf0_, (int)(valign*sizeof(float)));
+            float* rowbuf0 = alignPtr(rowbuf0_.data(), (int)(valign*sizeof(float)));

            // we clear the buffer once; ultimately, it lets us to avoid
            // tail processing after running the unrolled/vectorized loop.
--- a/modules/dnn/src/layers/eltwise_layer.cpp
+++ b/modules/dnn/src/layers/eltwise_layer.cpp
@ -97,8 +97,8 @@ public:
    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
-               backendId == DNN_BACKEND_HALIDE && haveHalide() ||
-               backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine();
+               backendId == DNN_BACKEND_HALIDE ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE && (op != SUM || coeffs.empty());
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
--- a/modules/dnn/src/layers/fully_connected_layer.cpp
+++ b/modules/dnn/src/layers/fully_connected_layer.cpp
@ -182,7 +182,7 @@ public:
            size_t stripeEnd = r.end == nstripes ? total : std::min(r.end*stripeSize, total);
            size_t wstep = weights->step1();
            AutoBuffer<float> srcbuf(vecsize_aligned + valign);
-            float* sptr = alignPtr((float*)srcbuf, (int)(valign*sizeof(float)));
+            float* sptr = alignPtr(srcbuf.data(), (int)(valign*sizeof(float)));

            for( k = vecsize; k < vecsize_aligned; k++ )
                sptr[k] = 0.f;
--- a/modules/dnn/src/layers/lrn_layer.cpp
+++ b/modules/dnn/src/layers/lrn_layer.cpp
@ -211,7 +211,7 @@ public:
            int k, channels = channels_, ksize = ksize_;

            AutoBuffer<float> buf_((channels + ksize + 1)*2);
-            float* acc = (float*)buf_;
+            float* acc = buf_.data();
            float* buf = acc + channels + ksize + 1;
            for( k = 0; k <= ksize; k++ )
                buf[-k-1] = buf[channels + k] = 0.f;
--- a/modules/dnn/src/layers/reorg_layer.cpp
+++ b/modules/dnn/src/layers/reorg_layer.cpp
@ -41,9 +41,9 @@
 //M*/

 #include "../precomp.hpp"
+#include "../op_inf_engine.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
 #include <opencv2/dnn/all_layers.hpp>
-#include <iostream>

 #ifdef HAVE_OPENCL
 #include "opencl_kernels_dnn.hpp"
@ -85,6 +85,11 @@ public:
        return false;
    }

+    virtual bool supportBackend(int backendId) CV_OVERRIDE
+    {
+        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE;
+    }
+
 #ifdef HAVE_OPENCL
    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
@ -169,6 +174,20 @@ public:
        }
    }

+    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
+    {
+#ifdef HAVE_INF_ENGINE
+        InferenceEngine::LayerParams lp;
+        lp.name = name;
+        lp.type = "ReorgYolo";
+        lp.precision = InferenceEngine::Precision::FP32;
+        std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));
+        ieLayer->params["stride"] = format("%d", reorgStride);
+        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#endif  // HAVE_INF_ENGINE
+        return Ptr<BackendNode>();
+    }
+
    virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                           const std::vector<MatShape> &outputs) const CV_OVERRIDE
    {
--- a/modules/dnn/src/layers/reshape_layer.cpp
+++ b/modules/dnn/src/layers/reshape_layer.cpp
@ -82,17 +82,26 @@ static void computeShapeByReshapeMask(const MatShape &srcShape,
        {
            if (matched)
            {
-                if (i == 0 || total(srcShape, i, srcRange.end) != maskTotal)
+                if (total(srcShape, i, srcRange.end) != maskTotal)
                {
                    srcRange.start = i + 1;
                    break;
                }
+                else if (i == 0)
+                {
+                    srcRange.start = 0;
+                    break;
+                }
            }
            else
            {
                matched = total(srcShape, i, srcRange.end) == maskTotal;
            }
        }
+        while (total(srcShape, srcRange.start, srcRange.end) != maskTotal && srcRange.start > 0)
+        {
+            srcRange.start -= 1;
+        }
        CV_Assert(total(srcShape, srcRange.start, srcRange.end) == maskTotal);
    }

--- a/modules/dnn/src/layers/resize_layer.cpp
+++ b/modules/dnn/src/layers/resize_layer.cpp
@ -192,6 +192,11 @@ public:
        return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]);
    }

+    virtual bool supportBackend(int backendId) CV_OVERRIDE
+    {
+        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE;
+    }
+
    virtual void finalize(const std::vector<Mat*>& inputs, std::vector<Mat> &outputs) CV_OVERRIDE
    {
        if (!outWidth && !outHeight)
@ -204,6 +209,22 @@ public:
        scaleHeight = (outHeight > 1) ? (static_cast<float>(inpHeight - 1) / (outHeight - 1)) : 0.f;
        scaleWidth = (outWidth > 1) ? (static_cast<float>(inpWidth - 1) / (outWidth - 1)) : 0.f;
    }
+
+    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
+    {
+#ifdef HAVE_INF_ENGINE
+        InferenceEngine::LayerParams lp;
+        lp.name = name;
+        lp.type = "Interp";
+        lp.precision = InferenceEngine::Precision::FP32;
+
+        std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));
+        ieLayer->params["pad_beg"] = "0";
+        ieLayer->params["pad_end"] = "0";
+        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#endif  // HAVE_INF_ENGINE
+        return Ptr<BackendNode>();
+    }
 };

 Ptr<Layer> InterpLayer::create(const LayerParams& params)
--- a/modules/dnn/src/layers/slice_layer.cpp
+++ b/modules/dnn/src/layers/slice_layer.cpp
@ -266,7 +266,21 @@ public:
        std::shared_ptr<InferenceEngine::CropLayer> ieLayer(new InferenceEngine::CropLayer(lp));

        CV_Assert(sliceRanges.size() == 1);
-        for (int i = sliceRanges[0].size() - 1; i >= 0; --i)
+
+        int from, to, step;
+        if (preferableTarget == DNN_TARGET_MYRIAD)
+        {
+            from = 1;
+            to = sliceRanges[0].size() + 1;
+            step = 1;
+        }
+        else
+        {
+            from = sliceRanges[0].size() - 1;
+            to = -1;
+            step = -1;
+        }
+        for (int i = from; i != to; i += step)
        {
            ieLayer->axis.push_back(i);
            ieLayer->offset.push_back(sliceRanges[0][i].start);
--- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp
+++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp
@ -132,9 +132,10 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
                width_,
                pooled_height_,
                pooled_width_,
-                ocl::KernelArg::PtrWriteOnly(top),
-                ocl::KernelArg::PtrWriteOnly(top_mask)
+                ocl::KernelArg::PtrWriteOnly(top)
            );
+            if (computeMaxIdx)
+                oclk_max_pool_forward.set(8, ocl::KernelArg::PtrWriteOnly(top_mask));  // TODO remove magic number. Extend cv::ocl::Kernel API

            ret = oclk_max_pool_forward.run(1, global, local, false);
        }
--- a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp
+++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp
@ -571,6 +571,50 @@ public:
    }
 };

+// In case of resizing by factor.
+class UpsamplingKerasSubgraph : public Subgraph
+{
+public:
+    UpsamplingKerasSubgraph()
+    {
+        int input = addNodeToMatch("");
+        int shape = addNodeToMatch("Shape", input);
+        int stack = addNodeToMatch("Const");
+        int stack_1 = addNodeToMatch("Const");
+        int stack_2 = addNodeToMatch("Const");
+        int strided_slice = addNodeToMatch("StridedSlice", shape, stack, stack_1, stack_2);
+        int factors = addNodeToMatch("Const");
+        int mul = addNodeToMatch("Mul", strided_slice, factors);
+        addNodeToMatch("ResizeNearestNeighbor", input, mul);
+        setFusedNode("ResizeNearestNeighbor", input, factors);
+    }
+
+    virtual void finalize(tensorflow::GraphDef& net, tensorflow::NodeDef* fusedNode,
+                          std::vector<tensorflow::NodeDef*>& inputNodes) CV_OVERRIDE
+    {
+        Mat factorsMat = getTensorContent(inputNodes[1]->attr().at("value").tensor());
+        CV_Assert(factorsMat.total() == 2, factorsMat.type() == CV_32SC1);
+
+        // Height scale factor
+        tensorflow::TensorProto* factorY = inputNodes[1]->mutable_attr()->at("value").mutable_tensor();
+        factorY->clear_int_val();
+        factorY->clear_tensor_content();
+        factorY->add_int_val(factorsMat.at<int>(0, 0));
+
+        // Width scale factor.
+        tensorflow::NodeDef* factorXNode = net.add_node();
+        factorXNode->set_op("Const");
+        factorXNode->set_name(fusedNode->name() + "/factor_y");
+
+        tensorflow::AttrValue factorX;
+        factorX.mutable_tensor()->set_dtype(tensorflow::DT_INT32);
+        factorX.mutable_tensor()->add_int_val(factorsMat.at<int>(0, 1));
+        factorXNode->mutable_attr()->insert(MapPair<std::string, tensorflow::AttrValue>("value", factorX));
+
+        fusedNode->add_input(factorXNode->name());
+    }
+};
+
 void simplifySubgraphs(tensorflow::GraphDef& net)
 {
    std::vector<Ptr<Subgraph> > subgraphs;
@ -585,6 +629,7 @@ void simplifySubgraphs(tensorflow::GraphDef& net)
    subgraphs.push_back(Ptr<Subgraph>(new DeconvolutionValidKerasSubgraph()));
    subgraphs.push_back(Ptr<Subgraph>(new DeconvolutionSameKerasSubgraph()));
    subgraphs.push_back(Ptr<Subgraph>(new ResizeBilinearSubgraph()));
+    subgraphs.push_back(Ptr<Subgraph>(new UpsamplingKerasSubgraph()));

    int numNodes = net.node_size();
    std::vector<int> matchedNodesIds;
--- a/modules/dnn/src/tensorflow/tf_importer.cpp
+++ b/modules/dnn/src/tensorflow/tf_importer.cpp
@ -262,6 +262,18 @@ static int getDataLayout(const tensorflow::NodeDef& layer)
    return DATA_LAYOUT_UNKNOWN;
 }

+static inline std::string getNodeName(const std::string& tensorName)
+{
+    return tensorName.substr(0, tensorName.rfind(':'));
+}
+
+static inline int getDataLayout(const std::string& layerName,
+                                const std::map<String, int>& data_layouts)
+{
+    std::map<String, int>::const_iterator it = data_layouts.find(getNodeName(layerName));
+    return it != data_layouts.end() ? it->second : DATA_LAYOUT_UNKNOWN;
+}
+
 void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer)
 {
    if (hasLayerAttr(layer, "strides"))
@ -604,11 +616,6 @@ static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& cons
    }
 }

-static inline std::string getNodeName(const std::string& tensorName)
-{
-    return tensorName.substr(0, tensorName.rfind(':'));
-}
-
 // If all inputs of specific layer have the same data layout we can say that
 // this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise.
 static int predictOutputDataLayout(const tensorflow::GraphDef& net,
@ -830,7 +837,8 @@ void TFImporter::populateNet(Net dstNet)
            // one input only
            connect(layer_id, dstNet, parsePin(input), id, 0);

-            if (data_layouts[name] == DATA_LAYOUT_UNKNOWN)
+
+            if (getDataLayout(name, data_layouts) == DATA_LAYOUT_UNKNOWN)
                data_layouts[name] = DATA_LAYOUT_NHWC;
        }
        else if (type == "BiasAdd" || type == "Add")
@ -956,7 +964,8 @@ void TFImporter::populateNet(Net dstNet)
            Pin inpId = parsePin(layer.input(0));
            Mat newShape = getTensorContent(getConstBlob(layer, value_id, 1));

-            if (newShape.total() != 4 && data_layouts[layer.input(0)] == DATA_LAYOUT_NHWC)
+            int inpLayout = getDataLayout(layer.input(0), data_layouts);
+            if (newShape.total() != 4 && inpLayout == DATA_LAYOUT_NHWC)
            {
                LayerParams permLP;
                int order[] = {0, 2, 3, 1};  // From OpenCV's NCHW to NHWC.
@ -969,7 +978,7 @@ void TFImporter::populateNet(Net dstNet)
                connect(layer_id, dstNet, inpId, permId, 0);
                inpId = Pin(permName);
            }
-            else if (newShape.total() == 4 && data_layouts[layer.input(0)] == DATA_LAYOUT_NHWC)
+            else if (newShape.total() == 4 && inpLayout == DATA_LAYOUT_NHWC)
            {
                // NHWC->NCHW
                std::swap(*newShape.ptr<int32_t>(0, 2), *newShape.ptr<int32_t>(0, 3));
@ -987,7 +996,7 @@ void TFImporter::populateNet(Net dstNet)
        else if (type == "Flatten" || type == "Squeeze")
        {
            Pin inpId = parsePin(layer.input(0));
-            int inpLayout = data_layouts[layer.input(0)];
+            int inpLayout = getDataLayout(layer.input(0), data_layouts);
            if (type == "Squeeze")
            {
                CV_Assert(hasLayerAttr(layer, "squeeze_dims"));
@ -1032,7 +1041,8 @@ void TFImporter::populateNet(Net dstNet)
            {
                // Only NHWC <-> NCHW permutations are allowed. OpenCV is always
                // keep NCHW layout this way.
-                if (data_layouts[layer.input(0)] == DATA_LAYOUT_NHWC)
+                int inpLayout = getDataLayout(layer.input(0), data_layouts);
+                if (inpLayout == DATA_LAYOUT_NHWC)
                {
                    if (permData[0] == 0 && permData[1] == 3 && permData[2] == 1 && permData[3] == 2)
                    {
@ -1049,7 +1059,7 @@ void TFImporter::populateNet(Net dstNet)
                    else
                        CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");
                }
-                else if (data_layouts[layer.input(0)] == DATA_LAYOUT_NCHW)
+                else if (inpLayout == DATA_LAYOUT_NCHW)
                {
                    if (permData[0] == 0 && permData[1] == 2 && permData[2] == 3 && permData[3] == 1)
                    {
@ -1112,7 +1122,7 @@ void TFImporter::populateNet(Net dstNet)
            int axisId = (type == "Concat" ? 0 : layer.input_size() - 1);
            int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0);

-            if (data_layouts[name] == DATA_LAYOUT_NHWC)
+            if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
                axis = toNCHW(axis);
            layerParams.set("axis", axis);

@ -1197,7 +1207,7 @@ void TFImporter::populateNet(Net dstNet)
            CV_Assert(!begins.empty(), !sizes.empty(), begins.type() == CV_32SC1,
                      sizes.type() == CV_32SC1);

-            if (begins.total() == 4 && data_layouts[name] == DATA_LAYOUT_NHWC)
+            if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
            {
                // Swap NHWC parameters' order to NCHW.
                std::swap(*begins.ptr<int32_t>(0, 2), *begins.ptr<int32_t>(0, 3));
@ -1597,7 +1607,7 @@ void TFImporter::populateNet(Net dstNet)
            CV_Assert(reductionIndices.type() == CV_32SC1);

            const int numAxes = reductionIndices.total();
-            if (data_layouts[name] == DATA_LAYOUT_NHWC)
+            if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
                for (int i = 0; i < numAxes; ++i)
                    reductionIndices.at<int>(i) = toNCHW(reductionIndices.at<int>(i));

--- a/modules/dnn/src/torch/torch_importer.cpp
+++ b/modules/dnn/src/torch/torch_importer.cpp
@ -370,8 +370,8 @@ struct TorchImporter
        int ndims = readInt();
        AutoBuffer<int64, 4> sizes(ndims);
        AutoBuffer<int64, 4> steps(ndims);
-        THFile_readLongRaw(file, sizes, ndims);
-        THFile_readLongRaw(file, steps, ndims);
+        THFile_readLongRaw(file, sizes.data(), ndims);
+        THFile_readLongRaw(file, steps.data(), ndims);
        long offset = readLong() - 1;

        //read Storage
@ -411,7 +411,7 @@ struct TorchImporter
        }

        //allocate Blob
-        Mat srcMat(ndims, (int*)isizes, typeTensor , storages[indexStorage].ptr() + offset*CV_ELEM_SIZE(typeTensor), (size_t*)ssteps);
+        Mat srcMat(ndims, isizes.data(), typeTensor , storages[indexStorage].ptr() + offset*CV_ELEM_SIZE(typeTensor), ssteps.data());
        int dstType = CV_32F;

        Mat blob;
@ -592,8 +592,8 @@ struct TorchImporter
                DictValue dimParam = scalarParams.get("size");
                layerParams.set("dim", dimParam);

-                if (scalarParams.has("batchMode") && scalarParams.get<bool>("batchMode"))
-                    layerParams.set("axis", 1);
+                int axis = (int)scalarParams.get<bool>("batchMode", true);
+                layerParams.set("axis", axis);

                curModule->modules.push_back(newModule);
            }
--- a/modules/dnn/test/test_backends.cpp
+++ b/modules/dnn/test/test_backends.cpp
@ -10,18 +10,9 @@

 namespace opencv_test { namespace {

-class DNNTestNetwork : public TestWithParam <tuple<DNNBackend, DNNTarget> >
+class DNNTestNetwork : public DNNTestLayer
 {
 public:
-    dnn::Backend backend;
-    dnn::Target target;
-
-    DNNTestNetwork()
-    {
-        backend = (dnn::Backend)(int)get<0>(GetParam());
-        target = (dnn::Target)(int)get<1>(GetParam());
-    }
-
    void processNet(const std::string& weights, const std::string& proto,
                    Size inpSize, const std::string& outputLayer = "",
                    const std::string& halideScheduler = "",
@ -40,32 +31,10 @@ public:
                    std::string halideScheduler = "",
                    double l1 = 0.0, double lInf = 0.0, double detectionConfThresh = 0.2)
    {
-        if (backend == DNN_BACKEND_OPENCV && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
-        {
-#ifdef HAVE_OPENCL
-            if (!cv::ocl::useOpenCL())
-#endif
-            {
-                throw SkipTestException("OpenCL is not available/disabled in OpenCV");
-            }
-        }
-        if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
-        {
-            if (!checkMyriadTarget())
-            {
-                throw SkipTestException("Myriad is not available/disabled in OpenCV");
-            }
-        }
-        if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
-        {
-            l1 = l1 == 0.0 ? 4e-3 : l1;
-            lInf = lInf == 0.0 ? 2e-2 : lInf;
-        }
-        else
-        {
-            l1 = l1 == 0.0 ? 1e-5 : l1;
-            lInf = lInf == 0.0 ? 1e-4 : lInf;
-        }
+        checkBackend();
+        l1 = l1 ? l1 : default_l1;
+        lInf = lInf ? lInf : default_lInf;
+
        weights = findDataFile(weights, false);
        if (!proto.empty())
            proto = findDataFile(proto, false);
--- a/modules/dnn/test/test_darknet_importer.cpp
+++ b/modules/dnn/test/test_darknet_importer.cpp
@ -65,76 +65,84 @@ TEST(Test_Darknet, read_yolo_voc)
    ASSERT_FALSE(net.empty());
 }

-// Test object detection network from Darknet framework.
-static void testDarknetModel(const std::string& cfg, const std::string& weights,
-                             const std::vector<cv::String>& outNames,
-                             const std::vector<int>& refClassIds,
-                             const std::vector<float>& refConfidences,
-                             const std::vector<Rect2d>& refBoxes,
-                             int backendId, int targetId, float scoreDiff = 0.0,
-                             float iouDiff = 0.0, float confThreshold = 0.24)
+class Test_Darknet_layers : public DNNTestLayer
 {
-    if (backendId == DNN_BACKEND_OPENCV && targetId == DNN_TARGET_OPENCL)
+public:
+    void testDarknetLayer(const std::string& name, bool hasWeights = false)
    {
-  #ifdef HAVE_OPENCL
-        if (!cv::ocl::useOpenCL())
-  #endif
-        {
-            throw SkipTestException("OpenCL is not available/disabled in OpenCV");
-        }
-    }
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_MYRIAD)
-    {
-        if (!checkMyriadTarget())
-        {
-            throw SkipTestException("Myriad is not available/disabled in OpenCV");
-        }
+        std::string cfg = findDataFile("dnn/darknet/" + name + ".cfg", false);
+        std::string model = "";
+        if (hasWeights)
+            model = findDataFile("dnn/darknet/" + name + ".weights", false);
+        Mat inp = blobFromNPY(findDataFile("dnn/darknet/" + name + "_in.npy", false));
+        Mat ref = blobFromNPY(findDataFile("dnn/darknet/" + name + "_out.npy", false));
+
+        checkBackend(&inp, &ref);
+
+        Net net = readNet(cfg, model);
+        net.setPreferableBackend(backend);
+        net.setPreferableTarget(target);
+        net.setInput(inp);
+        Mat out = net.forward();
+        normAssert(out, ref, "", default_l1, default_lInf);
    }
-    Mat sample = imread(_tf("dog416.png"));
-    Mat inp = blobFromImage(sample, 1.0/255, Size(416, 416), Scalar(), true, false);
-
-    Net net = readNet(findDataFile("dnn/" + cfg, false),
-                      findDataFile("dnn/" + weights, false));
-    net.setPreferableBackend(backendId);
-    net.setPreferableTarget(targetId);
-    net.setInput(inp);
-    std::vector<Mat> outs;
-    net.forward(outs, outNames);
-
-    std::vector<int> classIds;
-    std::vector<float> confidences;
-    std::vector<Rect2d> boxes;
-    for (int i = 0; i < outs.size(); ++i)
+};
+
+class Test_Darknet_nets : public DNNTestLayer
+{
+public:
+    // Test object detection network from Darknet framework.
+    void testDarknetModel(const std::string& cfg, const std::string& weights,
+                          const std::vector<cv::String>& outNames,
+                          const std::vector<int>& refClassIds,
+                          const std::vector<float>& refConfidences,
+                          const std::vector<Rect2d>& refBoxes,
+                          double scoreDiff, double iouDiff, float confThreshold = 0.24)
    {
-        Mat& out = outs[i];
-        for (int j = 0; j < out.rows; ++j)
+        checkBackend();
+
+        Mat sample = imread(_tf("dog416.png"));
+        Mat inp = blobFromImage(sample, 1.0/255, Size(416, 416), Scalar(), true, false);
+
+        Net net = readNet(findDataFile("dnn/" + cfg, false),
+                          findDataFile("dnn/" + weights, false));
+        net.setPreferableBackend(backend);
+        net.setPreferableTarget(target);
+        net.setInput(inp);
+        std::vector<Mat> outs;
+        net.forward(outs, outNames);
+
+        std::vector<int> classIds;
+        std::vector<float> confidences;
+        std::vector<Rect2d> boxes;
+        for (int i = 0; i < outs.size(); ++i)
        {
-            Mat scores = out.row(j).colRange(5, out.cols);
-            double confidence;
-            Point maxLoc;
-            minMaxLoc(scores, 0, &confidence, 0, &maxLoc);
-
-            float* detection = out.ptr<float>(j);
-            double centerX = detection[0];
-            double centerY = detection[1];
-            double width = detection[2];
-            double height = detection[3];
-            boxes.push_back(Rect2d(centerX - 0.5 * width, centerY - 0.5 * height,
-                                   width, height));
-            confidences.push_back(confidence);
-            classIds.push_back(maxLoc.x);
+            Mat& out = outs[i];
+            for (int j = 0; j < out.rows; ++j)
+            {
+                Mat scores = out.row(j).colRange(5, out.cols);
+                double confidence;
+                Point maxLoc;
+                minMaxLoc(scores, 0, &confidence, 0, &maxLoc);
+
+                float* detection = out.ptr<float>(j);
+                double centerX = detection[0];
+                double centerY = detection[1];
+                double width = detection[2];
+                double height = detection[3];
+                boxes.push_back(Rect2d(centerX - 0.5 * width, centerY - 0.5 * height,
+                                       width, height));
+                confidences.push_back(confidence);
+                classIds.push_back(maxLoc.x);
+            }
        }
+        normAssertDetections(refClassIds, refConfidences, refBoxes, classIds,
+                             confidences, boxes, "", confThreshold, scoreDiff, iouDiff);
    }
-    normAssertDetections(refClassIds, refConfidences, refBoxes, classIds,
-                         confidences, boxes, "", confThreshold, scoreDiff, iouDiff);
-}
-
-typedef testing::TestWithParam<tuple<DNNBackend, DNNTarget> > Test_Darknet_nets;
+};

 TEST_P(Test_Darknet_nets, YoloVoc)
 {
-    int backendId = get<0>(GetParam());
-    int targetId = get<1>(GetParam());
    std::vector<cv::String> outNames(1, "detection_out");

    std::vector<int> classIds(3);
@ -143,34 +151,28 @@ TEST_P(Test_Darknet_nets, YoloVoc)
    classIds[0] = 6;  confidences[0] = 0.750469f; boxes[0] = Rect2d(0.577374, 0.127391, 0.325575, 0.173418);  // a car
    classIds[1] = 1;  confidences[1] = 0.780879f; boxes[1] = Rect2d(0.270762, 0.264102, 0.461713, 0.48131); // a bicycle
    classIds[2] = 11; confidences[2] = 0.901615f; boxes[2] = Rect2d(0.1386, 0.338509, 0.282737, 0.60028);  // a dog
-    double scoreDiff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1e-2 : 8e-5;
-    double iouDiff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.013 : 3e-5;
+    double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 1e-2 : 8e-5;
+    double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.013 : 3e-5;
    testDarknetModel("yolo-voc.cfg", "yolo-voc.weights", outNames,
-                     classIds, confidences, boxes, backendId, targetId, scoreDiff, iouDiff);
+                     classIds, confidences, boxes, scoreDiff, iouDiff);
 }

 TEST_P(Test_Darknet_nets, TinyYoloVoc)
 {
-    int backendId = get<0>(GetParam());
-    int targetId = get<1>(GetParam());
    std::vector<cv::String> outNames(1, "detection_out");
    std::vector<int> classIds(2);
    std::vector<float> confidences(2);
    std::vector<Rect2d> boxes(2);
    classIds[0] = 6;  confidences[0] = 0.761967f; boxes[0] = Rect2d(0.579042, 0.159161, 0.31544, 0.160779);  // a car
    classIds[1] = 11; confidences[1] = 0.780595f; boxes[1] = Rect2d(0.129696, 0.386467, 0.315579, 0.534527);  // a dog
-    double scoreDiff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 8e-3 : 8e-5;
-    double iouDiff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 8e-3 : 3e-5;
+    double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 8e-3 : 8e-5;
+    double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 8e-3 : 3e-5;
    testDarknetModel("tiny-yolo-voc.cfg", "tiny-yolo-voc.weights", outNames,
-                     classIds, confidences, boxes, backendId, targetId, scoreDiff, iouDiff);
+                     classIds, confidences, boxes, scoreDiff, iouDiff);
 }

 TEST_P(Test_Darknet_nets, YOLOv3)
 {
-    int backendId = get<0>(GetParam());
-    int targetId = get<1>(GetParam());
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_MYRIAD)
-        throw SkipTestException("");
    std::vector<cv::String> outNames(3);
    outNames[0] = "yolo_82";
    outNames[1] = "yolo_94";
@ -182,55 +184,41 @@ TEST_P(Test_Darknet_nets, YOLOv3)
    classIds[0] = 7;  confidences[0] = 0.952983f; boxes[0] = Rect2d(0.614622, 0.150257, 0.286747, 0.138994);  // a truck
    classIds[1] = 1; confidences[1] = 0.987908f; boxes[1] = Rect2d(0.150913, 0.221933, 0.591342, 0.524327);  // a bicycle
    classIds[2] = 16; confidences[2] = 0.998836f; boxes[2] = Rect2d(0.160024, 0.389964, 0.257861, 0.553752);  // a dog (COCO)
-    double scoreDiff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 4e-3 : 8e-5;
-    double iouDiff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.011 : 3e-5;
+    double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 4e-3 : 8e-5;
+    double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : 3e-5;
    testDarknetModel("yolov3.cfg", "yolov3.weights", outNames,
-                     classIds, confidences, boxes, backendId, targetId, scoreDiff, iouDiff);
+                     classIds, confidences, boxes, scoreDiff, iouDiff);
 }

-const tuple<DNNBackend, DNNTarget> testCases[] = {
-#ifdef HAVE_INF_ENGINE
-    tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
-    tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
-    tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
-    tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
-#endif
-    tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_CPU),
-    tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
-    tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
-};
+INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets, dnnBackendsAndTargets());

-INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets, testing::ValuesIn(testCases));
+TEST_P(Test_Darknet_layers, shortcut)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_CPU)
+        throw SkipTestException("");
+    testDarknetLayer("shortcut");
+}

-static void testDarknetLayer(const std::string& name, bool hasWeights = false)
+TEST_P(Test_Darknet_layers, upsample)
 {
-    std::string cfg = findDataFile("dnn/darknet/" + name + ".cfg", false);
-    std::string model = "";
-    if (hasWeights)
-        model = findDataFile("dnn/darknet/" + name + ".weights", false);
-    Mat inp = blobFromNPY(findDataFile("dnn/darknet/" + name + "_in.npy", false));
-    Mat ref = blobFromNPY(findDataFile("dnn/darknet/" + name + "_out.npy", false));
-
-    Net net = readNet(cfg, model);
-    net.setPreferableBackend(DNN_BACKEND_OPENCV);
-    net.setInput(inp);
-    Mat out = net.forward();
-    normAssert(out, ref);
+    testDarknetLayer("upsample");
 }

-TEST(Test_Darknet, shortcut)
+TEST_P(Test_Darknet_layers, avgpool_softmax)
 {
-    testDarknetLayer("shortcut");
+    testDarknetLayer("avgpool_softmax");
 }

-TEST(Test_Darknet, upsample)
+TEST_P(Test_Darknet_layers, region)
 {
-    testDarknetLayer("upsample");
+    testDarknetLayer("region");
 }

-TEST(Test_Darknet, avgpool_softmax)
+TEST_P(Test_Darknet_layers, reorg)
 {
-    testDarknetLayer("avgpool_softmax");
+    testDarknetLayer("reorg");
 }

+INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_layers, dnnBackendsAndTargets());
+
 }} // namespace
--- a/modules/dnn/test/test_halide_layers.cpp
+++ b/modules/dnn/test/test_halide_layers.cpp
@ -12,32 +12,60 @@

 namespace opencv_test { namespace {

-#ifdef HAVE_HALIDE
 using namespace cv;
 using namespace cv::dnn;
 using namespace testing;

-static void test(LayerParams& params, Mat& input)
+static void test(Mat& input, Net& net, int backendId, int targetId)
 {
+    DNNTestLayer::checkBackend(backendId, targetId);
    randu(input, -1.0f, 1.0f);

-    Net net;
-    int lid = net.addLayer(params.name, params.type, params);
-    net.connect(0, 0, lid, 0);
-
    net.setInput(input);
    net.setPreferableBackend(DNN_BACKEND_OPENCV);
-    Mat outputDefault = net.forward(params.name).clone();
+    Mat outputDefault = net.forward().clone();

-    net.setPreferableBackend(DNN_BACKEND_HALIDE);
-    Mat outputHalide = net.forward(params.name).clone();
-    normAssert(outputDefault, outputHalide);
+    net.setPreferableBackend(backendId);
+    net.setPreferableTarget(targetId);
+    Mat outputHalide = net.forward().clone();
+
+    double l1, lInf;
+    DNNTestLayer::getDefaultThresholds(backendId, targetId, &l1, &lInf);
+    normAssert(outputDefault, outputHalide, "", l1, lInf);
+}
+
+static void test(LayerParams& params, Mat& input, int backendId, int targetId)
+{
+    Net net;
+    net.addLayerToPrev(params.name, params.type, params);
+    test(input, net, backendId, targetId);
+}
+
+static testing::internal::ParamGenerator<tuple<DNNBackend, DNNTarget> > dnnBackendsAndTargetsWithHalide()
+{
+    static const tuple<DNNBackend, DNNTarget> testCases[] = {
+#ifdef HAVE_HALIDE
+        tuple<DNNBackend, DNNTarget>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
+        tuple<DNNBackend, DNNTarget>(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL),
+#endif
+#ifdef HAVE_INF_ENGINE
+        tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
+        tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
+        tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
+        tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
+#endif
+        tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
+        tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
+    };
+    return testing::ValuesIn(testCases);
 }

+class Test_Halide_layers : public DNNTestLayer {};
+
 ////////////////////////////////////////////////////////////////////////////////
 // Padding
 ////////////////////////////////////////////////////////////////////////////////
-TEST(Padding_Halide, Accuracy)
+TEST_P(Test_Halide_layers, Padding)
 {
    static const int kNumRuns = 10;
    std::vector<int> paddings(8);
@ -52,15 +80,16 @@ TEST(Padding_Halide, Accuracy)
        lp.type = "Padding";
        lp.name = "testLayer";

-        Mat input({1 + rng(10), 1 + rng(10), 1 + rng(10), 1 + rng(10)}, CV_32F);
-        test(lp, input);
+        int sz[] = {1 + (int)rng(10), 1 + (int)rng(10), 1 + (int)rng(10), 1 + (int)rng(10)};
+        Mat input(4, &sz[0], CV_32F);
+        test(lp, input, backend, target);
    }
 }

 ////////////////////////////////////////////////////////////////////////////////
 // Convolution
 ////////////////////////////////////////////////////////////////////////////////
-typedef TestWithParam<tuple<Vec3i, Size, Size, Size, Size, Size, bool> > Convolution;
+typedef TestWithParam<tuple<Vec3i, Size, Size, Size, Size, Size, bool, tuple<DNNBackend, DNNTarget> > > Convolution;
 TEST_P(Convolution, Accuracy)
 {
    int inChannels = get<0>(GetParam())[0];
@ -72,8 +101,15 @@ TEST_P(Convolution, Accuracy)
    Size pad = get<4>(GetParam());
    Size dilation = get<5>(GetParam());
    bool hasBias = get<6>(GetParam());
+    int backendId = get<0>(get<7>(GetParam()));
+    int targetId = get<1>(get<7>(GetParam()));
+
+    if ((backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_MYRIAD) ||
+        (backendId == DNN_BACKEND_OPENCV && targetId == DNN_TARGET_OPENCL_FP16))
+        throw SkipTestException("");

-    Mat weights({outChannels, inChannels / group, kernel.height, kernel.width}, CV_32F);
+    int sz[] = {outChannels, inChannels / group, kernel.height, kernel.width};
+    Mat weights(4, &sz[0], CV_32F);
    randu(weights, -1.0f, 1.0f);

    LayerParams lp;
@ -93,12 +129,13 @@ TEST_P(Convolution, Accuracy)
    lp.blobs.push_back(weights);
    if (hasBias)
    {
-        Mat bias({outChannels}, CV_32F);
+        Mat bias(1, outChannels, CV_32F);
        randu(bias, -1.0f, 1.0f);
        lp.blobs.push_back(bias);
    }
-    Mat input({1, inChannels, inSize.height, inSize.width}, CV_32F);
-    test(lp, input);
+    int inpSz[] = {1, inChannels, inSize.height, inSize.width};
+    Mat input(4, &inpSz[0], CV_32F);
+    test(lp, input, backendId, targetId);
 }

 INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Convolution, Combine(
@ -110,13 +147,14 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Convolution, Combine(
 /*stride*/   Values(Size(1, 1), Size(2, 2)),
 /*pad*/      Values(Size(1, 0), Size(0, 1)),
 /*dilation*/ Values(Size(1, 1), Size(2, 2)),
-/*has bias*/ Bool()
+/*has bias*/ Bool(),
+             dnnBackendsAndTargetsWithHalide()
 ));

 ////////////////////////////////////////////////////////////////////////////////
 // Deconvolution
 ////////////////////////////////////////////////////////////////////////////////
-typedef TestWithParam<tuple<Vec3i, Size, Size, Size, Size, Vec4i, bool> > Deconvolution;
+typedef TestWithParam<tuple<Vec3i, Size, Size, Size, Size, Vec4i, bool, tuple<DNNBackend, DNNTarget> > > Deconvolution;
 TEST_P(Deconvolution, Accuracy)
 {
    int inChannels = get<0>(GetParam())[0];
@ -129,8 +167,14 @@ TEST_P(Deconvolution, Accuracy)
    Size stride = Size(get<5>(GetParam())[0], get<5>(GetParam())[1]);
    Size adjPad = Size(get<5>(GetParam())[2], get<5>(GetParam())[3]);
    bool hasBias = get<6>(GetParam());
-
-    Mat weights({inChannels, outChannels / group, kernel.height, kernel.width}, CV_32F);
+    int backendId = get<0>(get<7>(GetParam()));
+    int targetId = get<1>(get<7>(GetParam()));
+    if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_CPU &&
+        dilation.width == 2 && dilation.height == 2)
+        throw SkipTestException("");
+
+    int sz[] = {inChannels, outChannels / group, kernel.height, kernel.width};
+    Mat weights(4, &sz[0], CV_32F);
    randu(weights, -1.0f, 1.0f);

    LayerParams lp;
@ -152,12 +196,13 @@ TEST_P(Deconvolution, Accuracy)
    lp.blobs.push_back(weights);
    if (hasBias)
    {
-        Mat bias({outChannels}, CV_32F);
+        Mat bias(1, outChannels, CV_32F);
        randu(bias, -1.0f, 1.0f);
        lp.blobs.push_back(bias);
    }
-    Mat input({1, inChannels, inSize.height, inSize.width}, CV_32F);
-    test(lp, input);
+    int inpSz[] = {1, inChannels, inSize.height, inSize.width};
+    Mat input(4, &inpSz[0], CV_32F);
+    test(lp, input, backendId, targetId);
 }

 INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Deconvolution, Combine(
@ -168,13 +213,14 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Deconvolution, Combine(
 /*pad*/      Values(Size(1, 0), Size(0, 1)),
 /*dilation*/ Values(Size(1, 1), Size(2, 2)),
 /*stride, adj. pad*/ Values(Vec4i(1,1, 0,0), Vec4i(2,2, 1,0), Vec4i(1,2, 0,1)),
-/*has bias*/ Bool()
+/*has bias*/ Bool(),
+             dnnBackendsAndTargetsWithHalide()
 ));

 ////////////////////////////////////////////////////////////////////////////////
 // LRN
 ////////////////////////////////////////////////////////////////////////////////
-typedef TestWithParam<tuple<Vec3i, int, Vec3f, bool, std::string> > LRN;
+typedef TestWithParam<tuple<Vec3i, int, Vec3f, bool, std::string, tuple<DNNBackend, DNNTarget> > > LRN;
 TEST_P(LRN, Accuracy)
 {
    int inChannels = get<0>(GetParam())[0];
@ -185,6 +231,10 @@ TEST_P(LRN, Accuracy)
    float bias = get<2>(GetParam())[2];
    bool normBySize = get<3>(GetParam());
    std::string nrmType = get<4>(GetParam());
+    int backendId = get<0>(get<5>(GetParam()));
+    int targetId = get<1>(get<5>(GetParam()));
+    if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
+        throw SkipTestException("");

    LayerParams lp;
    lp.set("norm_region", nrmType);
@ -196,8 +246,9 @@ TEST_P(LRN, Accuracy)
    lp.type = "LRN";
    lp.name = "testLayer";

-    Mat input({1, inChannels, inSize.height, inSize.width}, CV_32F);
-    test(lp, input);
+    int sz[] = {1, inChannels, inSize.height, inSize.width};
+    Mat input(4, &sz[0], CV_32F);
+    test(lp, input, backendId, targetId);
 }

 INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, LRN, Combine(
@ -207,19 +258,24 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, LRN, Combine(
 /*alpha, beta,*/        Vec3f(1.0f, 0.9f, 1.1f), Vec3f(1.0f, 1.1f, 0.9f),
 /*bias */               Vec3f(1.1f, 0.9f, 1.0f), Vec3f(1.1f, 1.0f, 0.9f)),
 /*norm_by_size*/ Bool(),
-/*norm_type*/    Values("ACROSS_CHANNELS", "WITHIN_CHANNEL")
+/*norm_type*/    Values("ACROSS_CHANNELS", "WITHIN_CHANNEL"),
+                 dnnBackendsAndTargetsWithHalide()
 ));

 ////////////////////////////////////////////////////////////////////////////////
 // Average pooling
 ////////////////////////////////////////////////////////////////////////////////
-typedef TestWithParam<tuple<int, Size, Size, Size> > AvePooling;
+typedef TestWithParam<tuple<int, Size, Size, Size, tuple<DNNBackend, DNNTarget> > > AvePooling;
 TEST_P(AvePooling, Accuracy)
 {
    int inChannels = get<0>(GetParam());
    Size outSize = get<1>(GetParam());;  // Input size will be computed from parameters.
    Size kernel = get<2>(GetParam());
    Size stride = get<3>(GetParam());
+    int backendId = get<0>(get<4>(GetParam()));
+    int targetId = get<1>(get<4>(GetParam()));
+    if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_MYRIAD)
+        throw SkipTestException("");

    const int inWidth = (outSize.width - 1) * stride.width + kernel.width;
    const int inHeight = (outSize.height - 1) * stride.height + kernel.height;
@ -233,21 +289,23 @@ TEST_P(AvePooling, Accuracy)
    lp.type = "Pooling";
    lp.name = "testLayer";

-    Mat input({1, inChannels, inHeight, inWidth}, CV_32F);
-    test(lp, input);
+    int sz[] = {1, inChannels, inHeight, inWidth};
+    Mat input(4, &sz[0], CV_32F);
+    test(lp, input, backendId, targetId);
 }

 INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, AvePooling, Combine(
 /*in channels*/ Values(3, 4),
 /*out size*/    Values(Size(1, 1), Size(2, 2), Size(3, 2), Size(4, 7)),
 /*kernel*/      Values(Size(1, 1), Size(2, 2), Size(3, 3), Size(3, 2)),
-/*stride*/      Values(Size(1, 1), Size(2, 2), Size(3, 2))
+/*stride*/      Values(Size(1, 1), Size(2, 2), Size(3, 2)),
+                dnnBackendsAndTargetsWithHalide()
 ));

 ////////////////////////////////////////////////////////////////////////////////
 // Maximum pooling
 ////////////////////////////////////////////////////////////////////////////////
-typedef TestWithParam<tuple<int, Size, Size, Size, Size> > MaxPooling;
+typedef TestWithParam<tuple<int, Size, Size, Size, Size, tuple<DNNBackend, DNNTarget> > > MaxPooling;
 TEST_P(MaxPooling, Accuracy)
 {
    int inChannels = get<0>(GetParam());
@ -255,6 +313,8 @@ TEST_P(MaxPooling, Accuracy)
    Size kernel = get<2>(GetParam());
    Size stride = get<3>(GetParam());
    Size pad = get<4>(GetParam());
+    int backendId = get<0>(get<5>(GetParam()));
+    int targetId = get<1>(get<5>(GetParam()));

    LayerParams lp;
    lp.set("pool", "max");
@ -267,8 +327,9 @@ TEST_P(MaxPooling, Accuracy)
    lp.type = "Pooling";
    lp.name = "testLayer";

-    Mat input({1, inChannels, inSize.height, inSize.width}, CV_32F);
-    test(lp, input);
+    int sz[] = {1, inChannels, inSize.height, inSize.width};
+    Mat input(4, &sz[0], CV_32F);
+    test(lp, input, backendId, targetId);
 }

 INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, MaxPooling, Combine(
@ -276,19 +337,25 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, MaxPooling, Combine(
 /*in size*/     Values(Size(5, 5), Size(7, 6)),
 /*kernel*/      Values(Size(2, 2), Size(3, 3), Size(3, 2)),
 /*stride*/      Values(Size(1, 1), Size(2, 2), Size(3, 2)),
-/*pad*/         Values(Size(0, 0), Size(1, 1), Size(0, 1))
+/*pad*/         Values(Size(0, 0), Size(1, 1), Size(0, 1)),
+                dnnBackendsAndTargetsWithHalide()
 ));

 ////////////////////////////////////////////////////////////////////////////////
 // Fully-connected
 ////////////////////////////////////////////////////////////////////////////////
-typedef TestWithParam<tuple<int, Size, int, bool> > FullyConnected;
+typedef TestWithParam<tuple<int, Size, int, bool, tuple<DNNBackend, DNNTarget> > > FullyConnected;
 TEST_P(FullyConnected, Accuracy)
 {
    int inChannels = get<0>(GetParam());
    Size inSize = get<1>(GetParam());
    int outChannels = get<2>(GetParam());
    bool hasBias = get<3>(GetParam());
+    int backendId = get<0>(get<4>(GetParam()));
+    int targetId = get<1>(get<4>(GetParam()));
+    if (backendId == DNN_BACKEND_INFERENCE_ENGINE ||
+        (backendId == DNN_BACKEND_OPENCV && targetId == DNN_TARGET_OPENCL_FP16))
+        throw SkipTestException("");

    Mat weights(outChannels, inChannels * inSize.height * inSize.width, CV_32F);
    randu(weights, -1.0f, 1.0f);
@ -304,39 +371,50 @@ TEST_P(FullyConnected, Accuracy)
    lp.type = "InnerProduct";
    lp.name = "testLayer";

-    Mat input({1, inChannels, inSize.height, inSize.width}, CV_32F);
-    test(lp, input);
+    int sz[] = {1, inChannels, inSize.height, inSize.width};
+    Mat input(4, &sz[0], CV_32F);
+    test(lp, input, backendId, targetId);
 }

 INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, FullyConnected, Combine(
 /*in channels*/  Values(3, 4),
 /*in size*/      Values(Size(5, 4), Size(4, 5), Size(1, 1)),
 /*out channels*/ Values(3, 4),
-/*has bias*/     Bool()
+/*has bias*/     Bool(),
+                 dnnBackendsAndTargetsWithHalide()
 ));

 ////////////////////////////////////////////////////////////////////////////////
 // SoftMax
 ////////////////////////////////////////////////////////////////////////////////
-typedef TestWithParam<tuple<int> > SoftMax;
+typedef TestWithParam<tuple<int,  tuple<DNNBackend, DNNTarget> > > SoftMax;
 TEST_P(SoftMax, Accuracy)
 {
    int inChannels = get<0>(GetParam());
+    int backendId = get<0>(get<1>(GetParam()));
+    int targetId = get<1>(get<1>(GetParam()));
    LayerParams lp;
    lp.type = "SoftMax";
    lp.name = "testLayer";

-    Mat input({1, inChannels, 1, 1}, CV_32F);
-    test(lp, input);
+    int sz[] = {1, inChannels, 1, 1};
+    Mat input(4, &sz[0], CV_32F);
+    test(lp, input, backendId, targetId);
 }

-INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, SoftMax, Values(3, 4, 5, 1024));
+INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, SoftMax, Combine(
+    Values(3, 4, 5, 1024),
+    dnnBackendsAndTargetsWithHalide()
+));

 //////////////////////////////////////////////////////////////////////////////
 // Max pooling - unpooling
 //////////////////////////////////////////////////////////////////////////////
-TEST(MaxPoolUnpool_Halide, Accuracy)
+TEST_P(Test_Halide_layers, MaxPoolUnpool)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE)
+        throw SkipTestException("");
+
    LayerParams pool;
    pool.set("pool", "max");
    pool.set("kernel_w", 2);
@ -366,16 +444,9 @@ TEST(MaxPoolUnpool_Halide, Accuracy)
    net.connect(poolId, 0, unpoolId, 0);
    net.connect(poolId, 1, unpoolId, 1);

-    Mat input({1, 1, 4, 4}, CV_32F);
-    randu(input, -1.0f, 1.0f);
-    net.setInput(input);
-    net.setPreferableBackend(DNN_BACKEND_OPENCV);
-    Mat outputDefault = net.forward("testUnpool").clone();
-
-    net.setPreferableBackend(DNN_BACKEND_HALIDE);
-    net.setInput(input);
-    Mat outputHalide = net.forward("testUnpool").clone();
-    normAssert(outputDefault, outputHalide);
+    int sz[] = {1, 1, 4, 4};
+    Mat input(4, &sz[0], CV_32F);
+    test(input, net, backend, target);
 }

 ////////////////////////////////////////////////////////////////////////////////
@ -383,7 +454,7 @@ TEST(MaxPoolUnpool_Halide, Accuracy)
 ////////////////////////////////////////////////////////////////////////////////
 static const int kNumChannels = 3;

-void testInPlaceActivation(LayerParams& lp)
+void testInPlaceActivation(LayerParams& lp, int backendId, int targetId)
 {
    EXPECT_FALSE(lp.name.empty());

@ -400,24 +471,19 @@ void testInPlaceActivation(LayerParams& lp)
    net.connect(0, 0, poolId, 0);
    net.addLayerToPrev(lp.name, lp.type, lp);

-    Mat input({1, kNumChannels, 10, 10}, CV_32F);
-    randu(input, -1.0f, 1.0f);
-    net.setInput(input);
-    net.setPreferableBackend(DNN_BACKEND_OPENCV);
-    Mat outputDefault = net.forward(lp.name).clone();
-
-    net.setInput(input);
-    net.setPreferableBackend(DNN_BACKEND_HALIDE);
-    Mat outputHalide = net.forward(lp.name).clone();
-    normAssert(outputDefault, outputHalide);
+    int sz[] = {1, kNumChannels, 10, 10};
+    Mat input(4, &sz[0], CV_32F);
+    test(input, net, backendId, targetId);
 }

-typedef TestWithParam<tuple<bool, bool, float> > BatchNorm;
+typedef TestWithParam<tuple<bool, bool, float, tuple<DNNBackend, DNNTarget> > > BatchNorm;
 TEST_P(BatchNorm, Accuracy)
 {
    bool hasWeights = get<0>(GetParam());
    bool hasBias = get<1>(GetParam());
    float epsilon = get<2>(GetParam());
+    int backendId = get<0>(get<3>(GetParam()));
+    int targetId = get<1>(get<3>(GetParam()));

    LayerParams lp;
    lp.set("has_weight", hasWeights);
@ -428,56 +494,66 @@ TEST_P(BatchNorm, Accuracy)

    lp.blobs.reserve(4);
    for (int i = 0; i < 3; ++i)
-        lp.blobs.push_back(Mat({kNumChannels}, CV_32F));
+        lp.blobs.push_back(Mat(1, kNumChannels, CV_32F));
    if (hasBias || hasWeights)
-        lp.blobs.push_back(Mat({kNumChannels}, CV_32F));
+        lp.blobs.push_back(Mat(1, kNumChannels, CV_32F));

-    for (Mat& m : lp.blobs)
-        randu(m, 0.0f, 1.0f);
+    for (int i = 0; i < lp.blobs.size(); ++i)
+        randu(lp.blobs[i], 0.0f, 1.0f);

-    testInPlaceActivation(lp);
+    testInPlaceActivation(lp, backendId, targetId);
 }

 INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, BatchNorm, Combine(
 /*has weights*/ Bool(),
 /*has bias*/    Bool(),
-/*epsilon*/     Values(1e-3f, 1e-5f)
+/*epsilon*/     Values(1e-3f, 1e-5f),
+                dnnBackendsAndTargetsWithHalide()
 ));

-typedef TestWithParam<tuple<float> > ReLU;
+typedef TestWithParam<tuple<float, tuple<DNNBackend, DNNTarget> > > ReLU;
 TEST_P(ReLU, Accuracy)
 {
    float negativeSlope = get<0>(GetParam());
+    int backendId = get<0>(get<1>(GetParam()));
+    int targetId = get<1>(get<1>(GetParam()));

    LayerParams lp;
    lp.set("negative_slope", negativeSlope);
    lp.type = "ReLU";
    lp.name = "testLayer";
-    testInPlaceActivation(lp);
+    testInPlaceActivation(lp, backendId, targetId);
 }

-INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, ReLU, Values(
-/*negative slope*/ 2.0f, 0.3f, -0.1f, 0.0f
+INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, ReLU, Combine(
+/*negative slope*/ Values(2.0f, 0.3f, -0.1f, 0.0f),
+                   dnnBackendsAndTargetsWithHalide()
 ));

-typedef TestWithParam<tuple<std::string> > NoParamActivation;
+typedef TestWithParam<tuple<std::string, tuple<DNNBackend, DNNTarget> > > NoParamActivation;
 TEST_P(NoParamActivation, Accuracy)
 {
+    int backendId = get<0>(get<1>(GetParam()));
+    int targetId = get<1>(get<1>(GetParam()));
+
    LayerParams lp;
    lp.type = get<0>(GetParam());
    lp.name = "testLayer";
-    testInPlaceActivation(lp);
+    testInPlaceActivation(lp, backendId, targetId);
 }
-INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, NoParamActivation, Values(
-/*type*/ "TanH", "Sigmoid", "AbsVal", "BNLL"
+INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, NoParamActivation, Combine(
+/*type*/ Values("TanH", "Sigmoid", "AbsVal", "BNLL"),
+         dnnBackendsAndTargetsWithHalide()
 ));

-typedef TestWithParam<tuple<Vec3f> > Power;
+typedef TestWithParam<tuple<Vec3f, tuple<DNNBackend, DNNTarget> > > Power;
 TEST_P(Power, Accuracy)
 {
    float power = get<0>(GetParam())[0];
    float scale = get<0>(GetParam())[1];
    float shift = get<0>(GetParam())[2];
+    int backendId = get<0>(get<1>(GetParam()));
+    int targetId = get<1>(get<1>(GetParam()));

    LayerParams lp;
    lp.set("power", power);
@ -485,46 +561,52 @@ TEST_P(Power, Accuracy)
    lp.set("shift", shift);
    lp.type = "Power";
    lp.name = "testLayer";
-    testInPlaceActivation(lp);
+    testInPlaceActivation(lp, backendId, targetId);
 }

-INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Power,
+INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Power, Combine(
 /*power, scale, shift*/ Values(Vec3f(0.9f, 1.0f, 1.1f), Vec3f(0.9f, 1.1f, 1.0f),
                               Vec3f(1.0f, 0.9f, 1.1f), Vec3f(1.0f, 1.1f, 0.9f),
-                               Vec3f(1.1f, 0.9f, 1.0f), Vec3f(1.1f, 1.0f, 0.9f))
-);
+                               Vec3f(1.1f, 0.9f, 1.0f), Vec3f(1.1f, 1.0f, 0.9f)),
+                        dnnBackendsAndTargetsWithHalide()
+));

-TEST(ChannelsPReLU, Accuracy)
+TEST_P(Test_Halide_layers, ChannelsPReLU)
 {
    LayerParams lp;
    lp.type = "ChannelsPReLU";
    lp.name = "testLayer";
-    lp.blobs.push_back(Mat({kNumChannels}, CV_32F));
+    lp.blobs.push_back(Mat(1, kNumChannels, CV_32F));
    randu(lp.blobs[0], -1.0f, 1.0f);

-    testInPlaceActivation(lp);
+    testInPlaceActivation(lp, backend, target);
 }

-typedef TestWithParam<tuple<bool> > Scale;
+typedef TestWithParam<tuple<bool, tuple<DNNBackend, DNNTarget> > > Scale;
 TEST_P(Scale, Accuracy)
 {
    bool hasBias = get<0>(GetParam());
+    int backendId = get<0>(get<1>(GetParam()));
+    int targetId = get<1>(get<1>(GetParam()));

    LayerParams lp;
    lp.set("bias_term", hasBias);
    lp.type = "Scale";
    lp.name = "testLayer";
-    lp.blobs.push_back(Mat({kNumChannels}, CV_32F));
+    lp.blobs.push_back(Mat(1, kNumChannels, CV_32F));
    randu(lp.blobs[0], -1.0f, 1.0f);
    if (hasBias)
    {
-        lp.blobs.push_back(Mat({kNumChannels}, CV_32F));
+        lp.blobs.push_back(Mat(1, kNumChannels, CV_32F));
        randu(lp.blobs[1], -1.0f, 1.0f);
    }
-    testInPlaceActivation(lp);
+    testInPlaceActivation(lp, backendId, targetId);
 }

-INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Scale, Values(true, false));
+INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Scale, Combine(
+    Bool(),
+    dnnBackendsAndTargetsWithHalide()
+));

 ////////////////////////////////////////////////////////////////////////////////
 // Concat layer
@ -534,11 +616,13 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Scale, Values(true, false));
 //      `--- conv ----^ ^ ^
 //      `---- ... ------' '
 //      `-----------------'
-typedef TestWithParam<tuple<Vec3i, Vec3i> > Concat;
+typedef TestWithParam<tuple<Vec3i, Vec3i, tuple<DNNBackend, DNNTarget> > > Concat;
 TEST_P(Concat, Accuracy)
 {
    Vec3i inSize = get<0>(GetParam());
    Vec3i numChannels = get<1>(GetParam());
+    int backendId = get<0>(get<2>(GetParam()));
+    int targetId = get<1>(get<2>(GetParam()));

    Net net;

@ -549,7 +633,8 @@ TEST_P(Concat, Accuracy)
        if (!numChannels[i])
            break;

-        Mat weights({numChannels[i], inSize[0], 1, 1}, CV_32F);
+        int sz[] = {numChannels[i], inSize[0], 1, 1};
+        Mat weights(4, &sz[0], CV_32F);
        randu(weights, -1.0f, 1.0f);

        LayerParams convParam;
@ -578,21 +663,15 @@ TEST_P(Concat, Accuracy)
        net.connect(convLayerIds[i], 0, concatId, i + 1);
    }

-    Mat input({1, inSize[0], inSize[1], inSize[2]}, CV_32F);
-    randu(input, -1.0f, 1.0f);
-
-    net.setInput(input);
-    net.setPreferableBackend(DNN_BACKEND_OPENCV);
-    Mat outputDefault = net.forward(concatParam.name).clone();
-
-    net.setPreferableBackend(DNN_BACKEND_HALIDE);
-    Mat outputHalide = net.forward(concatParam.name).clone();
-    normAssert(outputDefault, outputHalide);
+    int sz[] = {1, inSize[0], inSize[1], inSize[2]};
+    Mat input(4, &sz[0], CV_32F);
+    test(input, net, backendId, targetId);
 }

 INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Concat, Combine(
 /*input size*/ Values(Vec3i(1, 4, 5), Vec3i(2, 8, 6)),
-/*channels*/   Values(Vec3i(2, 0, 0), Vec3i(3, 4, 0), Vec3i(1, 6, 2))
+/*channels*/   Values(Vec3i(2, 0, 0), Vec3i(3, 4, 0), Vec3i(1, 6, 2)),
+               dnnBackendsAndTargetsWithHalide()
 ));

 ////////////////////////////////////////////////////////////////////////////////
@ -603,20 +682,27 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Concat, Combine(
 //      `--- conv ----^ ^ ^
 //      `---- ... ------' '
 //      `-----------------'
-typedef TestWithParam<tuple<Vec3i, std::string, int, bool> > Eltwise;
+typedef TestWithParam<tuple<Vec3i, std::string, int, bool, tuple<DNNBackend, DNNTarget> > > Eltwise;
 TEST_P(Eltwise, Accuracy)
 {
    Vec3i inSize = get<0>(GetParam());
    std::string op = get<1>(GetParam());
    int numConv = get<2>(GetParam());
    bool weighted = get<3>(GetParam());
+    int backendId = get<0>(get<4>(GetParam()));
+    int targetId = get<1>(get<4>(GetParam()));
+
+    if (backendId == DNN_BACKEND_OPENCV &&
+        (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
+        throw SkipTestException("");

    Net net;

    std::vector<int> convLayerIds(numConv);
    for (int i = 0; i < numConv; ++i)
    {
-        Mat weights({inSize[0], inSize[0], 1, 1}, CV_32F);
+        int sz[] = {inSize[0], inSize[0], 1, 1};
+        Mat weights(4, &sz[0], CV_32F);
        randu(weights, -1.0f, 1.0f);

        LayerParams convParam;
@ -655,28 +741,23 @@ TEST_P(Eltwise, Accuracy)
        net.connect(convLayerIds[i], 0, eltwiseId, i + 1);
    }

-    Mat input({1, inSize[0], inSize[1], inSize[2]}, CV_32F);
-    randu(input, -1.0f, 1.0f);
-
-    net.setInput(input);
-    net.setPreferableBackend(DNN_BACKEND_OPENCV);
-    Mat outputDefault = net.forward(eltwiseParam.name).clone();
-
-    net.setPreferableBackend(DNN_BACKEND_HALIDE);
-    Mat outputHalide = net.forward(eltwiseParam.name).clone();
-    normAssert(outputDefault, outputHalide);
+    int sz[] = {1, inSize[0], inSize[1], inSize[2]};
+    Mat input(4, &sz[0], CV_32F);
+    test(input, net, backendId, targetId);
 }

 INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Eltwise, Combine(
 /*input size*/ Values(Vec3i(1, 4, 5), Vec3i(2, 8, 6)),
 /*operation*/  Values("prod", "sum", "max"),
 /*num convs*/  Values(1, 2, 3),
-/*weighted(for sum only)*/ Bool()
+/*weighted(for sum only)*/ Bool(),
+               dnnBackendsAndTargetsWithHalide()
 ));

 ////////////////////////////////////////////////////////////////////////////
 // Mixed backends
 ////////////////////////////////////////////////////////////////////////////
+#ifdef HAVE_HALIDE
 TEST(MixedBackends_Halide_Default_Halide, Accuracy)
 {
    // Just a layer that supports Halide backend.
@ -700,7 +781,8 @@ TEST(MixedBackends_Halide_Default_Halide, Accuracy)
    net.addLayerToPrev(mvn.name, mvn.type, mvn);
    net.addLayerToPrev(lrn2.name, lrn2.type, lrn2);

-    Mat input({4, 3, 5, 6}, CV_32F);
+    int sz[] = {4, 3, 5, 6};
+    Mat input(4, &sz[0], CV_32F);
    randu(input, -1.0f, 1.0f);
    net.setInput(input);
    net.setPreferableBackend(DNN_BACKEND_OPENCV);
@ -718,4 +800,6 @@ TEST(MixedBackends_Halide_Default_Halide, Accuracy)
 }
 #endif  // HAVE_HALIDE

+INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_Halide_layers, dnnBackendsAndTargetsWithHalide());
+
 }} // namespace
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@ -92,75 +92,84 @@ void runLayer(Ptr<Layer> layer, std::vector<Mat> &inpBlobs, std::vector<Mat> &ou
        outBlobs[i] = outp[i];
 }

-
-void testLayerUsingCaffeModels(String basename, int targetId = DNN_TARGET_CPU,
-                               bool useCaffeModel = false, bool useCommonInputBlob = true)
+class Test_Caffe_layers : public DNNTestLayer
 {
-    String prototxt = _tf(basename + ".prototxt");
-    String caffemodel = _tf(basename + ".caffemodel");
+public:
+    void testLayerUsingCaffeModels(const String& basename, bool useCaffeModel = false,
+                                   bool useCommonInputBlob = true, double l1 = 0.0,
+                                   double lInf = 0.0)
+    {
+        String prototxt = _tf(basename + ".prototxt");
+        String caffemodel = _tf(basename + ".caffemodel");

-    String inpfile = (useCommonInputBlob) ? _tf("blob.npy") : _tf(basename + ".input.npy");
-    String outfile = _tf(basename + ".npy");
+        String inpfile = (useCommonInputBlob) ? _tf("blob.npy") : _tf(basename + ".input.npy");
+        String outfile = _tf(basename + ".npy");

-    Net net = readNetFromCaffe(prototxt, (useCaffeModel) ? caffemodel : String());
-    ASSERT_FALSE(net.empty());
+        Mat inp = blobFromNPY(inpfile);
+        Mat ref = blobFromNPY(outfile);
+        checkBackend(&inp, &ref);

-    net.setPreferableBackend(DNN_BACKEND_OPENCV);
-    net.setPreferableTarget(targetId);
+        Net net = readNetFromCaffe(prototxt, (useCaffeModel) ? caffemodel : String());
+        ASSERT_FALSE(net.empty());

-    Mat inp = blobFromNPY(inpfile);
-    Mat ref = blobFromNPY(outfile);
+        net.setPreferableBackend(backend);
+        net.setPreferableTarget(target);

-    net.setInput(inp, "input");
-    Mat out = net.forward("output");
+        net.setInput(inp, "input");
+        Mat out = net.forward("output");

-    normAssert(ref, out);
-}
+        normAssert(ref, out, "", l1 ? l1 : default_l1, lInf ? lInf : default_lInf);
+    }
+};

-typedef testing::TestWithParam<DNNTarget> Test_Caffe_layers;
 TEST_P(Test_Caffe_layers, Softmax)
 {
-    testLayerUsingCaffeModels("layer_softmax", GetParam());
+    testLayerUsingCaffeModels("layer_softmax");
 }

 TEST_P(Test_Caffe_layers, LRN_spatial)
 {
-    testLayerUsingCaffeModels("layer_lrn_spatial", GetParam());
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
+        throw SkipTestException("");
+    testLayerUsingCaffeModels("layer_lrn_spatial");
 }

 TEST_P(Test_Caffe_layers, LRN_channels)
 {
-    testLayerUsingCaffeModels("layer_lrn_channels", GetParam());
+    testLayerUsingCaffeModels("layer_lrn_channels");
 }

 TEST_P(Test_Caffe_layers, Convolution)
 {
-    testLayerUsingCaffeModels("layer_convolution", GetParam(), true);
+    testLayerUsingCaffeModels("layer_convolution", true);
 }

 TEST_P(Test_Caffe_layers, DeConvolution)
 {
-    testLayerUsingCaffeModels("layer_deconvolution", GetParam(), true, false);
+    testLayerUsingCaffeModels("layer_deconvolution", true, false);
 }

 TEST_P(Test_Caffe_layers, InnerProduct)
 {
-    testLayerUsingCaffeModels("layer_inner_product", GetParam(), true);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE ||
+        (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
+        throw SkipTestException("");
+    testLayerUsingCaffeModels("layer_inner_product", true);
 }

 TEST_P(Test_Caffe_layers, Pooling_max)
 {
-    testLayerUsingCaffeModels("layer_pooling_max", GetParam());
+    testLayerUsingCaffeModels("layer_pooling_max");
 }

 TEST_P(Test_Caffe_layers, Pooling_ave)
 {
-    testLayerUsingCaffeModels("layer_pooling_ave", GetParam());
+    testLayerUsingCaffeModels("layer_pooling_ave");
 }

 TEST_P(Test_Caffe_layers, MVN)
 {
-    testLayerUsingCaffeModels("layer_mvn", GetParam());
+    testLayerUsingCaffeModels("layer_mvn");
 }

 void testReshape(const MatShape& inputShape, const MatShape& targetShape,
@ -201,35 +210,47 @@ TEST(Layer_Test_Reshape, Accuracy)
        testReshape(MatShape(inp, inp + 4), MatShape(out, out + 2), 0, -1,
                    MatShape(mask, mask + 2));
    }
+    {
+        int inp[] = {1, 2, 3};
+        int out[] = {3, 1, 2};
+        int mask[] = {3, 1, 2};
+        testReshape(MatShape(inp, inp + 3), MatShape(out, out + 3), 0, -1,
+                    MatShape(mask, mask + 3));
+    }
 }

-TEST(Layer_Test_BatchNorm, Accuracy)
-{
-    testLayerUsingCaffeModels("layer_batch_norm", DNN_TARGET_CPU, true);
-}
-
-TEST(Layer_Test_BatchNorm, local_stats)
+TEST_P(Test_Caffe_layers, BatchNorm)
 {
-    testLayerUsingCaffeModels("layer_batch_norm_local_stats", DNN_TARGET_CPU, true, false);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE)
+        throw SkipTestException("");
+    testLayerUsingCaffeModels("layer_batch_norm", true);
+    testLayerUsingCaffeModels("layer_batch_norm_local_stats", true, false);
 }

 TEST_P(Test_Caffe_layers, ReLU)
 {
-    testLayerUsingCaffeModels("layer_relu", GetParam());
+    testLayerUsingCaffeModels("layer_relu");
 }

-TEST(Layer_Test_Dropout, Accuracy)
+TEST_P(Test_Caffe_layers, Dropout)
 {
    testLayerUsingCaffeModels("layer_dropout");
 }

 TEST_P(Test_Caffe_layers, Concat)
 {
-    testLayerUsingCaffeModels("layer_concat", GetParam());
+    testLayerUsingCaffeModels("layer_concat");
+    testLayerUsingCaffeModels("layer_concat_optim", true, false);
+    testLayerUsingCaffeModels("layer_concat_shared_input", true, false);
 }

-TEST(Layer_Test_Fused_Concat, Accuracy)
+TEST_P(Test_Caffe_layers, Fused_Concat)
 {
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_CPU) ||
+        (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL))
+        throw SkipTestException("");
+    checkBackend();
+
    // Test case
    // input
    //   |
@ -260,28 +281,32 @@ TEST(Layer_Test_Fused_Concat, Accuracy)
    randu(input, 0.0f, 1.0f);  // [0, 1] to make AbsVal an identity transformation.

    net.setInput(input);
-    net.setPreferableBackend(DNN_BACKEND_OPENCV);
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);
    Mat out = net.forward();

-    normAssert(slice(out, Range::all(), Range(0, 2), Range::all(), Range::all()), input);
-    normAssert(slice(out, Range::all(), Range(2, 4), Range::all(), Range::all()), input);
-
-    //
-
-    testLayerUsingCaffeModels("layer_concat_optim", DNN_TARGET_CPU, true, false);
-    testLayerUsingCaffeModels("layer_concat_shared_input", DNN_TARGET_CPU, true, false);
+    normAssert(slice(out, Range::all(), Range(0, 2), Range::all(), Range::all()), input, "", default_l1, default_lInf);
+    normAssert(slice(out, Range::all(), Range(2, 4), Range::all(), Range::all()), input, "", default_l1, default_lInf);
 }

 TEST_P(Test_Caffe_layers, Eltwise)
 {
-    testLayerUsingCaffeModels("layer_eltwise", GetParam());
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE)
+        throw SkipTestException("");
+    testLayerUsingCaffeModels("layer_eltwise");
 }

 TEST_P(Test_Caffe_layers, PReLU)
 {
-    int targetId = GetParam();
-    testLayerUsingCaffeModels("layer_prelu", targetId, true);
-    testLayerUsingCaffeModels("layer_prelu_fc", targetId, true, false);
+    testLayerUsingCaffeModels("layer_prelu", true);
+}
+
+// TODO: fix an unstable test case
+TEST_P(Test_Caffe_layers, layer_prelu_fc)
+{
+    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
+        throw SkipTestException("");
+    testLayerUsingCaffeModels("layer_prelu_fc", true, false);
 }

 //template<typename XMat>
@ -304,13 +329,16 @@ TEST_P(Test_Caffe_layers, PReLU)
 //    );
 //}

-static void test_Reshape_Split_Slice_layers(int targetId)
+TEST_P(Test_Caffe_layers, Reshape_Split_Slice)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE)
+        throw SkipTestException("");
+
    Net net = readNetFromCaffe(_tf("reshape_and_slice_routines.prototxt"));
    ASSERT_FALSE(net.empty());

-    net.setPreferableBackend(DNN_BACKEND_OPENCV);
-    net.setPreferableTarget(targetId);
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);

    Mat input(6, 12, CV_32F);
    RNG rng(0);
@ -319,16 +347,17 @@ static void test_Reshape_Split_Slice_layers(int targetId)
    net.setInput(input, "input");
    Mat output = net.forward("output");

-    normAssert(input, output);
+    normAssert(input, output, "", default_l1, default_lInf);
 }

-TEST_P(Test_Caffe_layers, Reshape_Split_Slice)
+TEST_P(Test_Caffe_layers, Conv_Elu)
 {
-    test_Reshape_Split_Slice_layers(GetParam());
-}
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
+    {
+        if (!checkMyriadTarget())
+            throw SkipTestException("Myriad is not available/disabled in OpenCV");
+    }

-TEST(Layer_Conv_Elu, Accuracy)
-{
    Net net = readNetFromTensorflow(_tf("layer_elu_model.pb"));
    ASSERT_FALSE(net.empty());

@ -336,10 +365,11 @@ TEST(Layer_Conv_Elu, Accuracy)
    Mat ref = blobFromNPY(_tf("layer_elu_out.npy"));

    net.setInput(inp, "input");
-    net.setPreferableBackend(DNN_BACKEND_OPENCV);
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);
    Mat out = net.forward();

-    normAssert(ref, out);
+    normAssert(ref, out, "", default_l1, default_lInf);
 }

 class Layer_LSTM_Test : public ::testing::Test
@ -489,37 +519,6 @@ TEST_F(Layer_RNN_Test, get_set_test)
    EXPECT_EQ(shape(outputs[1]), shape(nT, nS, nH));
 }

-void testLayerUsingDarknetModels(String basename, bool useDarknetModel = false, bool useCommonInputBlob = true)
-{
-    String cfg = _tf(basename + ".cfg");
-    String weights = _tf(basename + ".weights");
-
-    String inpfile = (useCommonInputBlob) ? _tf("blob.npy") : _tf(basename + ".input.npy");
-    String outfile = _tf(basename + ".npy");
-
-    Net net = readNetFromDarknet(cfg, (useDarknetModel) ? weights : String());
-    ASSERT_FALSE(net.empty());
-
-    Mat inp = blobFromNPY(inpfile);
-    Mat ref = blobFromNPY(outfile);
-
-    net.setInput(inp, "data");
-    net.setPreferableBackend(DNN_BACKEND_OPENCV);
-    Mat out = net.forward();
-
-    normAssert(ref, out);
-}
-
-TEST(Layer_Test_Region, Accuracy)
-{
-    testLayerUsingDarknetModels("region", false, false);
-}
-
-TEST(Layer_Test_Reorg, Accuracy)
-{
-    testLayerUsingDarknetModels("reorg", false, false);
-}
-
 TEST(Layer_Test_ROIPooling, Accuracy)
 {
    Net net = readNetFromCaffe(_tf("net_roi_pooling.prototxt"));
@ -539,8 +538,10 @@ TEST(Layer_Test_ROIPooling, Accuracy)

 TEST_P(Test_Caffe_layers, FasterRCNN_Proposal)
 {
+    if ((backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ||
+        backend == DNN_BACKEND_INFERENCE_ENGINE)
+        throw SkipTestException("");
    Net net = readNetFromCaffe(_tf("net_faster_rcnn_proposal.prototxt"));
-    net.setPreferableTarget(GetParam());

    Mat scores = blobFromNPY(_tf("net_faster_rcnn_proposal.scores.npy"));
    Mat deltas = blobFromNPY(_tf("net_faster_rcnn_proposal.deltas.npy"));
@ -551,7 +552,8 @@ TEST_P(Test_Caffe_layers, FasterRCNN_Proposal)
    net.setInput(imInfo, "im_info");

    std::vector<Mat> outs;
-    net.setPreferableBackend(DNN_BACKEND_OPENCV);
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);
    net.forward(outs, "output");

    for (int i = 0; i < 2; ++i)
@ -566,7 +568,6 @@ TEST_P(Test_Caffe_layers, FasterRCNN_Proposal)
            EXPECT_EQ(countNonZero(outs[i].rowRange(numDets, outs[i].size[0])), 0);
    }
 }
-INSTANTIATE_TEST_CASE_P(/**/, Test_Caffe_layers, availableDnnTargets());

 typedef testing::TestWithParam<tuple<Vec4i, Vec2i, bool> > Scale_untrainable;
 TEST_P(Scale_untrainable, Accuracy)
@ -732,8 +733,10 @@ INSTANTIATE_TEST_CASE_P(Layer_Test, Crop, Combine(

 // Check that by default average pooling layer should not count zero padded values
 // into the normalization area.
-TEST(Layer_Test_Average_pooling_kernel_area, Accuracy)
+TEST_P(Test_Caffe_layers, Average_pooling_kernel_area)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
+        throw SkipTestException("");
    LayerParams lp;
    lp.name = "testAvePool";
    lp.type = "Pooling";
@ -748,17 +751,21 @@ TEST(Layer_Test_Average_pooling_kernel_area, Accuracy)
    // ----+--
    // 7 8 | 9
    Mat inp = (Mat_<float>(3, 3) << 1, 2, 3, 4, 5, 6, 7, 8, 9);
-    Mat target = (Mat_<float>(2, 2) << (1 + 2 + 4 + 5) / 4.f, (3 + 6) / 2.f, (7 + 8) / 2.f, 9);
+    Mat ref = (Mat_<float>(2, 2) << (1 + 2 + 4 + 5) / 4.f, (3 + 6) / 2.f, (7 + 8) / 2.f, 9);
    Mat tmp = blobFromImage(inp);
    net.setInput(blobFromImage(inp));
-    net.setPreferableBackend(DNN_BACKEND_OPENCV);
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);
    Mat out = net.forward();
-    normAssert(out, blobFromImage(target));
+    normAssert(out, blobFromImage(ref));
 }

 // Test PriorBoxLayer in case of no aspect ratios (just squared proposals).
-TEST(Layer_PriorBox, squares)
+TEST_P(Test_Caffe_layers, PriorBox_squares)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE ||
+        (backend == DNN_BACKEND_OPENCV && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)))
+        throw SkipTestException("");
    LayerParams lp;
    lp.name = "testPriorBox";
    lp.type = "PriorBox";
@ -776,14 +783,15 @@ TEST(Layer_PriorBox, squares)
    Mat inp(1, 2, CV_32F);
    randu(inp, -1, 1);
    net.setInput(blobFromImage(inp));
-    net.setPreferableBackend(DNN_BACKEND_OPENCV);
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);
    Mat out = net.forward();

-    Mat target = (Mat_<float>(4, 4) << 0.0, 0.0, 0.75, 1.0,
+    Mat ref = (Mat_<float>(4, 4) << 0.0, 0.0, 0.75, 1.0,
                                       0.25, 0.0, 1.0, 1.0,
                                       0.1f, 0.1f, 0.2f, 0.2f,
                                       0.1f, 0.1f, 0.2f, 0.2f);
-    normAssert(out.reshape(1, 4), target);
+    normAssert(out.reshape(1, 4), ref);
 }

 typedef TestWithParam<tuple<int, int> > Layer_Test_DWconv_Prelu;
@ -1049,19 +1057,19 @@ TEST(Test_DLDT, multiple_networks)
 #endif  // HAVE_INF_ENGINE

 // Test a custom layer.
-class InterpLayer CV_FINAL : public Layer
+class CustomInterpLayer CV_FINAL : public Layer
 {
 public:
-    InterpLayer(const LayerParams &params) : Layer(params)
+    CustomInterpLayer(const LayerParams &params) : Layer(params)
    {
        zoomFactor = params.get<int>("zoom_factor", 0);
        outWidth = params.get<int>("width", 0);
        outHeight = params.get<int>("height", 0);
    }

-    static Ptr<InterpLayer> create(LayerParams& params)
+    static Ptr<Layer> create(LayerParams& params)
    {
-        return Ptr<InterpLayer>(new InterpLayer(params));
+        return Ptr<Layer>(new CustomInterpLayer(params));
    }

    virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
@ -1135,24 +1143,45 @@ public:
        }
    }

-    virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {}
+    void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) CV_OVERRIDE
+    {
+        CV_TRACE_FUNCTION();
+        CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+
+        Layer::forward_fallback(inputs, outputs, internals);
+    }

 private:
    int outWidth, outHeight, zoomFactor;
 };

-TEST(Layer_Test_Interp_custom, Accuracy)
+#ifndef OPENCV_DNN_EXTERNAL_PROTOBUF
+TEST_P(Test_Caffe_layers, Interp)
+#else
+TEST_P(Test_Caffe_layers, DISABLED_Interp)  // requires patched protobuf (available in OpenCV source tree only)
+#endif
 {
-    CV_DNN_REGISTER_LAYER_CLASS(Interp, InterpLayer);
-    testLayerUsingCaffeModels("layer_interp", DNN_TARGET_CPU, false, false);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
+        throw SkipTestException("");
+    // Test a cusom layer.
+    CV_DNN_REGISTER_LAYER_CLASS(Interp, CustomInterpLayer);
+    try
+    {
+        testLayerUsingCaffeModels("layer_interp", false, false);
+    }
+    catch (...)
+    {
+        LayerFactory::unregisterLayer("Interp");
+        throw;
+    }
    LayerFactory::unregisterLayer("Interp");
-}

-TEST(Layer_Test_Interp, Accuracy)
-{
-    testLayerUsingCaffeModels("layer_interp", DNN_TARGET_CPU, false, false);
+    // Test an implemented layer.
+    testLayerUsingCaffeModels("layer_interp", false, false);
 }

+INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_Caffe_layers, dnnBackendsAndTargets());
+
 TEST(Layer_Test_PoolingIndices, Accuracy)
 {
    Net net;
@ -1233,4 +1262,36 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_ShuffleChannel, Combine(
 /*group*/        Values(1, 2, 3, 6)
 ));

+// Check if relu is not fused to convolution if we requested it's output
+TEST(Layer_Test_Convolution, relu_fusion)
+{
+    Net net;
+    {
+        LayerParams lp;
+        lp.set("kernel_size", 1);
+        lp.set("num_output", 1);
+        lp.set("bias_term", false);
+        lp.type = "Convolution";
+        lp.name = "testConv";
+
+        int weightsShape[] = {1, 1, 1, 1};
+        Mat weights(4, &weightsShape[0], CV_32F, Scalar(1));
+        lp.blobs.push_back(weights);
+        net.addLayerToPrev(lp.name, lp.type, lp);
+    }
+    {
+        LayerParams lp;
+        lp.type = "ReLU";
+        lp.name = "testReLU";
+        net.addLayerToPrev(lp.name, lp.type, lp);
+    }
+    int sz[] = {1, 1, 2, 3};
+    Mat input(4, &sz[0], CV_32F);
+    randu(input, -1.0, -0.1);
+    net.setInput(input);
+    net.setPreferableBackend(DNN_BACKEND_OPENCV);
+    Mat output = net.forward("testConv");
+    normAssert(input, output);
+}
+
 }} // namespace
--- a/modules/dnn/test/test_precomp.hpp
+++ b/modules/dnn/test/test_precomp.hpp
@ -69,6 +69,93 @@ static testing::internal::ParamGenerator<DNNTarget> availableDnnTargets()
    return testing::ValuesIn(targets);
 }

+static testing::internal::ParamGenerator<tuple<DNNBackend, DNNTarget> > dnnBackendsAndTargets()
+{
+    static const tuple<DNNBackend, DNNTarget> testCases[] = {
+    #ifdef HAVE_INF_ENGINE
+        tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
+        tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
+        tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
+        tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
+    #endif
+        tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_CPU),
+        tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
+        tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
+    };
+    return testing::ValuesIn(testCases);
+}
+
+class DNNTestLayer : public TestWithParam <tuple<DNNBackend, DNNTarget> >
+{
+public:
+    dnn::Backend backend;
+    dnn::Target target;
+    double default_l1, default_lInf;
+
+    DNNTestLayer()
+    {
+        backend = (dnn::Backend)(int)get<0>(GetParam());
+        target = (dnn::Target)(int)get<1>(GetParam());
+        getDefaultThresholds(backend, target, &default_l1, &default_lInf);
+    }
+
+   static void getDefaultThresholds(int backend, int target, double* l1, double* lInf)
+   {
+       if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
+       {
+           *l1 = 4e-3;
+           *lInf = 2e-2;
+       }
+       else
+       {
+           *l1 = 1e-5;
+           *lInf = 1e-4;
+       }
+   }
+
+   static void checkBackend(int backend, int target, Mat* inp = 0, Mat* ref = 0)
+   {
+       if (backend == DNN_BACKEND_OPENCV && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
+       {
+#ifdef HAVE_OPENCL
+           if (!cv::ocl::useOpenCL())
+#endif
+           {
+               throw SkipTestException("OpenCL is not available/disabled in OpenCV");
+           }
+       }
+       if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
+       {
+           if (!checkMyriadTarget())
+           {
+               throw SkipTestException("Myriad is not available/disabled in OpenCV");
+           }
+           if (inp && ref && inp->size[0] != 1)
+           {
+               // Myriad plugin supports only batch size 1. Slice a single sample.
+               if (inp->size[0] == ref->size[0])
+               {
+                   std::vector<cv::Range> range(inp->dims, Range::all());
+                   range[0] = Range(0, 1);
+                   *inp = inp->operator()(range);
+
+                   range = std::vector<cv::Range>(ref->dims, Range::all());
+                   range[0] = Range(0, 1);
+                   *ref = ref->operator()(range);
+               }
+               else
+                   throw SkipTestException("Myriad plugin supports only batch size 1");
+           }
+       }
+   }
+
+protected:
+    void checkBackend(Mat* inp = 0, Mat* ref = 0)
+    {
+        checkBackend(backend, target, inp, ref);
+    }
+};
+
 }}

 #endif
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@ -78,140 +78,170 @@ static std::string path(const std::string& file)
    return findDataFile("dnn/tensorflow/" + file, false);
 }

-static void runTensorFlowNet(const std::string& prefix, int targetId = DNN_TARGET_CPU, bool hasText = false,
-                             double l1 = 1e-5, double lInf = 1e-4,
-                             bool memoryLoad = false)
+class Test_TensorFlow_layers : public DNNTestLayer
 {
-    std::string netPath = path(prefix + "_net.pb");
-    std::string netConfig = (hasText ? path(prefix + "_net.pbtxt") : "");
-    std::string inpPath = path(prefix + "_in.npy");
-    std::string outPath = path(prefix + "_out.npy");
-
-    Net net;
-    if (memoryLoad)
+public:
+    void runTensorFlowNet(const std::string& prefix, bool hasText = false,
+                          double l1 = 0.0, double lInf = 0.0, bool memoryLoad = false)
    {
-        // Load files into a memory buffers
-        string dataModel;
-        ASSERT_TRUE(readFileInMemory(netPath, dataModel));
+        std::string netPath = path(prefix + "_net.pb");
+        std::string netConfig = (hasText ? path(prefix + "_net.pbtxt") : "");
+        std::string inpPath = path(prefix + "_in.npy");
+        std::string outPath = path(prefix + "_out.npy");
+
+        cv::Mat input = blobFromNPY(inpPath);
+        cv::Mat ref = blobFromNPY(outPath);
+        checkBackend(&input, &ref);
+
+        Net net;
+        if (memoryLoad)
+        {
+            // Load files into a memory buffers
+            string dataModel;
+            ASSERT_TRUE(readFileInMemory(netPath, dataModel));
+
+            string dataConfig;
+            if (hasText)
+                ASSERT_TRUE(readFileInMemory(netConfig, dataConfig));
+
+            net = readNetFromTensorflow(dataModel.c_str(), dataModel.size(),
+                                        dataConfig.c_str(), dataConfig.size());
+        }
+        else
+            net = readNetFromTensorflow(netPath, netConfig);

-        string dataConfig;
-        if (hasText)
-            ASSERT_TRUE(readFileInMemory(netConfig, dataConfig));
+        ASSERT_FALSE(net.empty());

-        net = readNetFromTensorflow(dataModel.c_str(), dataModel.size(),
-                                    dataConfig.c_str(), dataConfig.size());
+        net.setPreferableBackend(backend);
+        net.setPreferableTarget(target);
+        net.setInput(input);
+        cv::Mat output = net.forward();
+        normAssert(ref, output, "", l1 ? l1 : default_l1, lInf ? lInf : default_lInf);
    }
-    else
-        net = readNetFromTensorflow(netPath, netConfig);
-
-    ASSERT_FALSE(net.empty());
-
-    net.setPreferableBackend(DNN_BACKEND_OPENCV);
-    net.setPreferableTarget(targetId);
-
-    cv::Mat input = blobFromNPY(inpPath);
-    cv::Mat target = blobFromNPY(outPath);
-
-    net.setInput(input);
-    cv::Mat output = net.forward();
-    normAssert(target, output, "", l1, lInf);
-}
-
-typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_layers;
+};

 TEST_P(Test_TensorFlow_layers, conv)
 {
-    int targetId = GetParam();
-    runTensorFlowNet("single_conv", targetId);
-    runTensorFlowNet("atrous_conv2d_valid", targetId);
-    runTensorFlowNet("atrous_conv2d_same", targetId);
-    runTensorFlowNet("depthwise_conv2d", targetId);
-    runTensorFlowNet("keras_atrous_conv2d_same", targetId);
-    runTensorFlowNet("conv_pool_nchw", targetId);
+    runTensorFlowNet("single_conv");
+    runTensorFlowNet("atrous_conv2d_valid");
+    runTensorFlowNet("atrous_conv2d_same");
+    runTensorFlowNet("depthwise_conv2d");
+    runTensorFlowNet("keras_atrous_conv2d_same");
+    runTensorFlowNet("conv_pool_nchw");
 }

 TEST_P(Test_TensorFlow_layers, padding)
 {
-    int targetId = GetParam();
-    runTensorFlowNet("padding_same", targetId);
-    runTensorFlowNet("padding_valid", targetId);
-    runTensorFlowNet("spatial_padding", targetId);
+    runTensorFlowNet("padding_same");
+    runTensorFlowNet("padding_valid");
+    runTensorFlowNet("spatial_padding");
 }

 TEST_P(Test_TensorFlow_layers, eltwise_add_mul)
 {
-    runTensorFlowNet("eltwise_add_mul", GetParam());
+    runTensorFlowNet("eltwise_add_mul");
+}
+
+TEST_P(Test_TensorFlow_layers, pad_and_concat)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
+        throw SkipTestException("");
+    runTensorFlowNet("pad_and_concat");
 }

-TEST_P(Test_TensorFlow_layers, concat)
+TEST_P(Test_TensorFlow_layers, concat_axis_1)
 {
-    runTensorFlowNet("pad_and_concat", GetParam());
-    runTensorFlowNet("concat_axis_1", GetParam());
+    runTensorFlowNet("concat_axis_1");
 }

 TEST_P(Test_TensorFlow_layers, batch_norm)
 {
-    int targetId = GetParam();
-    runTensorFlowNet("batch_norm", targetId);
-    runTensorFlowNet("fused_batch_norm", targetId);
-    runTensorFlowNet("batch_norm_text", targetId, true);
-    runTensorFlowNet("mvn_batch_norm", targetId);
-    runTensorFlowNet("mvn_batch_norm_1x1", targetId);
-    runTensorFlowNet("unfused_batch_norm", targetId);
-    runTensorFlowNet("fused_batch_norm_no_gamma", targetId);
-    runTensorFlowNet("unfused_batch_norm_no_gamma", targetId);
+    runTensorFlowNet("batch_norm");
+    runTensorFlowNet("batch_norm", false, 0.0, 0.0, true);
+    runTensorFlowNet("fused_batch_norm");
+    runTensorFlowNet("fused_batch_norm", false, 0.0, 0.0, true);
+    runTensorFlowNet("batch_norm_text", true);
+    runTensorFlowNet("batch_norm_text", true, 0.0, 0.0, true);
+    runTensorFlowNet("unfused_batch_norm");
+    runTensorFlowNet("fused_batch_norm_no_gamma");
+    runTensorFlowNet("unfused_batch_norm_no_gamma");
+}
+
+TEST_P(Test_TensorFlow_layers, mvn_batch_norm)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE)
+        throw SkipTestException("");
+    runTensorFlowNet("mvn_batch_norm");
+    runTensorFlowNet("mvn_batch_norm_1x1");
 }

 TEST_P(Test_TensorFlow_layers, pooling)
 {
-    int targetId = GetParam();
-    cv::ocl::Device d = cv::ocl::Device::getDefault();
-    bool loosenFlag = targetId == DNN_TARGET_OPENCL && d.isIntel() && d.type() == cv::ocl::Device::TYPE_CPU;
-    runTensorFlowNet("max_pool_even", targetId);
-    runTensorFlowNet("max_pool_odd_valid", targetId);
-    runTensorFlowNet("ave_pool_same", targetId);
-    runTensorFlowNet("max_pool_odd_same", targetId, false, loosenFlag ? 3e-5 : 1e-5, loosenFlag ? 3e-4 : 1e-4);
-    runTensorFlowNet("reduce_mean", targetId);  // an average pooling over all spatial dimensions.
+    runTensorFlowNet("max_pool_even");
+    runTensorFlowNet("max_pool_odd_valid");
+    runTensorFlowNet("max_pool_odd_same");
+    runTensorFlowNet("reduce_mean");  // an average pooling over all spatial dimensions.
+}
+
+// TODO: fix tests and replace to pooling
+TEST_P(Test_TensorFlow_layers, ave_pool_same)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
+        throw SkipTestException("");
+    runTensorFlowNet("ave_pool_same");
 }

 TEST_P(Test_TensorFlow_layers, deconvolution)
 {
-    int targetId = GetParam();
-    runTensorFlowNet("deconvolution", targetId);
-    runTensorFlowNet("deconvolution_same", targetId);
-    runTensorFlowNet("deconvolution_stride_2_same", targetId);
-    runTensorFlowNet("deconvolution_adj_pad_valid", targetId);
-    runTensorFlowNet("deconvolution_adj_pad_same", targetId);
-    runTensorFlowNet("keras_deconv_valid", targetId);
-    runTensorFlowNet("keras_deconv_same", targetId);
+    runTensorFlowNet("deconvolution");
+    runTensorFlowNet("deconvolution_same");
+    runTensorFlowNet("deconvolution_stride_2_same");
+    runTensorFlowNet("deconvolution_adj_pad_valid");
+    runTensorFlowNet("deconvolution_adj_pad_same");
+    runTensorFlowNet("keras_deconv_valid");
+    runTensorFlowNet("keras_deconv_same");
 }

 TEST_P(Test_TensorFlow_layers, matmul)
 {
-    int targetId = GetParam();
-    runTensorFlowNet("matmul", targetId);
-    runTensorFlowNet("nhwc_reshape_matmul", targetId);
-    runTensorFlowNet("nhwc_transpose_reshape_matmul", targetId);
+    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
+        throw SkipTestException("");
+    runTensorFlowNet("matmul");
+    runTensorFlowNet("nhwc_reshape_matmul");
+    runTensorFlowNet("nhwc_transpose_reshape_matmul");
 }

 TEST_P(Test_TensorFlow_layers, reshape)
 {
-    int targetId = GetParam();
-    runTensorFlowNet("shift_reshape_no_reorder", targetId);
-    runTensorFlowNet("reshape_reduce", targetId);
-    runTensorFlowNet("flatten", targetId, true);
-    runTensorFlowNet("unfused_flatten", targetId);
-    runTensorFlowNet("unfused_flatten_unknown_batch", targetId);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE)
+        throw SkipTestException("");
+    runTensorFlowNet("shift_reshape_no_reorder");
+    runTensorFlowNet("reshape_no_reorder");
+    runTensorFlowNet("reshape_reduce");
+}
+
+TEST_P(Test_TensorFlow_layers, flatten)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE &&
+        (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
+        throw SkipTestException("");
+    runTensorFlowNet("flatten", true);
+    runTensorFlowNet("unfused_flatten");
+    runTensorFlowNet("unfused_flatten_unknown_batch");
 }

 TEST_P(Test_TensorFlow_layers, l2_normalize)
 {
-    int targetId = GetParam();
-    runTensorFlowNet("l2_normalize", targetId);
-    runTensorFlowNet("l2_normalize_3d", targetId);
+    runTensorFlowNet("l2_normalize");
 }

-INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_layers, availableDnnTargets());
+// TODO: fix it and add to l2_normalize
+TEST_P(Test_TensorFlow_layers, l2_normalize_3d)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
+        throw SkipTestException("");
+    runTensorFlowNet("l2_normalize_3d");
+}

 typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_nets;

@ -358,90 +388,96 @@ TEST_P(Test_TensorFlow_nets, EAST_text_detection)

 INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_nets, availableDnnTargets());

-typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_fp16;
-
-TEST_P(Test_TensorFlow_fp16, tests)
+TEST_P(Test_TensorFlow_layers, fp16_weights)
 {
-    int targetId = GetParam();
-    const float l1 = 7e-4;
-    const float lInf = 1e-2;
-    runTensorFlowNet("fp16_single_conv", targetId, false, l1, lInf);
-    runTensorFlowNet("fp16_deconvolution", targetId, false, l1, lInf);
-    runTensorFlowNet("fp16_max_pool_odd_same", targetId, false, l1, lInf);
-    runTensorFlowNet("fp16_padding_valid", targetId, false, l1, lInf);
-    runTensorFlowNet("fp16_eltwise_add_mul", targetId, false, l1, lInf);
-    runTensorFlowNet("fp16_max_pool_odd_valid", targetId, false, l1, lInf);
-    runTensorFlowNet("fp16_pad_and_concat", targetId, false, l1, lInf);
-    runTensorFlowNet("fp16_max_pool_even", targetId, false, l1, lInf);
-    runTensorFlowNet("fp16_padding_same", targetId, false, l1, lInf);
+    const float l1 = 0.00071;
+    const float lInf = 0.012;
+    runTensorFlowNet("fp16_single_conv", false, l1, lInf);
+    runTensorFlowNet("fp16_deconvolution", false, l1, lInf);
+    runTensorFlowNet("fp16_max_pool_odd_same", false, l1, lInf);
+    runTensorFlowNet("fp16_padding_valid", false, l1, lInf);
+    runTensorFlowNet("fp16_eltwise_add_mul", false, l1, lInf);
+    runTensorFlowNet("fp16_max_pool_odd_valid", false, l1, lInf);
+    runTensorFlowNet("fp16_max_pool_even", false, l1, lInf);
+    runTensorFlowNet("fp16_padding_same", false, l1, lInf);
 }

-INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_fp16,
-                        Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16));
+// TODO: fix pad_and_concat and add this test case to fp16_weights
+TEST_P(Test_TensorFlow_layers, fp16_pad_and_concat)
+{
+    const float l1 = 0.00071;
+    const float lInf = 0.012;
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
+        throw SkipTestException("");
+    runTensorFlowNet("fp16_pad_and_concat", false, l1, lInf);
+}

-TEST(Test_TensorFlow, defun)
+TEST_P(Test_TensorFlow_layers, defun)
 {
    runTensorFlowNet("defun_dropout");
 }

-TEST(Test_TensorFlow, quantized)
+TEST_P(Test_TensorFlow_layers, quantized)
 {
    runTensorFlowNet("uint8_single_conv");
 }

-TEST(Test_TensorFlow, lstm)
+TEST_P(Test_TensorFlow_layers, lstm)
 {
-    runTensorFlowNet("lstm", DNN_TARGET_CPU, true);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE ||
+        (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
+        throw SkipTestException("");
+    runTensorFlowNet("lstm", true);
+    runTensorFlowNet("lstm", true, 0.0, 0.0, true);
 }

-TEST(Test_TensorFlow, split)
+TEST_P(Test_TensorFlow_layers, split)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE)
+        throw SkipTestException("");
    runTensorFlowNet("split_equals");
 }

-TEST(Test_TensorFlow, resize_nearest_neighbor)
+TEST_P(Test_TensorFlow_layers, resize_nearest_neighbor)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_MYRIAD)
+        throw SkipTestException("");
    runTensorFlowNet("resize_nearest_neighbor");
+    runTensorFlowNet("keras_upsampling2d");
 }

-TEST(Test_TensorFlow, slice)
+TEST_P(Test_TensorFlow_layers, slice)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE &&
+        (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
+        throw SkipTestException("");
    runTensorFlowNet("slice_4d");
 }

-TEST(Test_TensorFlow, softmax)
+TEST_P(Test_TensorFlow_layers, softmax)
 {
    runTensorFlowNet("keras_softmax");
 }

-TEST(Test_TensorFlow, relu6)
+TEST_P(Test_TensorFlow_layers, relu6)
 {
    runTensorFlowNet("keras_relu6");
-    runTensorFlowNet("keras_relu6", DNN_TARGET_CPU, /*hasText*/ true);
+    runTensorFlowNet("keras_relu6", /*hasText*/ true);
 }

-TEST(Test_TensorFlow, keras_mobilenet_head)
+TEST_P(Test_TensorFlow_layers, keras_mobilenet_head)
 {
    runTensorFlowNet("keras_mobilenet_head");
 }

-TEST(Test_TensorFlow, memory_read)
-{
-    double l1 = 1e-5;
-    double lInf = 1e-4;
-    runTensorFlowNet("lstm", DNN_TARGET_CPU, true, l1, lInf, true);
-
-    runTensorFlowNet("batch_norm", DNN_TARGET_CPU, false, l1, lInf, true);
-    runTensorFlowNet("fused_batch_norm", DNN_TARGET_CPU, false, l1, lInf, true);
-    runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true);
-}
-
-TEST(Test_TensorFlow, resize_bilinear)
+TEST_P(Test_TensorFlow_layers, resize_bilinear)
 {
    runTensorFlowNet("resize_bilinear");
    runTensorFlowNet("resize_bilinear_factor");
 }

+INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_layers, dnnBackendsAndTargets());
+
 TEST(Test_TensorFlow, two_inputs)
 {
    Net net = readNet(path("two_inputs_net.pbtxt"));
--- a/modules/dnn/test/test_torch_importer.cpp
+++ b/modules/dnn/test/test_torch_importer.cpp
@ -296,7 +296,6 @@ TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
        Mat inputBlob = blobFromImage(img, 1.0, Size(), Scalar(103.939, 116.779, 123.68), false);

        net.setInput(inputBlob);
-        net.setPreferableBackend(DNN_BACKEND_OPENCV);
        Mat out = net.forward();

        // Deprocessing.
--- a/modules/features2d/src/fast.cpp
+++ b/modules/features2d/src/fast.cpp
@ -83,7 +83,7 @@ void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bo

    AutoBuffer<uchar> _buf((img.cols+16)*3*(sizeof(int) + sizeof(uchar)) + 128);
    uchar* buf[3];
-    buf[0] = _buf; buf[1] = buf[0] + img.cols; buf[2] = buf[1] + img.cols;
+    buf[0] = _buf.data(); buf[1] = buf[0] + img.cols; buf[2] = buf[1] + img.cols;
    int* cpbuf[3];
    cpbuf[0] = (int*)alignPtr(buf[2] + img.cols, sizeof(int)) + 1;
    cpbuf[1] = cpbuf[0] + img.cols + 1;
--- a/modules/features2d/src/orb.cpp
+++ b/modules/features2d/src/orb.cpp
@ -143,7 +143,7 @@ HarrisResponses(const Mat& img, const std::vector<Rect>& layerinfo,
    float scale_sq_sq = scale * scale * scale * scale;

    AutoBuffer<int> ofsbuf(blockSize*blockSize);
-    int* ofs = ofsbuf;
+    int* ofs = ofsbuf.data();
    for( int i = 0; i < blockSize; i++ )
        for( int j = 0; j < blockSize; j++ )
            ofs[i*blockSize + j] = (int)(i*step + j);
--- a/modules/flann/include/opencv2/flann/kmeans_index.h
+++ b/modules/flann/include/opencv2/flann/kmeans_index.h
@ -726,7 +726,7 @@ private:
        }

        cv::AutoBuffer<int> centers_idx_buf(branching);
-        int* centers_idx = (int*)centers_idx_buf;
+        int* centers_idx = centers_idx_buf.data();
        int centers_length;
        (this->*chooseCenters)(branching, indices, indices_length, centers_idx, centers_length);

@ -739,7 +739,7 @@ private:


        cv::AutoBuffer<double> dcenters_buf(branching*veclen_);
-        Matrix<double> dcenters((double*)dcenters_buf,branching,veclen_);
+        Matrix<double> dcenters(dcenters_buf.data(), branching, veclen_);
        for (int i=0; i<centers_length; ++i) {
            ElementType* vec = dataset_[centers_idx[i]];
            for (size_t k=0; k<veclen_; ++k) {
@ -749,7 +749,7 @@ private:

        std::vector<DistanceType> radiuses(branching);
        cv::AutoBuffer<int> count_buf(branching);
-        int* count = (int*)count_buf;
+        int* count = count_buf.data();
        for (int i=0; i<branching; ++i) {
            radiuses[i] = 0;
            count[i] = 0;
@ -757,7 +757,7 @@ private:

        //	assign points to clusters
        cv::AutoBuffer<int> belongs_to_buf(indices_length);
-        int* belongs_to = (int*)belongs_to_buf;
+        int* belongs_to = belongs_to_buf.data();
        for (int i=0; i<indices_length; ++i) {

            DistanceType sq_dist = distance_(dataset_[indices[i]], dcenters[0], veclen_);
--- a/modules/highgui/src/window_gtk.cpp
+++ b/modules/highgui/src/window_gtk.cpp
@ -40,9 +40,6 @@
 //M*/

 #include "precomp.hpp"
-#include "opencv2/imgproc.hpp"
-
-using namespace cv;

 #ifndef _WIN32

@ -66,6 +63,11 @@ using namespace cv;
    #include <GL/glu.h>
 #endif

+#include <opencv2/core/utils/logger.hpp>
+#include "opencv2/imgproc.hpp"
+
+using namespace cv;
+
 #ifndef BIT_ALLIN
    #define BIT_ALLIN(x,y) ( ((x)&(y)) == (y) )
 #endif
@ -447,8 +449,9 @@ cvImageWidget_destroy (GtkObject *object)
 #endif //GTK_VERSION3
 }

-static void cvImageWidget_class_init (CvImageWidgetClass * klass)
+static void cvImageWidget_class_init (gpointer g_class, gpointer /*class_data*/)
 {
+  CvImageWidgetClass* klass = (CvImageWidgetClass*)g_class;
 #if defined (GTK_VERSION3)
  GtkWidgetClass *widget_class = GTK_WIDGET_CLASS (klass);
 #else
@ -478,8 +481,9 @@ static void cvImageWidget_class_init (CvImageWidgetClass * klass)
 }

 static void
-cvImageWidget_init (CvImageWidget *image_widget)
+cvImageWidget_init(GTypeInstance* instance, gpointer /*g_class*/)
 {
+    CvImageWidget* image_widget = (CvImageWidget*)instance;
    image_widget->original_image=0;
    image_widget->scaled_image=0;
    image_widget->flags=0;
@ -494,9 +498,9 @@ GType cvImageWidget_get_type (void){
          GTK_TYPE_WIDGET,
          (gchar*) "CvImageWidget",
          sizeof(CvImageWidgetClass),
-          (GClassInitFunc) cvImageWidget_class_init,
+          cvImageWidget_class_init,
          sizeof(CvImageWidget),
-          (GInstanceInitFunc) cvImageWidget_init,
+          cvImageWidget_init,
          (GTypeFlags)0
          );
    }
@ -590,13 +594,18 @@ static gboolean icvOnMouse( GtkWidget *widget, GdkEvent *event, gpointer user_da

 #ifdef HAVE_GTHREAD
 int thread_started=0;
-static gpointer icvWindowThreadLoop();
+static gpointer icvWindowThreadLoop(gpointer data);
 GMutex*				   last_key_mutex = NULL;
 GCond*				   cond_have_key = NULL;
-GMutex*				   window_mutex = NULL;
 GThread*			   window_thread = NULL;
 #endif

+static cv::Mutex& getWindowMutex()
+{
+    static cv::Mutex* g_window_mutex = new cv::Mutex();
+    return *g_window_mutex;
+}
+
 static int             last_key = -1;
 static std::vector< Ptr<CvWindow> > g_windows;

@ -623,28 +632,28 @@ CV_IMPL int cvInitSystem( int argc, char** argv )
 CV_IMPL int cvStartWindowThread(){
 #ifdef HAVE_GTHREAD
    cvInitSystem(0,NULL);
-    if (!thread_started) {
-    if (!g_thread_supported ()) {
-        /* the GThread system wasn't inited, so init it */
-        g_thread_init(NULL);
-    }
-
-    // this mutex protects the window resources
-    window_mutex = g_mutex_new();
-
-    // protects the 'last key pressed' variable
-    last_key_mutex = g_mutex_new();
-
-    // conditional that indicates a key has been pressed
-    cond_have_key = g_cond_new();
-
-#if !GLIB_CHECK_VERSION(2, 32, 0)
-    // this is the window update thread
-    window_thread = g_thread_create((GThreadFunc) icvWindowThreadLoop,
-                    NULL, TRUE, NULL);
-#else
-    window_thread = g_thread_new("OpenCV window update", (GThreadFunc)icvWindowThreadLoop, NULL);
-#endif
+    if (!thread_started)
+    {
+       if (!g_thread_supported ()) {
+           /* the GThread system wasn't inited, so init it */
+           g_thread_init(NULL);
+       }
+
+       (void)getWindowMutex();  // force mutex initialization
+
+       // protects the 'last key pressed' variable
+       last_key_mutex = g_mutex_new();
+
+       // conditional that indicates a key has been pressed
+       cond_have_key = g_cond_new();
+
+   #if !GLIB_CHECK_VERSION(2, 32, 0)
+       // this is the window update thread
+       window_thread = g_thread_create(icvWindowThreadLoop,
+                       NULL, TRUE, NULL);
+   #else
+       window_thread = g_thread_new("OpenCV window update", icvWindowThreadLoop, NULL);
+   #endif
    }
    thread_started = window_thread!=NULL;
    return thread_started;
@ -654,12 +663,13 @@ CV_IMPL int cvStartWindowThread(){
 }

 #ifdef HAVE_GTHREAD
-gpointer icvWindowThreadLoop()
+gpointer icvWindowThreadLoop(gpointer /*data*/)
 {
    while(1){
-        g_mutex_lock(window_mutex);
-        gtk_main_iteration_do(FALSE);
-        g_mutex_unlock(window_mutex);
+        {
+            cv::AutoLock lock(getWindowMutex());
+            gtk_main_iteration_do(FALSE);
+        }

        // little sleep
        g_usleep(500);
@ -669,20 +679,10 @@ gpointer icvWindowThreadLoop()
    return NULL;
 }

-
-class GMutexLock {
-    GMutex* mutex_;
-public:
-    GMutexLock(GMutex* mutex) : mutex_(mutex) { if (mutex_) g_mutex_lock(mutex_); }
-    ~GMutexLock() { if (mutex_) g_mutex_unlock(mutex_); mutex_ = NULL; }
-};
-
-#define CV_LOCK_MUTEX() GMutexLock lock(window_mutex);
-
-#else
-#define CV_LOCK_MUTEX()
 #endif

+#define CV_LOCK_MUTEX() cv::AutoLock lock(getWindowMutex())
+
 static CvWindow* icvFindWindowByName( const char* name )
 {
    for(size_t i = 0; i < g_windows.size(); ++i)
@ -1703,18 +1703,19 @@ static gboolean icvOnKeyPress(GtkWidget* widget, GdkEventKey* event, gpointer us
    code |= event->state << 16;

 #ifdef HAVE_GTHREAD
-    if(thread_started) g_mutex_lock(last_key_mutex);
-#endif
-
-    last_key = code;
-
-#ifdef HAVE_GTHREAD
-    if(thread_started){
+    if(thread_started)
+    {
+        g_mutex_lock(last_key_mutex);
+        last_key = code;
        // signal any waiting threads
        g_cond_broadcast(cond_have_key);
        g_mutex_unlock(last_key_mutex);
    }
+    else
 #endif
+    {
+        last_key = code;
+    }

    return FALSE;
 }
@ -1884,10 +1885,12 @@ static gboolean icvAlarm( gpointer user_data )
 CV_IMPL int cvWaitKey( int delay )
 {
 #ifdef HAVE_GTHREAD
-    if(thread_started && g_thread_self()!=window_thread){
-        gboolean expired;
+    if (thread_started && g_thread_self() != window_thread)
+    {
+        gboolean expired = true;
        int my_last_key;

+        g_mutex_lock(last_key_mutex);
        // wait for signal or timeout if delay > 0
        if(delay>0){
            GTimeVal timer;
@ -1896,8 +1899,15 @@ CV_IMPL int cvWaitKey( int delay )
            expired = !g_cond_timed_wait(cond_have_key, last_key_mutex, &timer);
        }
        else{
-            g_cond_wait(cond_have_key, last_key_mutex);
-            expired=false;
+            if (g_windows.empty())
+            {
+                CV_LOG_WARNING(NULL, "cv::waitKey() is called without timeout and missing active windows. Ignoring");
+            }
+            else
+            {
+                g_cond_wait(cond_have_key, last_key_mutex);
+                expired=false;
+            }
        }
        my_last_key = last_key;
        g_mutex_unlock(last_key_mutex);
@ -1906,21 +1916,20 @@ CV_IMPL int cvWaitKey( int delay )
        }
        return my_last_key;
    }
-    else{
+    else
 #endif
+    {
        int expired = 0;
        guint timer = 0;
        if( delay > 0 )
            timer = g_timeout_add( delay, icvAlarm, &expired );
        last_key = -1;
-        while( gtk_main_iteration_do(TRUE) && last_key < 0 && !expired && !g_windows.empty())
+        while( gtk_main_iteration_do(TRUE) && last_key < 0 && !expired && (delay > 0 || !g_windows.empty()))
            ;

        if( delay > 0 && !expired )
            g_source_remove(timer);
-#ifdef HAVE_GTHREAD
    }
-#endif
    return last_key;
 }

--- a/modules/imgcodecs/src/grfmt_bmp.cpp
+++ b/modules/imgcodecs/src/grfmt_bmp.cpp
@ -223,7 +223,7 @@ bool  BmpDecoder::readData( Mat& img )
        }
        _bgr.allocate(m_width*3 + 32);
    }
-    uchar *src = _src, *bgr = _bgr;
+    uchar *src = _src.data(), *bgr = _bgr.data();

    CV_TRY
    {
--- a/modules/imgcodecs/src/grfmt_exr.cpp
+++ b/modules/imgcodecs/src/grfmt_exr.cpp
@ -199,7 +199,7 @@ bool  ExrDecoder::readData( Mat& img )
    if( !justcopy )
    {
        copy_buffer.allocate(sizeof(float) * m_width * 3);
-        buffer = copy_buffer;
+        buffer = copy_buffer.data();
        ystep = 0;
    }
    else
--- a/modules/imgcodecs/src/grfmt_jpeg.cpp
+++ b/modules/imgcodecs/src/grfmt_jpeg.cpp
@ -681,7 +681,7 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )

        if( channels > 1 )
            _buffer.allocate(width*channels);
-        buffer = _buffer;
+        buffer = _buffer.data();

        for( int y = 0; y < height; y++ )
        {
--- a/modules/imgcodecs/src/grfmt_pam.cpp
+++ b/modules/imgcodecs/src/grfmt_pam.cpp
@ -496,9 +496,7 @@ bool  PAMDecoder::readData( Mat& img )

    /* setting buffer to max data size so scaling up is possible */
    AutoBuffer<uchar> _src(src_elems_per_row * 2);
-    uchar* src = _src;
-    AutoBuffer<uchar> _gray_palette;
-    uchar* gray_palette = _gray_palette;
+    uchar* src = _src.data();

    if( m_offset < 0 || !m_strm.isOpened())
        return false;
@ -544,10 +542,7 @@ bool  PAMDecoder::readData( Mat& img )
            if (bit_mode) {
                if( target_channels == 1 )
                {
-                    _gray_palette.allocate(2);
-                    gray_palette = _gray_palette;
-                    gray_palette[0] = 0;
-                    gray_palette[1] = 255;
+                    uchar gray_palette[2] = {0, 255};
                    for( y = 0; y < m_height; y++, data += imp_stride )
                    {
                        m_strm.getBytes( src, src_stride );
@ -683,7 +678,7 @@ bool PAMEncoder::write( const Mat& img, const std::vector<int>& params )
        bufsize = tmp;

    AutoBuffer<char> _buffer(bufsize);
-    char* buffer = _buffer;
+    char* buffer = _buffer.data();

    /* write header */
    tmp = 0;
--- a/modules/imgcodecs/src/grfmt_png.cpp
+++ b/modules/imgcodecs/src/grfmt_png.cpp
@ -225,7 +225,7 @@ bool  PngDecoder::readData( Mat& img )
 {
    volatile bool result = false;
    AutoBuffer<uchar*> _buffer(m_height);
-    uchar** buffer = _buffer;
+    uchar** buffer = _buffer.data();
    int color = img.channels() > 1;

    png_structp png_ptr = (png_structp)m_png_ptr;
@ -426,7 +426,7 @@ bool  PngEncoder::write( const Mat& img, const std::vector<int>& params )
                    for( y = 0; y < height; y++ )
                        buffer[y] = img.data + y*img.step;

-                    png_write_image( png_ptr, buffer );
+                    png_write_image( png_ptr, buffer.data() );
                    png_write_end( png_ptr, info_ptr );

                    result = true;
--- a/modules/imgcodecs/src/grfmt_pxm.cpp
+++ b/modules/imgcodecs/src/grfmt_pxm.cpp
@ -245,7 +245,7 @@ bool PxMDecoder::readData( Mat& img )
            if( !m_binary )
            {
                AutoBuffer<uchar> _src(m_width);
-                uchar* src = _src;
+                uchar* src = _src.data();

                for (int y = 0; y < m_height; y++, data += img.step)
                {
@ -261,7 +261,7 @@ bool PxMDecoder::readData( Mat& img )
            else
            {
                AutoBuffer<uchar> _src(src_pitch);
-                uchar* src = _src;
+                uchar* src = _src.data();

                for (int y = 0; y < m_height; y++, data += img.step)
                {
@ -281,7 +281,7 @@ bool PxMDecoder::readData( Mat& img )
        case 24:
        {
            AutoBuffer<uchar> _src(std::max<size_t>(width3*2, src_pitch));
-            uchar* src = _src;
+            uchar* src = _src.data();

            for (int y = 0; y < m_height; y++, data += img.step)
            {
@ -463,7 +463,7 @@ bool PxMEncoder::write(const Mat& img, const std::vector<int>& params)
        bufferSize = lineLength;

    AutoBuffer<char> _buffer(bufferSize);
-    char* buffer = _buffer;
+    char* buffer = _buffer.data();

    // write header;
    const int code = ((mode == PXM_TYPE_PBM) ? 1 : (mode == PXM_TYPE_PGM) ? 2 : 3)
--- a/modules/imgcodecs/src/grfmt_sunras.cpp
+++ b/modules/imgcodecs/src/grfmt_sunras.cpp
@ -174,7 +174,7 @@ bool  SunRasterDecoder::readData( Mat& img )
        return false;

    AutoBuffer<uchar> _src(src_pitch + 32);
-    uchar* src = _src;
+    uchar* src = _src.data();

    if( !color && m_maptype == RMT_EQUAL_RGB )
        CvtPaletteToGray( m_palette, gray_palette, 1 << m_bpp );
--- a/modules/imgcodecs/src/grfmt_tiff.cpp
+++ b/modules/imgcodecs/src/grfmt_tiff.cpp
@ -355,7 +355,7 @@ bool  TiffDecoder::readData( Mat& img )
            }
            const size_t buffer_size = (bpp/bitsPerByte) * ncn * tile_height0 * tile_width0;
            AutoBuffer<uchar> _buffer( buffer_size );
-            uchar* buffer = _buffer;
+            uchar* buffer = _buffer.data();
            ushort* buffer16 = (ushort*)buffer;
            float* buffer32 = (float*)buffer;
            double* buffer64 = (double*)buffer;
@ -834,7 +834,7 @@ bool TiffEncoder::writeLibTiff( const std::vector<Mat>& img_vec, const std::vect
        // row buffer, because TIFFWriteScanline modifies the original data!
        size_t scanlineSize = TIFFScanlineSize(pTiffHandle);
        AutoBuffer<uchar> _buffer(scanlineSize + 32);
-        uchar* buffer = _buffer;
+        uchar* buffer = _buffer.data();
        if (!buffer)
        {
            TIFFClose(pTiffHandle);
--- a/modules/imgproc/src/approx.cpp
+++ b/modules/imgproc/src/approx.cpp
@ -63,7 +63,7 @@ CvSeq* icvApproximateChainTC89( CvChain* chain, int header_size,
    cv::AutoBuffer<_CvPtInfo> buf(chain->total + 8);

    _CvPtInfo       temp;
-    _CvPtInfo       *array = buf, *first = 0, *current = 0, *prev_current = 0;
+    _CvPtInfo       *array = buf.data(), *first = 0, *current = 0, *prev_current = 0;
    int             i, j, i1, i2, s, len;
    int             count = chain->total;

@ -475,14 +475,14 @@ namespace cv

 template<typename T> static int
 approxPolyDP_( const Point_<T>* src_contour, int count0, Point_<T>* dst_contour,
-              bool is_closed0, double eps, AutoBuffer<Range>* _stack )
+              bool is_closed0, double eps, AutoBuffer<Range>& _stack )
 {
    #define PUSH_SLICE(slice) \
        if( top >= stacksz ) \
        { \
-            _stack->resize(stacksz*3/2); \
-            stack = *_stack; \
-            stacksz = _stack->size(); \
+            _stack.resize(stacksz*3/2); \
+            stack = _stack.data(); \
+            stacksz = _stack.size(); \
        } \
        stack[top++] = slice

@ -504,8 +504,8 @@ approxPolyDP_( const Point_<T>* src_contour, int count0, Point_<T>* dst_contour,
    int             i = 0, j, pos = 0, wpos, count = count0, new_count=0;
    int             is_closed = is_closed0;
    bool            le_eps = false;
-    size_t top = 0, stacksz = _stack->size();
-    Range*          stack = *_stack;
+    size_t top = 0, stacksz = _stack.size();
+    Range*          stack = _stack.data();

    if( count == 0  )
        return 0;
@ -689,13 +689,13 @@ void cv::approxPolyDP( InputArray _curve, OutputArray _approxCurve,

    AutoBuffer<Point> _buf(npoints);
    AutoBuffer<Range> _stack(npoints);
-    Point* buf = _buf;
+    Point* buf = _buf.data();
    int nout = 0;

    if( depth == CV_32S )
-        nout = approxPolyDP_(curve.ptr<Point>(), npoints, buf, closed, epsilon, &_stack);
+        nout = approxPolyDP_(curve.ptr<Point>(), npoints, buf, closed, epsilon, _stack);
    else if( depth == CV_32F )
-        nout = approxPolyDP_(curve.ptr<Point2f>(), npoints, (Point2f*)buf, closed, epsilon, &_stack);
+        nout = approxPolyDP_(curve.ptr<Point2f>(), npoints, (Point2f*)buf, closed, epsilon, _stack);
    else
        CV_Error( CV_StsUnsupportedFormat, "" );

@ -783,7 +783,7 @@ cvApproxPoly( const void* array, int header_size,
            {
            int npoints = src_seq->total, nout = 0;
            _buf.allocate(npoints*2);
-            cv::Point *src = _buf, *dst = src + npoints;
+            cv::Point *src = _buf.data(), *dst = src + npoints;
            bool closed = CV_IS_SEQ_CLOSED(src_seq);

            if( src_seq->first->next == src_seq->first )
@ -792,10 +792,10 @@ cvApproxPoly( const void* array, int header_size,
                cvCvtSeqToArray(src_seq, src);

            if( CV_SEQ_ELTYPE(src_seq) == CV_32SC2 )
-                nout = cv::approxPolyDP_(src, npoints, dst, closed, parameter, &stack);
+                nout = cv::approxPolyDP_(src, npoints, dst, closed, parameter, stack);
            else if( CV_SEQ_ELTYPE(src_seq) == CV_32FC2 )
                nout = cv::approxPolyDP_((cv::Point2f*)src, npoints,
-                                         (cv::Point2f*)dst, closed, parameter, &stack);
+                                         (cv::Point2f*)dst, closed, parameter, stack);
            else
                CV_Error( CV_StsUnsupportedFormat, "" );

--- a/modules/imgproc/src/canny.cpp
+++ b/modules/imgproc/src/canny.cpp
@ -390,21 +390,21 @@ public:
        {
            dxMax.allocate(2 * dx.cols);
            dyMax.allocate(2 * dy.cols);
-            _dx_a = (short*)dxMax;
+            _dx_a = dxMax.data();
            _dx_n = _dx_a + dx.cols;
-            _dy_a = (short*)dyMax;
+            _dy_a = dyMax.data();
            _dy_n = _dy_a + dy.cols;
        }

        // _mag_p: previous row, _mag_a: actual row, _mag_n: next row
 #if CV_SIMD128
        AutoBuffer<int> buffer(3 * (mapstep * cn + CV_MALLOC_SIMD128));
-        _mag_p = alignPtr((int*)buffer + 1, CV_MALLOC_SIMD128);
+        _mag_p = alignPtr(buffer.data() + 1, CV_MALLOC_SIMD128);
        _mag_a = alignPtr(_mag_p + mapstep * cn, CV_MALLOC_SIMD128);
        _mag_n = alignPtr(_mag_a + mapstep * cn, CV_MALLOC_SIMD128);
 #else
        AutoBuffer<int> buffer(3 * (mapstep * cn));
-        _mag_p = (int*)buffer + 1;
+        _mag_p = buffer.data() + 1;
        _mag_a = _mag_p + mapstep * cn;
        _mag_n = _mag_a + mapstep * cn;
 #endif
--- a/modules/imgproc/src/clahe.cpp
+++ b/modules/imgproc/src/clahe.cpp
@ -230,7 +230,7 @@ namespace
            src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY)
        {
            buf.allocate(src.cols << 2);
-            ind1_p = (int *)buf;
+            ind1_p = buf.data();
            ind2_p = ind1_p + src.cols;
            xa_p = (float *)(ind2_p + src.cols);
            xa1_p = xa_p + src.cols;
--- a/modules/imgproc/src/color_lab.cpp
+++ b/modules/imgproc/src/color_lab.cpp
@ -1398,7 +1398,7 @@ static LABLUVLUT_s16_t initLUTforLABLUVs16(const softfloat & un, const softfloat
                for (int p_ = 0; p_ < 2; ++p_)
                    for (int q_ = 0; q_ < 2; ++q_)
                        for (int r_ = 0; r_ < 2; ++r_)
-                            fill_one(RGB2LabLUT_s16, RGB2Labprev, RGB2LuvLUT_s16, RGB2Luvprev, p, q, r, p_, q_, r_);
+                            fill_one(RGB2LabLUT_s16, RGB2Labprev.data(), RGB2LuvLUT_s16, RGB2Luvprev.data(), p, q, r, p_, q_, r_);
    LABLUVLUT_s16_t res;
    res.RGB2LabLUT_s16 = RGB2LabLUT_s16;
    res.RGB2LuvLUT_s16 = RGB2LuvLUT_s16;
--- a/modules/imgproc/src/convhull.cpp
+++ b/modules/imgproc/src/convhull.cpp
@ -147,11 +147,11 @@ void convexHull( InputArray _points, OutputArray _hull, bool clockwise, bool ret
    bool is_float = depth == CV_32F;
    AutoBuffer<Point*> _pointer(total);
    AutoBuffer<int> _stack(total + 2), _hullbuf(total);
-    Point** pointer = _pointer;
+    Point** pointer = _pointer.data();
    Point2f** pointerf = (Point2f**)pointer;
    Point* data0 = points.ptr<Point>();
-    int* stack = _stack;
-    int* hullbuf = _hullbuf;
+    int* stack = _stack.data();
+    int* hullbuf = _hullbuf.data();

    CV_Assert(points.isContinuous());

--- a/modules/imgproc/src/corner.cpp
+++ b/modules/imgproc/src/corner.cpp
@ -538,7 +538,7 @@ static bool ipp_cornerMinEigenVal( InputArray _src, OutputArray _dst, int blockS
                if (ok >= 0)
                {
                    AutoBuffer<uchar> buffer(bufferSize);
-                    ok = CV_INSTRUMENT_FUN_IPP(ippiMinEigenVal_C1R, src.ptr(), (int) src.step, dst.ptr<Ipp32f>(), (int) dst.step, srcRoi, kerType, kerSize, blockSize, buffer);
+                    ok = CV_INSTRUMENT_FUN_IPP(ippiMinEigenVal_C1R, src.ptr(), (int) src.step, dst.ptr<Ipp32f>(), (int) dst.step, srcRoi, kerType, kerSize, blockSize, buffer.data());
                    CV_SUPPRESS_DEPRECATED_START
                    if (ok >= 0) ok = CV_INSTRUMENT_FUN_IPP(ippiMulC_32f_C1IR, norm_coef, dst.ptr<Ipp32f>(), (int) dst.step, srcRoi);
                    CV_SUPPRESS_DEPRECATED_END
--- a/modules/imgproc/src/demosaicing.cpp
+++ b/modules/imgproc/src/demosaicing.cpp
@ -976,7 +976,7 @@ static void Bayer2RGB_VNG_8u( const Mat& srcmat, Mat& dstmat, int code )
    int N = size.width, N2 = N*2, N3 = N*3, N4 = N*4, N5 = N*5, N6 = N*6, N7 = N*7;
    int i, bufstep = N7*bcn;
    cv::AutoBuffer<ushort> _buf(bufstep*brows);
-    ushort* buf = (ushort*)_buf;
+    ushort* buf = _buf.data();

    bayer += bstep*2;

--- a/modules/imgproc/src/distransform.cpp
+++ b/modules/imgproc/src/distransform.cpp
@ -458,7 +458,7 @@ struct DTColumnInvoker : ParallelLoopBody
        int m = src->rows;
        size_t sstep = src->step, dstep = dst->step/sizeof(float);
        AutoBuffer<int> _d(m);
-        int* d = _d;
+        int* d = _d.data();

        for( i = i1; i < i2; i++ )
        {
@ -503,7 +503,7 @@ struct DTRowInvoker : ParallelLoopBody
        int i, i1 = range.start, i2 = range.end;
        int n = dst->cols;
        AutoBuffer<uchar> _buf((n+2)*2*sizeof(float) + (n+2)*sizeof(int));
-        float* f = (float*)(uchar*)_buf;
+        float* f = (float*)_buf.data();
        float* z = f + n;
        int* v = alignPtr((int*)(z + n + 1), sizeof(int));

@ -564,7 +564,7 @@ trueDistTrans( const Mat& src, Mat& dst )

    cv::AutoBuffer<uchar> _buf(std::max(m*2*sizeof(float) + (m*3+1)*sizeof(int), n*2*sizeof(float)));
    // stage 1: compute 1d distance transform of each column
-    float* sqr_tab = (float*)(uchar*)_buf;
+    float* sqr_tab = (float*)_buf.data();
    int* sat_tab = cv::alignPtr((int*)(sqr_tab + m*2), sizeof(int));
    int shift = m*2;

--- a/modules/imgproc/src/drawing.cpp
+++ b/modules/imgproc/src/drawing.cpp
@ -2397,8 +2397,8 @@ void cv::fillPoly(InputOutputArray _img, InputArrayOfArrays pts,
        return;
    AutoBuffer<Point*> _ptsptr(ncontours);
    AutoBuffer<int> _npts(ncontours);
-    Point** ptsptr = _ptsptr;
-    int* npts = _npts;
+    Point** ptsptr = _ptsptr.data();
+    int* npts = _npts.data();

    for( i = 0; i < ncontours; i++ )
    {
@ -2425,8 +2425,8 @@ void cv::polylines(InputOutputArray _img, InputArrayOfArrays pts,
        return;
    AutoBuffer<Point*> _ptsptr(ncontours);
    AutoBuffer<int> _npts(ncontours);
-    Point** ptsptr = _ptsptr;
-    int* npts = _npts;
+    Point** ptsptr = _ptsptr.data();
+    int* npts = _npts.data();

    for( i = 0; i < ncontours; i++ )
    {
--- a/modules/imgproc/src/emd.cpp
+++ b/modules/imgproc/src/emd.cpp
@ -359,7 +359,7 @@ static int icvInitEMD( const float* signature1, int size1,
    /* allocate buffers */
    _buffer.allocate(buffer_size);

-    state->buffer = buffer = _buffer;
+    state->buffer = buffer = _buffer.data();
    buffer_end = buffer + buffer_size;

    state->idx1 = (int*) buffer;
--- a/modules/imgproc/src/filter.cpp
+++ b/modules/imgproc/src/filter.cpp
@ -1444,7 +1444,7 @@ private:
                return 0;
        }
        AutoBuffer<uchar> buf(bufsz + 64);
-        uchar* bufptr = alignPtr((uchar*)buf, 32);
+        uchar* bufptr = alignPtr(buf.data(), 32);
        int step = (int)(width*sizeof(dst[0])*cn);
        float borderValue[] = {0.f, 0.f, 0.f};
        // here is the trick. IPP needs border type and extrapolates the row. We did it already.
--- a/modules/imgproc/src/geometry.cpp
+++ b/modules/imgproc/src/geometry.cpp
@ -524,7 +524,7 @@ float cv::intersectConvexConvex( InputArray _p1, InputArray _p2, OutputArray _p1
    }

    AutoBuffer<Point2f> _result(n*2 + m*2 + 1);
-    Point2f *fp1 = _result, *fp2 = fp1 + n;
+    Point2f *fp1 = _result.data(), *fp2 = fp1 + n;
    Point2f* result = fp2 + m;
    int orientation = 0;

--- a/modules/imgproc/src/hough.cpp
+++ b/modules/imgproc/src/hough.cpp
@ -165,11 +165,11 @@ HoughLinesStandard( InputArray src, OutputArray lines, int type,
    AutoBuffer<float> _tabSin(numangle);
    AutoBuffer<float> _tabCos(numangle);
    int *accum = _accum.ptr<int>();
-    float *tabSin = _tabSin, *tabCos = _tabCos;
+    float *tabSin = _tabSin.data(), *tabCos = _tabCos.data();

    // create sin and cos table
    createTrigTable( numangle, min_theta, theta,
-                     irho, tabSin, tabCos );
+                     irho, tabSin, tabCos);

    // stage 1. fill accumulator
    for( i = 0; i < height; i++ )
@ -963,7 +963,7 @@ void HoughLinesPointSet( InputArray _point, OutputArray _lines, int lines_max, i
    AutoBuffer<float> _tabSin(numangle);
    AutoBuffer<float> _tabCos(numangle);
    int *accum = _accum.ptr<int>();
-    float *tabSin = _tabSin, *tabCos = _tabCos;
+    float *tabSin = _tabSin.data(), *tabCos = _tabCos.data();

    // create sin and cos table
    createTrigTable( numangle, min_theta, theta_step,
@ -1408,8 +1408,8 @@ protected:
        int nBins = cvRound((maxRadius - minRadius)/dr*nBinsPerDr);
        AutoBuffer<int> bins(nBins);
        AutoBuffer<float> distBuf(nzSz), distSqrtBuf(nzSz);
-        float *ddata = distBuf;
-        float *dSqrtData = distSqrtBuf;
+        float *ddata = distBuf.data();
+        float *dSqrtData = distSqrtBuf.data();

        bool singleThread = (boundaries == Range(0, centerSz));
        int i = boundaries.start;
@ -1434,7 +1434,7 @@ protected:
                Mat_<float> distSqrtMat(1, nzCount, dSqrtData);
                sqrt(distMat, distSqrtMat);

-                memset(bins, 0, sizeof(bins[0])*bins.size());
+                memset(bins.data(), 0, sizeof(bins[0])*bins.size());
                for(int k = 0; k < nzCount; k++)
                {
                    int bin = std::max(0, std::min(nBins-1, cvRound((dSqrtData[k] - minRadius)/dr*nBinsPerDr)));
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@ -228,7 +228,7 @@ static const void* initInterTab2D( int method, bool fixpt )
    {
        AutoBuffer<float> _tab(8*INTER_TAB_SIZE);
        int i, j, k1, k2;
-        initInterTab1D(method, _tab, INTER_TAB_SIZE);
+        initInterTab1D(method, _tab.data(), INTER_TAB_SIZE);
        for( i = 0; i < INTER_TAB_SIZE; i++ )
            for( j = 0; j < INTER_TAB_SIZE; j++, tab += ksize*ksize, itab += ksize*ksize )
            {
--- a/modules/imgproc/src/linefit.cpp
+++ b/modules/imgproc/src/linefit.cpp
@ -360,7 +360,7 @@ static void fitLine2D( const Point2f * points, int count, int dist,
    }

    AutoBuffer<float> wr(count*2);
-    float *w = wr, *r = w + count;
+    float *w = wr.data(), *r = w + count;

    for( k = 0; k < 20; k++ )
    {
@ -495,7 +495,7 @@ static void fitLine3D( Point3f * points, int count, int dist,
    }

    AutoBuffer<float> buf(count*2);
-    float *w = buf, *r = w + count;
+    float *w = buf.data(), *r = w + count;

    for( k = 0; k < 20; k++ )
    {
--- a/Show More
+++ b/Show More