diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index 7e7a47199..8b7cdfbbb 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -29,6 +29,10 @@ else() ) endif() +if(ANDROID) + add_definitions(-DDISABLE_POSIX_MEMALIGN -DTH_DISABLE_HEAP_TRACKING) +endif() + # ---------------------------------------------------------------------------- # Resolve libprotobuf dependency # ---------------------------------------------------------------------------- @@ -55,7 +59,7 @@ endif() # ---------------------------------------------------------------------------- # Torch7 importer of blobs and models, produced by Torch.nn module # ---------------------------------------------------------------------------- -OCV_OPTION(${the_module}_BUILD_TORCH_IMPORTER "Build Torch model importer (experimental functionality!)" OFF) +OCV_OPTION(${the_module}_BUILD_TORCH_IMPORTER "Build Torch model importer" ON) if(${the_module}_BUILD_TORCH_IMPORTER) add_definitions(-DENABLE_TORCH_IMPORTER=1) ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4702 /wd4127 /wd4267) #supress warnings in original torch files diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index ace63ddfe..9d26b35e7 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -209,7 +209,7 @@ namespace dnn { public: - CV_PROP_RW Size kernel, stride, pad, dilation; + CV_PROP_RW Size kernel, stride, pad, dilation, adjustPad; CV_PROP_RW String padMode; }; @@ -224,7 +224,7 @@ namespace dnn { public: - static CV_WRAP Ptr create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0), Size dilation = Size(1, 1)); + static CV_WRAP Ptr create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0), Size dilation = Size(1, 1), Size adjustPad = Size()); }; class CV_EXPORTS_W LRNLayer : public Layer @@ -341,6 +341,12 @@ namespace dnn static CV_WRAP Ptr create(double negativeSlope = 0); }; + class CV_EXPORTS_W ChannelsPReLULayer : public Layer + { + public: + static CV_WRAP Ptr create(); + }; + class CV_EXPORTS_W TanHLayer : public Layer { public: @@ -397,6 +403,18 @@ namespace dnn static Ptr create(EltwiseOp op, const std::vector &coeffs); }; + class CV_EXPORTS_W BatchNormLayer : public Layer + { + public: + static CV_WRAP Ptr create(float eps, bool has_weights, bool has_bias); + }; + + class CV_EXPORTS_W MaxUnpoolLayer : public Layer + { + public: + static CV_WRAP Ptr create(Size unpoolSize); + }; + //! @} //! @} diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index ca4a6abd5..41d975bdf 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -270,6 +270,9 @@ namespace dnn //! This namespace is used for dnn module functionlaity. */ CV_WRAP Blob getParam(LayerId layer, int numParam = 0); + /** @brief Returns indexes of layers with unconnected outputs. + */ + CV_WRAP std::vector getUnconnectedOutLayers() const; private: struct Impl; diff --git a/modules/dnn/samples/torch_enet.cpp b/modules/dnn/samples/torch_enet.cpp new file mode 100644 index 000000000..74939a26a --- /dev/null +++ b/modules/dnn/samples/torch_enet.cpp @@ -0,0 +1,176 @@ +/* +Sample of using OpenCV dnn module with Torch ENet model. 
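+
+Example invocation (a sketch; the sample's binary name depends on how the samples are built and
+is an assumption here, the option keys are the ones defined below):
+
+    ./torch_enet -m=model_best.net -i=street.png -c=categories.txt -o_blob=<output blob name>
+
+If the output blob name is not known in advance, Net::getUnconnectedOutLayers() (added by this
+patch) can help to locate the last layer of the imported network.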
+*/
+
+#include <opencv2/dnn.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/highgui.hpp>
+using namespace cv;
+using namespace cv::dnn;
+
+#include <fstream>
+#include <iostream>
+#include <cstdlib>
+#include <sstream>
+using namespace std;
+
+const String keys =
+    "{help h || Sample app for loading ENet Torch model. "
+    "The model and class names list can be downloaded here: "
+    "https://www.dropbox.com/sh/dywzk3gyb12hpe5/AAD5YkUa8XgMpHs2gCRgmCVCa }"
+    "{model m || path to Torch .net model file (model_best.net) }"
+    "{image i || path to image file }"
+    "{i_blob | .0 | input blob name }"
+    "{o_blob || output blob name }"
+    "{c_names c || path to file with classnames for channels (categories.txt) }"
+    "{result r || path to save output blob (optional, binary format, NCHW order) }"
+    ;
+
+std::vector<String> readClassNames(const char *filename);
+
+int main(int argc, char **argv)
+{
+    cv::CommandLineParser parser(argc, argv, keys);
+
+    if (parser.has("help"))
+    {
+        parser.printMessage();
+        return 0;
+    }
+
+    String modelFile = parser.get<String>("model");
+    String imageFile = parser.get<String>("image");
+    String inBlobName = parser.get<String>("i_blob");
+    String outBlobName = parser.get<String>("o_blob");
+
+    if (!parser.check())
+    {
+        parser.printErrors();
+        return 0;
+    }
+
+    String classNamesFile = parser.get<String>("c_names");
+    String resultFile = parser.get<String>("result");
+
+    //! [Create the importer of Torch model]
+    Ptr<Importer> importer;
+    try //Try to import Torch ENet model
+    {
+        importer = dnn::createTorchImporter(modelFile);
+    }
+    catch (const cv::Exception &err) //Importer can throw errors, we will catch them
+    {
+        std::cerr << err.msg << std::endl;
+    }
+    //! [Create the importer of Torch model]
+
+    if (!importer)
+    {
+        std::cerr << "Can't load network by using the model file: " << std::endl;
+        std::cerr << modelFile << std::endl;
+        exit(-1);
+    }
+
+    //! [Initialize network]
+    dnn::Net net;
+    importer->populateNet(net);
+    importer.release(); //We don't need importer anymore
+    //! [Initialize network]
+
+    //! [Prepare blob]
+    Mat img = imread(imageFile);
+    if (img.empty())
+    {
+        std::cerr << "Can't read image from the file: " << imageFile << std::endl;
+        exit(-1);
+    }
+
+    cv::Size inputImgSize = cv::Size(512, 512);
+
+    if (inputImgSize != img.size())
+        resize(img, img, inputImgSize); //Resize image to input size
+
+    if(img.channels() == 3)
+        cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
+
+    img.convertTo(img, CV_32F, 1/255.0);
+
+    dnn::Blob inputBlob = dnn::Blob::fromImages(img); //Convert Mat to dnn::Blob image batch
+    //! [Prepare blob]
+
+    //! [Set input blob]
+    net.setBlob(inBlobName, inputBlob); //set the network input
+    //! [Set input blob]
+
+    cv::TickMeter tm;
+    tm.start();
+
+    //! [Make forward pass]
+    net.forward(); //compute output
+    //! [Make forward pass]
+
+    tm.stop();
+
+    //! 
[Gather output] + dnn::Blob prob = net.getBlob(outBlobName); //gather output of "prob" layer + + Mat& result = prob.matRef(); + + BlobShape shape = prob.shape(); + + if (!resultFile.empty()) { + CV_Assert(result.isContinuous()); + + ofstream fout(resultFile.c_str(), ios::out | ios::binary); + fout.write((char*)result.data, result.total() * sizeof(float)); + fout.close(); + } + + std::cout << "Output blob shape " << shape << std::endl; + std::cout << "Inference time, ms: " << tm.getTimeMilli() << std::endl; + + std::vector classNames; + if(!classNamesFile.empty()) { + classNames = readClassNames(classNamesFile.c_str()); + if (classNames.size() > prob.channels()) + classNames = std::vector(classNames.begin() + classNames.size() - prob.channels(), + classNames.end()); + } + + for(int i_c = 0; i_c < prob.channels(); i_c++) { + ostringstream convert; + convert << "Channel #" << i_c; + + if(classNames.size() == prob.channels()) + convert << ": " << classNames[i_c]; + + imshow(convert.str().c_str(), prob.getPlane(0, i_c)); + } + waitKey(); + + return 0; +} //main + + +std::vector readClassNames(const char *filename) +{ + std::vector classNames; + + std::ifstream fp(filename); + if (!fp.is_open()) + { + std::cerr << "File with classes labels not found: " << filename << std::endl; + exit(-1); + } + + std::string name; + while (!fp.eof()) + { + std::getline(fp, name); + if (name.length()) + classNames.push_back(name); + } + + fp.close(); + return classNames; +} diff --git a/modules/dnn/src/caffe/layer_loaders.cpp b/modules/dnn/src/caffe/layer_loaders.cpp index 40a7e567a..8b3372003 100644 --- a/modules/dnn/src/caffe/layer_loaders.cpp +++ b/modules/dnn/src/caffe/layer_loaders.cpp @@ -23,6 +23,9 @@ static void initConvDeconvLayerFromCaffe(Ptr l, LayerParam int numOutput = params.get("num_output"); int group = params.get("group", 1); + l->adjustPad.height = params.get("adj_h", 0); + l->adjustPad.width = params.get("adj_w", 0); + CV_Assert(numOutput % group == 0); CV_Assert((bias && l->blobs.size() == 2) || (!bias && l->blobs.size() == 1)); } @@ -40,6 +43,7 @@ Ptr createLayerFromCaffe(LayerParams ¶ms) { Ptr l = DeconvolutionLayer::create(); initConvDeconvLayerFromCaffe(l, params); + return Ptr(l); } @@ -248,7 +252,7 @@ Ptr createLayerFromCaffe(LayerParams& params) return Ptr(CropLayer::create(start_axis, offset)); } -template<> //Power specialization +template<> //Eltwise specialization Ptr createLayerFromCaffe(LayerParams& params) { EltwiseLayer::EltwiseOp op = EltwiseLayer::SUM; @@ -278,6 +282,42 @@ Ptr createLayerFromCaffe(LayerParams& params) return Ptr(EltwiseLayer::create(op, coeffs)); } +template<> //BatchNormLayer specialization +Ptr createLayerFromCaffe(LayerParams& params) +{ + const std::vector &blobs = params.blobs; + CV_Assert(blobs.size() == 4); + + float eps = params.get("eps"); + bool hasWeights = params.get("has_weight", false); + bool hasBias = params.get("has_bias", false); + + Ptr l = BatchNormLayer::create(eps, hasWeights, hasBias); + l->setParamsFrom(params); + + return Ptr(l); +} + +template<> //ChannelsPReLULayer specialization +Ptr createLayerFromCaffe(LayerParams& params) +{ + CV_Assert(params.blobs.size() == 1); + Ptr l = ChannelsPReLULayer::create(); + l->setParamsFrom(params); + + return Ptr(l); +} + +template<> //MaxUnpoolLayer specialization +Ptr createLayerFromCaffe(LayerParams& params) +{ + Size outSize(params.get("out_w"), + params.get("out_h")); + Ptr l = MaxUnpoolLayer::create(outSize); + + return Ptr(l); +} + //Explicit instantiation template Ptr 
createLayerFromCaffe(LayerParams&); template Ptr createLayerFromCaffe(LayerParams&); @@ -299,6 +339,9 @@ template Ptr createLayerFromCaffe(LayerParams&); template Ptr createLayerFromCaffe(LayerParams&); template Ptr createLayerFromCaffe(LayerParams&); +template Ptr createLayerFromCaffe(LayerParams&); +template Ptr createLayerFromCaffe(LayerParams&); +template Ptr createLayerFromCaffe(LayerParams&); } } diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 5c009272f..9d9748df8 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -592,6 +592,24 @@ bool Net::empty() const return impl->layers.size() <= 1; //first layer is default Data layer } +std::vector Net::getUnconnectedOutLayers() const +{ + std::vector layersIds; + + Impl::MapIdToLayerData::iterator it; + for (it = impl->layers.begin(); it != impl->layers.end(); it++) + { + int lid = it->first; + LayerData &ld = it->second; + + if (ld.requiredOutputs.size() == 0) + layersIds.push_back(lid); + } + + return layersIds; +} + + ////////////////////////////////////////////////////////////////////////// Importer::~Importer() {} diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp index 5b675bdef..c10b21e62 100644 --- a/modules/dnn/src/init.cpp +++ b/modules/dnn/src/init.cpp @@ -51,6 +51,7 @@ #include "layers/detection_output_layer.hpp" #include "layers/normalize_bbox_layer.hpp" #include "layers/shift_layer.hpp" +#include "layers/padding_layer.hpp" namespace cv { @@ -89,11 +90,14 @@ void initModule() REG_RUNTIME_LAYER_FUNC(MVN, createLayerFromCaffe); REG_RUNTIME_LAYER_FUNC(ReLU, createLayerFromCaffe); + REG_RUNTIME_LAYER_FUNC(ChannelsPReLU, createLayerFromCaffe); REG_RUNTIME_LAYER_FUNC(Sigmoid, createLayerFromCaffe); REG_RUNTIME_LAYER_FUNC(TanH, createLayerFromCaffe); REG_RUNTIME_LAYER_FUNC(BNLL, createLayerFromCaffe); REG_RUNTIME_LAYER_FUNC(AbsVal, createLayerFromCaffe); REG_RUNTIME_LAYER_FUNC(Power, createLayerFromCaffe); + REG_RUNTIME_LAYER_FUNC(BatchNorm, createLayerFromCaffe); + REG_RUNTIME_LAYER_FUNC(MaxUnpool, createLayerFromCaffe); REG_RUNTIME_LAYER_CLASS(Dropout, BlankLayer); REG_RUNTIME_LAYER_CLASS(Identity, BlankLayer); @@ -104,6 +108,7 @@ void initModule() REG_RUNTIME_LAYER_CLASS(DetectionOutput, DetectionOutputLayer); REG_RUNTIME_LAYER_CLASS(NormalizeBBox, NormalizeBBoxLayer); REG_RUNTIME_LAYER_CLASS(Shift, ShiftLayer); + REG_RUNTIME_LAYER_CLASS(Padding, PaddingLayer); init.status = true; } diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp new file mode 100644 index 000000000..e2a651d66 --- /dev/null +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -0,0 +1,69 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2016, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. + +/* +Implementation of Batch Normalization layer. 
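+
+At inference time the layer applies, per channel c (a sketch of the math implemented in
+forward() below):
+
+    y_c = (x_c - mean_c) / sqrt(var_c + eps) * w_c + b_c
+
+where blobs[0] and blobs[1] hold the running mean and variance, and the optional blobs[2] and
+blobs[3] hold the scale (weight) and shift (bias) applied when has_weights/has_bias are set.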
+*/ + +#include "batch_norm_layer.hpp" + +namespace cv +{ +namespace dnn +{ + +BatchNormLayerImpl::BatchNormLayerImpl(float eps_, bool hasWeights_, bool hasBias_): + eps(eps_), + hasWeights(hasWeights_), + hasBias(hasBias_) +{} + +void BatchNormLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) +{ + CV_Assert(blobs.size() == 4); + + outputs.resize(inputs.size()); + for (size_t i = 0; i < inputs.size(); i++) + { + outputs[i].create(inputs[i]->shape()); + } +} + +void BatchNormLayerImpl::forward(std::vector &inputs, std::vector &outputs) +{ + CV_Assert(inputs.size() == 1); + + Blob &inpBlob = *inputs[0]; + + for (size_t ii = 0; ii < outputs.size(); ii++) + { + Blob &outBlob = outputs[ii]; + + if (hasWeights) + CV_Assert(inpBlob.channels() == blobs[2].total()); + + if (hasBias) + CV_Assert(inpBlob.channels() == blobs[3].total()); + + for (int n = 0; n < inpBlob.channels(); n++) + { + float mean = blobs[0].matRefConst().at(n); + float invstd = 1 / sqrt(blobs[1].matRefConst().at(n) + eps); + float w = hasWeights ? blobs[2].matRefConst().at(n) : 1; + float b = hasBias ? blobs[3].matRefConst().at(n) : 0; + outBlob.getPlane(0, n) = (inpBlob.getPlane(0, n) - mean)*(w*invstd) + b; + } + } +} + +Ptr BatchNormLayer::create(float eps, bool has_weights, bool has_bias) +{ + return Ptr(new BatchNormLayerImpl(eps, has_weights, has_bias)); +} + +} // namespace dnn +} // namespace cv diff --git a/modules/dnn/src/layers/batch_norm_layer.hpp b/modules/dnn/src/layers/batch_norm_layer.hpp new file mode 100644 index 000000000..ebc69a336 --- /dev/null +++ b/modules/dnn/src/layers/batch_norm_layer.hpp @@ -0,0 +1,37 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2016, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. + +/* +Declaration of Batch Normalization layer. 
+*/ + +#ifndef __OPENCV_DNN_LAYERS_BATCH_NORM_LAYER_HPP__ +#define __OPENCV_DNN_LAYERS_BATCH_NORM_LAYER_HPP__ +#include + +namespace cv +{ +namespace dnn +{ + +class BatchNormLayerImpl : public BatchNormLayer +{ +public: + BatchNormLayerImpl(float eps_, bool hasWeights_, bool hasBias_); + + void allocate(const std::vector &inputs, std::vector &outputs); + + void forward(std::vector &inputs, std::vector &outputs); + +private: + float eps; + bool hasWeights, hasBias; +}; + +} +} +#endif // BATCH_NORM_LAYER_HPP diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index d10df0131..8a3dafabd 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -53,12 +53,14 @@ namespace cv namespace dnn { -ConvolutionLayerImpl::ConvolutionLayerImpl() +BaseConvolutionLayerImpl::BaseConvolutionLayerImpl(): + numOutput(-1), group(-1), + inpH(0), inpW(0), inpCn(0), + outH(0), outW(0), outCn(0), + inpGroupCn(0), outGroupCn(0), + ksize(0), colBlobCols(0), + bias(false), tryUseOpenCL(false) { - tryUseOpenCL = false; //true; - numOutput = -1; - group = -1; - #if HAVE_CBLAS if (getBlasThreads() != cv::getThreadNum()) { @@ -67,37 +69,23 @@ ConvolutionLayerImpl::ConvolutionLayerImpl() #endif } -void ConvolutionLayerImpl::init() +void BaseConvolutionLayerImpl::init() { - CV_Assert(1 <= blobs.size() && blobs.size() <= 2); - - bias = (blobs.size() >= 2); - numOutput = blobs[0].num(); - + CV_Assert(blobs.size() >= 1 && blobs.size() <= 2); CV_Assert(blobs[0].dims() == 4 && blobs[0].cols() == kernel.width && blobs[0].rows() == kernel.height); - CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].num()); - //TODO: dilation in OCL mode + bias = (blobs.size() >= 2); useOpenCL = ocl::useOpenCL() && tryUseOpenCL && dilation == Size(1, 1); } -void ConvolutionLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) +void BaseConvolutionLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) { + CV_Assert(inputs.size() > 0); + init(); - CV_Assert(inputs.size() > 0); const Blob &input = *inputs[0]; CV_Assert(input.dims() == 4 && (input.type() == CV_32F || input.type() == CV_64F)); - computeInpOutShape(input); - - group = inpCn / blobs[0].channels(); - CV_Assert(inpCn % group == 0 && outCn % group == 0); - CV_Assert(blobs[0].num() == outCn && blobs[0].channels() == inpCn / group); - - outGroupCn = outCn / group; - inpGroupCn = inpCn / group; - ksize = inpGroupCn * kernel.height * kernel.width; - for (size_t i = 0; i < inputs.size(); i++) { CV_Assert(inputs[i]->type() == input.type()); @@ -105,36 +93,73 @@ void ConvolutionLayerImpl::allocate(const std::vector &inputs, std::vecto CV_Assert(inputs[i]->rows() == input.rows() && inputs[i]->cols() == input.cols()); } - int allocFlags = useOpenCL ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT; + computeInpOutShape(input); - if (!is1x1()) - { - colBlob.create(Shape(ksize, outH * outW), input.type(), allocFlags); - } + int allocFlags = useOpenCL ? 
Blob::ALLOC_UMAT : Blob::ALLOC_MAT; if (bias) { - biasOnesBlob.create(Shape(1, topH * topW), input.type(), allocFlags); + biasOnesBlob.create(Shape(1, outH * outW), input.type(), allocFlags); biasOnesBlob.setTo(1); } outputs.resize(inputs.size()); for (size_t i = 0; i < inputs.size(); i++) { - outputs[i].create(Shape(inputs[i]->num(), topCn, topH, topW), input.type(), allocFlags); + outputs[i].create(Shape(inputs[i]->num(), outCn, outH, outW), input.type(), allocFlags); + } + + if (!is1x1()) + { + colBlob.create(Shape(ksize, colBlobCols), input.type(), allocFlags); } } -bool ConvolutionLayerImpl::is1x1() const +bool BaseConvolutionLayerImpl::is1x1() const { return (kernel.height == 1 && kernel.width == 1) && (stride.height == 1 && stride.width == 1) && (dilation.height == 1 && dilation.width == 1); } +void ConvolutionLayerImpl::computeInpOutShape(const Blob &input) +{ + CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].num()); + + numOutput = blobs[0].num(); + + inpH = input.rows(); + inpW = input.cols(); + inpCn = input.channels(); + outCn = numOutput; + + if (padMode.empty()) + { + outH = (inpH + 2 * pad.height - (dilation.height * (kernel.height - 1) + 1)) / stride.height + 1; + outW = (inpW + 2 * pad.width - (dilation.width * (kernel.width - 1) + 1)) / stride.width + 1; + } + else + { + getConvPoolOutParams(inpH, inpW, kernel, stride, pad, padMode, outH, outW); + } + + group = inpCn / blobs[0].channels(); + + CV_Assert(inpCn % group == 0 && outCn % group == 0); + CV_Assert(blobs[0].num() == outCn && blobs[0].channels() == inpCn / group); + + outGroupCn = outCn / group; + inpGroupCn = inpCn / group; + ksize = inpGroupCn * kernel.height * kernel.width; + + colBlobCols = outH * outW; +} + template void ConvolutionLayerImpl::forward_(std::vector &inputs, std::vector &outputs) { + CV_Assert(inputs.size() > 0); + XMat weightsMat = reshaped(blobs[0].getRefConst(), Shape(outCn, ksize)); XMat biasesMat = (bias) ? 
reshaped(blobs[1].getRefConst(), Shape(outCn, 1)) : XMat(); @@ -213,44 +238,33 @@ void ConvolutionLayerImpl::im2col(const Mat &srcImg, Mat &dstCol) dstCol = colMat; } -void ConvolutionLayerImpl::computeInpOutShape(const Blob &input) -{ - inpH = input.rows(); - inpW = input.cols(); - inpCn = input.channels(); - outCn = numOutput; - - if (padMode.empty()) - { - outH = (inpH + 2 * pad.height - (dilation.height * (kernel.height - 1) + 1)) / stride.height + 1; - outW = (inpW + 2 * pad.width - (dilation.width * (kernel.width - 1) + 1)) / stride.width + 1; - } - else - { - getConvPoolOutParams(inpH, inpW, kernel, stride, pad, padMode, outH, outW); - } - - topH = outH; topW = outW; topCn = outCn; -} - //Deconvolution -DeConvolutionLayerImpl::DeConvolutionLayerImpl() +void DeConvolutionLayerImpl::computeInpOutShape(const Blob &inpBlob) { + BlobShape bs0 = blobs[0].shape(); + BlobShape bs1 = blobs[1].shape(); + CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].channels()); -} + numOutput = blobs[0].channels(); -void DeConvolutionLayerImpl::computeInpOutShape(const Blob &inpBlob) -{ - outH = inpBlob.rows(); - outW = inpBlob.cols(); - outCn = inpBlob.channels(); + inpH = inpBlob.rows(); + inpW = inpBlob.cols(); + inpCn = inpBlob.channels(); - inpH = stride.height * (outH - 1) + kernel.height - 2 * pad.height; - inpW = stride.width * (outW - 1) + kernel.width - 2 * pad.width; - inpCn = numOutput; + outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height; + outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width; + outCn = numOutput; - topH = inpH; topW = inpW; topCn = inpCn; + group = inpCn / blobs[0].num(); + outGroupCn = outCn / group; + inpGroupCn = inpCn / group; + ksize = outGroupCn * kernel.height * kernel.width; + + CV_Assert(inpCn % group == 0 && outCn % group == 0); + CV_Assert(blobs[0].channels() == outCn && blobs[0].num() == inpCn / group); + + colBlobCols = inpH * inpW; } void DeConvolutionLayerImpl::forward(std::vector &inputs, std::vector &outputs) @@ -264,24 +278,24 @@ void DeConvolutionLayerImpl::forward(std::vector &inputs, std::vector void DeConvolutionLayerImpl::forward_(std::vector &inputs, std::vector &outputs) { - XMat weightsMat = reshaped(blobs[0].getRefConst(), Shape(outCn, ksize)); + XMat weightsMat = reshaped(blobs[0].getRefConst(), Shape(inpCn, ksize)); XMat biasesMat = (bias) ? reshaped(blobs[1].getRefConst(), Shape(outCn, 1)) : XMat(); for (size_t ii = 0; ii < outputs.size(); ii++) { int numImg = inputs[ii]->size(0); - XMat convBlob = reshaped(inputs[ii]->getRefConst(), Shape(numImg*outCn, outH*outW)); - XMat decnBlob = reshaped(outputs[ii].getRef(), Shape(numImg*inpCn, inpH*inpW)); + XMat convBlob = reshaped(inputs[ii]->getRefConst(), Shape(numImg*inpCn, inpH*inpW)); + XMat decnBlob = reshaped(outputs[ii].getRef(), Shape(numImg*outCn, outH*outW)); for (int n = 0; n < numImg; n++) { for (int g = 0; g < group; g++) { - XMat dstMat = decnBlob.rowRange(_Range((g + n * group) * inpGroupCn, inpGroupCn)); + XMat dstMat = decnBlob.rowRange(_Range((g + n * group) * outGroupCn, outGroupCn)); XMat &colMat = (is1x1()) ? 
dstMat : colBlob.getRef(); - XMat convMat = convBlob.rowRange(_Range((g + n * group) * outGroupCn, outGroupCn)); - XMat wghtMat = weightsMat.rowRange(_Range(g * outGroupCn, outGroupCn)); + XMat convMat = convBlob.rowRange(_Range((g + n * group) * inpGroupCn, inpGroupCn)); + XMat wghtMat = weightsMat.rowRange(_Range(g * inpGroupCn, inpGroupCn)); dnn::gemm(wghtMat, convMat, 1, colMat, 0, GEMM_1_T); @@ -306,7 +320,7 @@ void DeConvolutionLayerImpl::col2im(const Mat &colMat, Mat &dstImg) return; } if (dstImg.type() == CV_32F) - col2im_CpuPBody::run(colMat.ptr(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dstImg.ptr()); + col2im_CpuPBody::run(colMat.ptr(), outGroupCn, outH, outW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dstImg.ptr()); if (dstImg.type() == CV_64F) col2im_CpuPBody::run(colMat.ptr(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dstImg.ptr()); } @@ -338,13 +352,15 @@ Ptr ConvolutionLayer::create(Size kernel, Size stride, Siz return Ptr(l); } -Ptr DeconvolutionLayer::create(Size kernel, Size stride, Size pad, Size dilation) +Ptr DeconvolutionLayer::create(Size kernel, Size stride, Size pad, Size dilation, Size adjustPad) { DeConvolutionLayerImpl *l = new DeConvolutionLayerImpl(); l->kernel = kernel; l->pad = pad; l->stride = stride; l->dilation = dilation; + l->adjustPad = adjustPad; + return Ptr(l); } diff --git a/modules/dnn/src/layers/convolution_layer.hpp b/modules/dnn/src/layers/convolution_layer.hpp index 3b59bb638..f9baca2c5 100644 --- a/modules/dnn/src/layers/convolution_layer.hpp +++ b/modules/dnn/src/layers/convolution_layer.hpp @@ -49,30 +49,38 @@ namespace cv namespace dnn { -//TODO: simultaneously convolution and bias addition for cache optimization -class ConvolutionLayerImpl : public ConvolutionLayer +class BaseConvolutionLayerImpl : public ConvolutionLayer { public: - - ConvolutionLayerImpl(); + BaseConvolutionLayerImpl(); virtual void allocate(const std::vector &inputs, std::vector &outputs); - virtual void forward(std::vector &inputs, std::vector &outputs); - virtual void init(); protected: + void init(); + virtual void computeInpOutShape(const Blob &inpBlob) = 0; + bool is1x1() const; + int numOutput, group; int inpH, inpW, inpCn; int outH, outW, outCn; - int topH, topW, topCn; //switched between inp/out on deconv/conv int inpGroupCn, outGroupCn; int ksize; + int colBlobCols; bool bias; bool tryUseOpenCL, useOpenCL; Blob colBlob, biasOnesBlob; - bool is1x1() const; +}; + +//TODO: simultaneously convolution and bias addition for cache optimization +class ConvolutionLayerImpl : public BaseConvolutionLayerImpl +{ +public: + virtual void forward(std::vector &inputs, std::vector &outputs); + +protected: virtual void computeInpOutShape(const Blob &inpBlob); template @@ -81,10 +89,9 @@ protected: void im2col(const UMat &srcImg, UMat &dstCol); }; -class DeConvolutionLayerImpl : public ConvolutionLayerImpl +class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl { public: - DeConvolutionLayerImpl(); virtual void forward(std::vector &inputs, std::vector &outputs); protected: diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index 6ed755811..11d772e51 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -1,5 +1,6 @@ #include "../precomp.hpp" #include "elementwise_layers.hpp" +#include 
"opencv2/imgproc.hpp" namespace cv { @@ -42,5 +43,45 @@ Ptr PowerLayer::create(double power /*= 1*/, double scale /*= 1*/, d return Ptr(new ElementWiseLayer(f)); } +//////////////////////////////////////////////////////////////////////////// + +void ChannelsPReLULayerImpl::allocate(const std::vector &inputs, std::vector &outputs) +{ + CV_Assert(blobs.size() == 1); + + outputs.resize(inputs.size()); + for (size_t i = 0; i < inputs.size(); i++) + { + outputs[i].create(inputs[i]->shape()); + } +} + +void ChannelsPReLULayerImpl::forward(std::vector &inputs, std::vector &outputs) +{ + CV_Assert(inputs.size() == 1); + + Blob &inpBlob = *inputs[0]; + + for (size_t ii = 0; ii < outputs.size(); ii++) + { + Blob &outBlob = outputs[ii]; + + CV_Assert(blobs[0].total() == inpBlob.channels()); + + for (int n = 0; n < inpBlob.channels(); n++) + { + float slopeWeight = blobs[0].matRefConst().at(n); + + cv::threshold(inpBlob.getPlane(0, n), outBlob.getPlane(0, n), 0, 0, cv::THRESH_TOZERO_INV); + outBlob.getPlane(0, n) = inpBlob.getPlane(0, n) + (slopeWeight - 1)*outBlob.getPlane(0, n); + } + } +} + +Ptr ChannelsPReLULayer::create() +{ + return Ptr(new ChannelsPReLULayerImpl()); +} + +} } -} \ No newline at end of file diff --git a/modules/dnn/src/layers/elementwise_layers.hpp b/modules/dnn/src/layers/elementwise_layers.hpp index 2f67f0ae8..0331433c7 100644 --- a/modules/dnn/src/layers/elementwise_layers.hpp +++ b/modules/dnn/src/layers/elementwise_layers.hpp @@ -313,6 +313,16 @@ struct PowerFunctor #endif }; +class ChannelsPReLULayerImpl : public ChannelsPReLULayer +{ +public: + ChannelsPReLULayerImpl() {} + + void allocate(const std::vector &inputs, std::vector &outputs); + + void forward(std::vector &inputs, std::vector &outputs); +}; + } } #endif diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp index ecd09e738..3568ccdd4 100755 --- a/modules/dnn/src/layers/eltwise_layer.cpp +++ b/modules/dnn/src/layers/eltwise_layer.cpp @@ -62,7 +62,8 @@ namespace dnn const BlobShape &shape0 = inputs[0]->shape(); for (size_t i = 1; i < inputs.size(); ++i) { - CV_Assert(shape0 == inputs[i]->shape()); + BlobShape iShape = inputs[i]->shape(); + CV_Assert(shape0 == iShape); } outputs.resize(1); outputs[0].create(shape0); diff --git a/modules/dnn/src/layers/max_unpooling_layer.cpp b/modules/dnn/src/layers/max_unpooling_layer.cpp new file mode 100644 index 000000000..67c352c8b --- /dev/null +++ b/modules/dnn/src/layers/max_unpooling_layer.cpp @@ -0,0 +1,69 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2016, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. + +/* +Implementation of Batch Normalization layer. 
+*/ + +#include "max_unpooling_layer.hpp" + +namespace cv +{ +namespace dnn +{ + +MaxUnpoolLayerImpl::MaxUnpoolLayerImpl(Size outSize_): + outSize(outSize_) +{} + +void MaxUnpoolLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) +{ + CV_Assert(inputs.size() == 2); + + BlobShape outShape = inputs[0]->shape(); + outShape[2] = outSize.height; + outShape[3] = outSize.width; + + outputs.resize(1); + outputs[0].create(outShape); +} + +void MaxUnpoolLayerImpl::forward(std::vector &inputs, std::vector &outputs) +{ + CV_Assert(inputs.size() == 2); + Blob& input = *inputs[0]; + Blob& indices = *inputs[1]; + + CV_Assert(input.total() == indices.total()); + CV_Assert(input.num() == 1); + + for(int i_n = 0; i_n < outputs.size(); i_n++) + { + Blob& outBlob = outputs[i_n]; + CV_Assert(input.channels() == outBlob.channels()); + + for (int i_c = 0; i_c < input.channels(); i_c++) + { + Mat outPlane = outBlob.getPlane(0, i_c); + for(int i_wh = 0; i_wh < input.size2().area(); i_wh++) + { + int index = indices.getPlane(0, i_c).at(i_wh); + + CV_Assert(index < outPlane.total()); + outPlane.at(index) = input.getPlane(0, i_c).at(i_wh); + } + } + } +} + +Ptr MaxUnpoolLayer::create(Size unpoolSize) +{ + return Ptr(new MaxUnpoolLayerImpl(unpoolSize)); +} + +} +} diff --git a/modules/dnn/src/layers/max_unpooling_layer.hpp b/modules/dnn/src/layers/max_unpooling_layer.hpp new file mode 100644 index 000000000..42c546fea --- /dev/null +++ b/modules/dnn/src/layers/max_unpooling_layer.hpp @@ -0,0 +1,37 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2016, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. + +/* +Declaration of MaxUnpooling layer. +*/ + +#ifndef __OPENCV_DNN_LAYERS_MAX_UNPOOLING_LAYER_HPP__ +#define __OPENCV_DNN_LAYERS_MAX_UNPOOLING_LAYER_HPP__ +#include "../precomp.hpp" +#include + +namespace cv +{ +namespace dnn +{ + +class MaxUnpoolLayerImpl : public MaxUnpoolLayer +{ +public: + MaxUnpoolLayerImpl(Size outSize_); + + void allocate(const std::vector &inputs, std::vector &outputs); + + void forward(std::vector &inputs, std::vector &outputs); + +private: + Size outSize; +}; + +} +} +#endif // __OPENCV_DNN_LAYERS_MAX_UNPOOLING_LAYER_HPP__ diff --git a/modules/dnn/src/layers/padding_layer.cpp b/modules/dnn/src/layers/padding_layer.cpp new file mode 100644 index 000000000..2b8265aad --- /dev/null +++ b/modules/dnn/src/layers/padding_layer.cpp @@ -0,0 +1,86 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2016, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. + +/* +Implementation of padding layer, which adds paddings to input blob. 
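+
+The parameters mirror Torch's nn.Padding: paddingDim selects the axis (shifted by one via
+input_dims when the blob carries an extra batch dimension), padding is the number of new
+elements filled with paddingValue, and index chooses where they are inserted (0 means append
+at the end of that axis). For example (a sketch, not taken from a real model): a 1x64x32x32
+input with padding_dim=1, padding=3, value=0 and index=0 produces a 1x67x32x32 output whose
+three extra channel planes are zero-filled and placed after the original 64 channels.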
+*/ + +#include "padding_layer.hpp" +#include + +namespace cv +{ +namespace dnn +{ + +PaddingLayer::PaddingLayer(LayerParams ¶ms) +{ + paddingDim = params.get("padding_dim"); + padding = abs(params.get("padding")); + inputDims = params.get("input_dims", 0); + index = params.get("index", 0); + paddingValue = params.get("value", 0); + + if(paddingDim < 0 || padding < 0) + CV_Error(cv::Error::StsNotImplemented, "Negative padding and dim aren't supported"); +} + +void PaddingLayer::allocate(const std::vector &inputs, std::vector &outputs) +{ + outputs.resize(inputs.size()); + for(int i = 0; i < inputs.size(); i++) + { + BlobShape shape = inputs[i]->shape(); + int dim = getPadDim(shape); + CV_Assert(dim < shape.dims()); + + shape[dim] += padding; + outputs[i].create(shape); + } +} + +void PaddingLayer::forward(std::vector &inputs, std::vector &outputs) +{ + for(int i = 0; i < inputs.size(); i++) + { + outputs[i].matRef() = paddingValue; + BlobShape inShape = inputs[i]->shape(); + BlobShape outShape = outputs[i].shape(); + int dim = getPadDim(inShape); + + int actualIndex = index; + if(index == 0) + actualIndex = inShape[dim]; + + std::vector > srcDstRanges; + srcDstRanges.push_back(std::make_pair(Range(0, actualIndex), Range(0, actualIndex))); + srcDstRanges.push_back(std::make_pair(Range(actualIndex, inShape[dim]), + Range(actualIndex + padding, outShape[dim]))); + + std::vector srcRanges(inShape.dims(), Range::all()), dstRanges = srcRanges; + + for(int i = 0; i < srcDstRanges.size(); i++) + { + if(!srcDstRanges[i].first.empty()) + { + srcRanges[dim] = srcDstRanges[i].first; + dstRanges[dim] = srcDstRanges[i].second; + Mat dst = outputs[i].matRef()(&dstRanges[0]); + Mat src = inputs[i]->matRef()(&srcRanges[0]).clone(); + src.copyTo(dst); + } + } + } +} + +int PaddingLayer::getPadDim(const BlobShape& shape) const +{ + return inputDims > 0 && shape.dims() > inputDims ? paddingDim + 1 : paddingDim; +} + +} +} diff --git a/modules/dnn/src/layers/padding_layer.hpp b/modules/dnn/src/layers/padding_layer.hpp new file mode 100644 index 000000000..18de09610 --- /dev/null +++ b/modules/dnn/src/layers/padding_layer.hpp @@ -0,0 +1,37 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2016, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. + +/* +Declaration of padding layer, which adds paddings to input blob. 
+*/ + +#ifndef __OPENCV_DNN_LAYERS_PADDING_LAYER_HPP__ +#define __OPENCV_DNN_LAYERS_PADDING_LAYER_HPP__ +#include "../precomp.hpp" + +namespace cv +{ +namespace dnn +{ + +class PaddingLayer : public Layer +{ +public: + PaddingLayer() {} + PaddingLayer(LayerParams ¶ms); + void allocate(const std::vector &inputs, std::vector &outputs); + void forward(std::vector &inputs, std::vector &outputs); + +private: + int getPadDim(const BlobShape& shape) const; + int paddingDim, padding, inputDims, index; + float paddingValue; +}; + +} +} +#endif diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 2e184503b..f27df8591 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -72,7 +72,7 @@ PoolingLayerImpl::PoolingLayerImpl(int type_, Size kernel_, Size stride_, Size p void PoolingLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) { - CV_Assert(inputs.size() > 0); + CV_Assert(inputs.size() == 1); inp = inputs[0]->size2(); @@ -85,11 +85,19 @@ void PoolingLayerImpl::allocate(const std::vector &inputs, std::vectorrows() == inp.height && inputs[i]->cols() == inp.width); - outputs[i].create(BlobShape(inputs[i]->num(), inputs[i]->channels(), out.height, out.width)); + if (type == MAX) + { + outputs[2 * i].create(BlobShape(inputs[i]->num(), inputs[i]->channels(), out.height, out.width)); + outputs[2 * i + 1].create(BlobShape(inputs[i]->num(), inputs[i]->channels(), out.height, out.width)); + } + else + { + outputs[i].create(BlobShape(inputs[i]->num(), inputs[i]->channels(), out.height, out.width)); + } } } @@ -100,7 +108,7 @@ void PoolingLayerImpl::forward(std::vector &inputs, std::vector &ou switch (type) { case MAX: - maxPooling(*inputs[ii], outputs[ii]); + maxPooling(*inputs[ii], outputs[2 * ii], outputs[2 * ii + 1]); break; case AVE: avePooling(*inputs[ii], outputs[ii]); @@ -112,17 +120,17 @@ void PoolingLayerImpl::forward(std::vector &inputs, std::vector &ou } } -void PoolingLayerImpl::maxPooling(Blob &src, Blob &dst) +void PoolingLayerImpl::maxPooling(Blob &src, Blob &dst, Blob &mask) { if (!useOpenCL) - maxPooling_cpu(src, dst); + maxPooling_cpu(src, dst, mask); else { - CV_Assert(maxPooling_ocl(src, dst)); + CV_Assert(maxPooling_ocl(src, dst, mask)); } } -bool PoolingLayerImpl::maxPooling_ocl(Blob &src, Blob &dst) +bool PoolingLayerImpl::maxPooling_ocl(Blob &src, Blob &dst, Blob &mask) { return pooling_ocl("MaxPoolForward", src, dst); } @@ -142,7 +150,7 @@ bool PoolingLayerImpl::avePooling_ocl(Blob &src, Blob &dst) return pooling_ocl("AvePoolForward", src, dst); } -void PoolingLayerImpl::maxPooling_cpu(Blob &src, Blob &dst) +void PoolingLayerImpl::maxPooling_cpu(Blob &src, Blob &dst, Blob &mask) { CV_DbgAssert(dst.rows() == out.height && dst.cols() == out.width); @@ -152,6 +160,7 @@ void PoolingLayerImpl::maxPooling_cpu(Blob &src, Blob &dst) { const float *srcData = src.ptrf(n, c); float *dstData = dst.ptrf(n, c); + float *dstMaskData = mask.ptrf(n, c); for (int ph = 0; ph < out.height; ++ph) { @@ -165,16 +174,21 @@ void PoolingLayerImpl::maxPooling_cpu(Blob &src, Blob &dst) wstart = max(wstart, 0); const int poolIndex = ph * out.width + pw; float max_val = -FLT_MAX; + int max_index = -1; for (int h = hstart; h < hend; ++h) for (int w = wstart; w < wend; ++w) { const int index = h * inp.width + w; if (srcData[index] > max_val) + { max_val = srcData[index]; + max_index = index; + } } dstData[poolIndex] = max_val; + dstMaskData[poolIndex] = max_index; } } } @@ -187,7 +201,9 @@ bool 
PoolingLayerImpl::pooling_ocl(const char *kname, const Blob &src, Blob &dst { const UMat &srcMat = src.umatRefConst(); UMat &dstMat = dst.umatRef(); - CV_Assert(mask == NULL && srcMat.offset == 0 && dstMat.offset == 0); + UMat* indexesMat = mask == NULL ? NULL : &dst.umatRef(); + + CV_Assert(srcMat.offset == 0 && dstMat.offset == 0); ocl::Kernel ker(kname, ocl::dnn::pooling_oclsrc, String("-DT=") + ocl::typeToStr(src.type())); if (ker.empty()) @@ -199,7 +215,8 @@ bool PoolingLayerImpl::pooling_ocl(const char *kname, const Blob &src, Blob &dst ocl::KernelArg::PtrReadOnly(srcMat), s[0], s[1], s[2], s[3], out.height, out.width, kernel.height, kernel.width, stride.height, stride.width, pad.height, pad.width, - ocl::KernelArg::PtrWriteOnly(dstMat)); + ocl::KernelArg::PtrWriteOnly(dstMat), + ocl::KernelArg(ocl::KernelArg::PTR_ONLY + ocl::KernelArg::WRITE_ONLY, indexesMat)); size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize(); if (!ker.run(1, &nthreads, &wgSize, true)) diff --git a/modules/dnn/src/layers/pooling_layer.hpp b/modules/dnn/src/layers/pooling_layer.hpp index c5723cd1f..266db1c50 100644 --- a/modules/dnn/src/layers/pooling_layer.hpp +++ b/modules/dnn/src/layers/pooling_layer.hpp @@ -58,9 +58,9 @@ class PoolingLayerImpl : public PoolingLayer bool pooling_ocl(const char *kname, const Blob &src, Blob &dst, Blob *mask = NULL); - void maxPooling(Blob &src, Blob &dst); - void maxPooling_cpu(Blob &src, Blob &dst); - bool maxPooling_ocl(Blob &src, Blob &dst); + void maxPooling(Blob &src, Blob &dst, Blob &mask); + void maxPooling_cpu(Blob &src, Blob &dst, Blob &mask); + bool maxPooling_ocl(Blob &src, Blob &dst, Blob &mask); void avePooling(Blob &src, Blob &dst); void avePooling_cpu(Blob &src, Blob &dst); diff --git a/modules/dnn/src/layers/shift_layer.cpp b/modules/dnn/src/layers/shift_layer.cpp index 98bfdfc73..6663640af 100644 --- a/modules/dnn/src/layers/shift_layer.cpp +++ b/modules/dnn/src/layers/shift_layer.cpp @@ -39,17 +39,17 @@ public: virtual void forward(std::vector &inputs, std::vector &outputs, const std::vector& blobs) { for (size_t ii = 0; ii < outputs.size(); ii++) { - Blob &inpBlob = *inputs[ii]; - Blob &outBlob = outputs[ii]; + Blob &inpBlob = *inputs[ii]; + Blob &outBlob = outputs[ii]; - inpBlob.matRef().copyTo(outBlob.matRef()); + inpBlob.matRef().copyTo(outBlob.matRef()); - for (int n = 0; n < inpBlob.num(); n++) - { - Mat dstMat(inpBlob.channels(), inpBlob.rows() * inpBlob.cols(), - outBlob.type(), outBlob.ptr(n)); - dnn::gemm(blobs[0].matRefConst(), biasOnesMat, 1, dstMat, 1); //TODO: gemv - } + for (int n = 0; n < inpBlob.num(); n++) + { + Mat dstMat(inpBlob.channels(), inpBlob.rows() * inpBlob.cols(), + outBlob.type(), outBlob.ptr(n)); + dnn::gemm(blobs[0].matRefConst(), biasOnesMat, 1, dstMat, 1); //TODO: gemv + } } } diff --git a/modules/dnn/src/layers/shift_layer.hpp b/modules/dnn/src/layers/shift_layer.hpp index 1d1c70a85..36808ffbf 100644 --- a/modules/dnn/src/layers/shift_layer.hpp +++ b/modules/dnn/src/layers/shift_layer.hpp @@ -22,13 +22,15 @@ class ShiftLayerImpl; class ShiftLayer : public Layer { - cv::Ptr impl; - public: ShiftLayer() {} ShiftLayer(LayerParams ¶ms); void allocate(const std::vector &inputs, std::vector &outputs); void forward(std::vector &inputs, std::vector &outputs); + +private: + cv::Ptr impl; + }; } diff --git a/modules/dnn/src/opencl/pooling.cl b/modules/dnn/src/opencl/pooling.cl index aeb70bc55..80c96f5ae 100644 --- a/modules/dnn/src/opencl/pooling.cl +++ b/modules/dnn/src/opencl/pooling.cl @@ -24,10 +24,7 @@ * POSSIBILITY OF 
SUCH DAMAGE. **************************************************************************************/ -__kernel void MaxPoolForward(const int nthreads, __global T* bottom_data, const int num, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, __global T* top_data -#ifdef MASK - , __global int* mask, __global T* top_mask -#endif +__kernel void MaxPoolForward(const int nthreads, __global T* bottom_data, const int num, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, __global T* top_data, __global int* mask ) { int index = get_global_id(0); int tmp = get_global_size(0); @@ -55,13 +52,10 @@ __kernel void MaxPoolForward(const int nthreads, __global T* bottom_data, const } } top_data[index] = maxval; -#ifdef MASK + if (mask) { mask[index] = maxidx; - } else { - top_mask[index] = maxidx; } -#endif } } diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp index f11028f13..64c3ed200 100644 --- a/modules/dnn/src/torch/torch_importer.cpp +++ b/modules/dnn/src/torch/torch_importer.cpp @@ -45,11 +45,11 @@ #include #include #include +#include namespace cv { namespace dnn { -#if defined(ENABLE_TORCH_IMPORTER) && ENABLE_TORCH_IMPORTER #include "THDiskFile.h" #ifdef NDEBUG @@ -91,6 +91,7 @@ static inline bool endsWith(const String &str, const char *substr) struct TorchImporter : public ::cv::dnn::Importer { + typedef std::map > TensorsMap; Net net; THFile *file; @@ -102,16 +103,10 @@ struct TorchImporter : public ::cv::dnn::Importer { String thName, apiType; dnn::LayerParams params; - std::vector modules; + std::vector > modules; Module(const String &_thName, const String &_apiType = String()) : thName(_thName), apiType(_apiType) {} - - ~Module() - { - for (size_t i = 0; i < modules.size(); i++) - delete modules[i]; - } }; Module *rootModule; @@ -184,6 +179,7 @@ struct TorchImporter : public ::cv::dnn::Importer readedIndexes.insert(index); int size = readInt(); + for (int i = 0; i < size; i++) { readObject(); //key @@ -271,7 +267,7 @@ struct TorchImporter : public ::cv::dnn::Importer storages.insert(std::make_pair(index, storageMat)); } - void readTorchTable(Dict &scalarParams, std::map &tensorParams) + void readTorchTable(Dict &scalarParams, TensorsMap &tensorParams) { int luaType = readInt(); int index = readInt(); @@ -309,7 +305,7 @@ struct TorchImporter : public ::cv::dnn::Importer if (tensors.count(index)) //tensor was readed { - tensorParams.insert(std::make_pair(key, tensors[index])); + tensorParams.insert(std::make_pair(key, std::make_pair(index, tensors[index]))); } else if (storages.count(index)) //storage was readed { @@ -347,9 +343,9 @@ struct TorchImporter : public ::cv::dnn::Importer std::cout << scalarParams; std::cout << "#" << tensorParams.size() << " tensorParams:\n"; - std::map::const_iterator it; + std::map >::const_iterator it; for (it = tensorParams.begin(); it != tensorParams.end(); it++) - std::cout << it->first << ": Tensor " << it->second.shape() << "\n"; + std::cout << it->first << ": Tensor " << it->second.second.shape() << "\n"; } } @@ -375,9 +371,11 @@ struct TorchImporter : public ::cv::dnn::Importer int indexStorage = readInt(); if (readedIndexes.count(indexStorage) == 0) { - int typeStorage = 
parseStorageType(readTorchClassName()); + String className = readTorchClassName(); + int typeStorage = parseStorageType(className); CV_Assert(typeStorage >= 0 && typeTensor == typeStorage); readTorchStorage(indexStorage, typeStorage); + readedIndexes.insert(indexStorage); } //small check @@ -396,8 +394,7 @@ struct TorchImporter : public ::cv::dnn::Importer } //allocate Blob - Mat srcMat(ndims, (int*)isizes, typeTensor , storages[indexStorage].ptr() + offset, (size_t*)ssteps); - //int dstType = (typeTensor == CV_64F) ? CV_64F : CV_32F; + Mat srcMat(ndims, (int*)isizes, typeTensor , storages[indexStorage].ptr() + offset*CV_ELEM_SIZE(typeTensor), (size_t*)ssteps); int dstType = CV_32F; Blob blob; @@ -436,12 +433,7 @@ struct TorchImporter : public ::cv::dnn::Importer void readTorchObject(int index) { if(readedIndexes.count(index)) - { - if(!storages.count(index) && !tensors.count(index)) - CV_Error(Error::StsNotImplemented, "Objects which have multiple references are not supported"); - else - return; - } + return; String className = readTorchClassName(); String nnName; @@ -461,12 +453,15 @@ struct TorchImporter : public ::cv::dnn::Importer else if (isNNClass(className, nnName)) { Dict scalarParams; - std::map tensorParams; + TensorsMap tensorParams; - Module *newModule = new Module(nnName); + cv::Ptr newModule(new Module(nnName)); cv::dnn::LayerParams &layerParams = newModule->params; - if (nnName == "Sequential" || nnName == "Parallel" || nnName == "Concat") + layerParams.set("torch_index", index); + + if (nnName == "Sequential" || nnName == "Parallel" || + nnName == "Concat" || nnName == "ConcatTable" || nnName == "JoinTable") { Module *parentModule = curModule; curModule->modules.push_back(newModule); @@ -483,6 +478,10 @@ struct TorchImporter : public ::cv::dnn::Importer { layerParams.set("dimension", scalarParams.get("dimension")); } + if (nnName == "JoinTable") + { + layerParams.set("dimension", scalarParams.get("dimension")); + } } else if (nnName == "SpatialConvolution") { @@ -490,12 +489,12 @@ struct TorchImporter : public ::cv::dnn::Importer readTorchTable(scalarParams, tensorParams); CV_Assert(tensorParams.count("weight")); - layerParams.blobs.push_back(tensorParams["weight"]); + layerParams.blobs.push_back(tensorParams["weight"].second); bool bias = tensorParams.count("bias") != 0; layerParams.set("bias_term", bias); if (bias) - layerParams.blobs.push_back(tensorParams["bias"]); + layerParams.blobs.push_back(tensorParams["bias"].second); layerParams.set("num_output", scalarParams.get("nOutputPlane")); convertTorchKernelsParams(scalarParams, layerParams); @@ -507,8 +506,10 @@ struct TorchImporter : public ::cv::dnn::Importer newModule->apiType = "Pooling"; readTorchTable(scalarParams, tensorParams); - if (nnName == "SpatialMaxPooling") + if (nnName == "SpatialMaxPooling") { layerParams.set("pool", "MAX"); + layerParams.set("indices_blob_id", tensorParams["indices"].first); + } if (nnName == "SpatialAveragePooling") layerParams.set("pool", "AVE"); convertTorchKernelsParams(scalarParams, layerParams); @@ -521,12 +522,12 @@ struct TorchImporter : public ::cv::dnn::Importer readTorchTable(scalarParams, tensorParams); CV_Assert(tensorParams.count("weight")); - Blob weightBlob = tensorParams["weight"]; + Blob weightBlob = tensorParams["weight"].second; layerParams.blobs.push_back(weightBlob); bool bias = tensorParams.count("bias") != 0; if (bias) - layerParams.blobs.push_back(tensorParams["bias"]); + layerParams.blobs.push_back(tensorParams["bias"].second); layerParams.set("bias_term", 
bias); layerParams.set("num_output", weightBlob.size(0)); @@ -549,24 +550,205 @@ struct TorchImporter : public ::cv::dnn::Importer } else if (nnName == "ReLU") { - curModule->modules.push_back(new Module(nnName, "ReLU")); + curModule->modules.push_back(cv::Ptr(new Module(nnName, "ReLU"))); readObject(); } else if (nnName == "Tanh") { - curModule->modules.push_back(new Module(nnName, "TanH")); + curModule->modules.push_back(cv::Ptr(new Module(nnName, "TanH"))); readObject(); } else if (nnName == "Sigmoid") { - curModule->modules.push_back(new Module(nnName, "Sigmoid")); + curModule->modules.push_back(cv::Ptr(new Module(nnName, "Sigmoid"))); + readObject(); + } + else if (nnName == "SpatialBatchNormalization") + { + newModule->apiType = "BatchNorm"; + readTorchTable(scalarParams, tensorParams); + + CV_Assert(tensorParams.count("running_var") && + tensorParams.count("running_mean")); + layerParams.blobs.push_back(tensorParams["running_mean"].second); + layerParams.blobs.push_back(tensorParams["running_var"].second); + + CV_Assert(scalarParams.has("eps")); + layerParams.set("eps", float(scalarParams.get("eps"))); + + layerParams.blobs.push_back(Blob()); + layerParams.blobs.push_back(Blob()); + + if (tensorParams.count("weight")) + { + layerParams.set("has_weight", true); + layerParams.blobs[2] = tensorParams["weight"].second; + } + + if (tensorParams.count("bias")) + { + layerParams.set("has_bias", true); + layerParams.blobs[3] = tensorParams["bias"].second; + } + + curModule->modules.push_back(newModule); + } + else if (nnName == "PReLU") + { + readTorchTable(scalarParams, tensorParams); + + CV_Assert(tensorParams.count("weight")); + + size_t outputChannels = static_cast(scalarParams.get("nOutputPlane")); + if (outputChannels) { + + CV_Assert(tensorParams["weight"].second.total() == outputChannels); + layerParams.blobs.push_back(tensorParams["weight"].second); + + newModule->apiType = "ChannelsPReLU"; + } + else { + CV_Assert(tensorParams["weight"].second.total() == 1); + float negative_slope = *tensorParams["weight"].second.ptrf(); + layerParams.set("negative_slope", negative_slope); + + newModule->apiType = "ReLU"; + } + + curModule->modules.push_back(newModule); + } + else if (nnName == "SpatialDropout") + { + readTorchTable(scalarParams, tensorParams); + CV_Assert(scalarParams.has("p")); + + float scale = 1 - scalarParams.get("p"); + + CV_Assert(scale > 0); + + newModule->apiType = "Power"; + layerParams.set("scale", scale); + curModule->modules.push_back(newModule); + } + else if (nnName == "Identity") + { + readTorchTable(scalarParams, tensorParams); + newModule->apiType = "Identity"; + curModule->modules.push_back(newModule); + } + else if (nnName == "Padding") + { + readTorchTable(scalarParams, tensorParams); + newModule->apiType = "Padding"; + + CV_Assert(scalarParams.has("pad") && + scalarParams.has("dim")); + + layerParams.set("padding_dim", + static_cast(scalarParams.get("dim") - 1)); + layerParams.set("padding", static_cast(scalarParams.get("pad"))); + + if (scalarParams.has("nInputDim")) + layerParams.set("input_dims", + static_cast(scalarParams.get("nInputDim"))); + + if (scalarParams.has("value")) + layerParams.set("value", scalarParams.get("value")); + + if (scalarParams.has("index")) + layerParams.set("index", + static_cast(scalarParams.get("index") - 1)); + + curModule->modules.push_back(newModule); + } + else if (nnName == "CAddTable") + { + curModule->modules.push_back(newModule); readObject(); } + else if (nnName == "SpatialDilatedConvolution") + { + 
readTorchTable(scalarParams, tensorParams); + newModule->apiType = "Convolution"; + CV_Assert(scalarParams.has("padW") && + scalarParams.has("padH")&& + scalarParams.has("dW")&& + scalarParams.has("dH")&& + scalarParams.has("dilationW")&& + scalarParams.has("dilationH")&& + scalarParams.has("kW")&& + scalarParams.has("kH")&& + scalarParams.has("nOutputPlane")); + + layerParams.set("kernel_w", static_cast(scalarParams.get("kW"))); + layerParams.set("kernel_h", static_cast(scalarParams.get("kH"))); + layerParams.set("pad_w", static_cast(scalarParams.get("padW"))); + layerParams.set("pad_h", static_cast(scalarParams.get("padH"))); + layerParams.set("stride_w", static_cast(scalarParams.get("dW"))); + layerParams.set("stride_h", static_cast(scalarParams.get("dH"))); + layerParams.set("dilation_w", static_cast(scalarParams.get("dilationW"))); + layerParams.set("dilation_h", static_cast(scalarParams.get("dilationH"))); + layerParams.set("num_output", static_cast(scalarParams.get("nOutputPlane"))); + + layerParams.blobs.push_back(tensorParams["weight"].second); + + bool bias = tensorParams.count("bias"); + layerParams.set("bias_term", bias); + if (bias) + layerParams.blobs.push_back(tensorParams["bias"].second); + + curModule->modules.push_back(newModule); + } + else if (nnName == "SpatialFullConvolution") + { + readTorchTable(scalarParams, tensorParams); + newModule->apiType = "Deconvolution"; + CV_Assert(scalarParams.has("padW") && + scalarParams.has("padH")&& + scalarParams.has("dW")&& + scalarParams.has("dH")&& + scalarParams.has("adjW")&& + scalarParams.has("adjH")&& + scalarParams.has("kW")&& + scalarParams.has("kH")&& + scalarParams.has("nOutputPlane")); + + layerParams.set("kernel_w", static_cast(scalarParams.get("kW"))); + layerParams.set("kernel_h", static_cast(scalarParams.get("kH"))); + layerParams.set("pad_w", static_cast(scalarParams.get("padW"))); + layerParams.set("pad_h", static_cast(scalarParams.get("padH"))); + layerParams.set("stride_w", static_cast(scalarParams.get("dW"))); + layerParams.set("stride_h", static_cast(scalarParams.get("dH"))); + layerParams.set("adj_w", static_cast(scalarParams.get("adjW"))); + layerParams.set("adj_h", static_cast(scalarParams.get("adjH"))); + layerParams.set("num_output", static_cast(scalarParams.get("nOutputPlane"))); + + layerParams.blobs.push_back(tensorParams["weight"].second); + + bool bias = tensorParams.count("bias"); + layerParams.set("bias_term", bias); + if (bias) + layerParams.blobs.push_back(tensorParams["bias"].second); + + curModule->modules.push_back(newModule); + } + else if (nnName == "SpatialMaxUnpooling") + { + readTorchTable(scalarParams, tensorParams); + + CV_Assert(scalarParams.has("oheight") && + scalarParams.has("owidth")); + + CV_Assert(tensorParams.count("indices")); + + layerParams.set("out_h", static_cast(scalarParams.get("oheight"))); + layerParams.set("out_w", static_cast(scalarParams.get("owidth"))/2); + layerParams.set("indices_blob_id", tensorParams["indices"].first); + curModule->modules.push_back(newModule); + } else { - delete newModule; CV_Error(Error::StsNotImplemented, "Unknown nn class \"" + className + "\""); - readObject(); } } else @@ -606,15 +788,16 @@ struct TorchImporter : public ::cv::dnn::Importer return "l" + toString(++this->moduleCounter) + "_" + label; } - int fill(Module *module, int prevLayerId = 0, int prevOutNum = 0) + int fill(Module *module, std::vector >& addedModules, int prevLayerId = 0, int prevOutNum = 0) { if (module == NULL) return prevLayerId; if (module->apiType.length()) { - int 
newLayerId = this->net.addLayer(generateLayerName(module->apiType), module->apiType, module->params); + int newLayerId = net.addLayer(generateLayerName(module->apiType), module->apiType, module->params); net.connect(prevLayerId, prevOutNum, newLayerId, 0); + addedModules.push_back(std::make_pair(newLayerId, module)); return newLayerId; } else @@ -623,7 +806,7 @@ struct TorchImporter : public ::cv::dnn::Importer { for (size_t i = 0; i < module->modules.size(); i++) { - prevLayerId = fill(module->modules[i], prevLayerId, prevOutNum); + prevLayerId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum); prevOutNum = 0; } return prevLayerId; @@ -640,10 +823,11 @@ struct TorchImporter : public ::cv::dnn::Importer for (int i = 0; i < (int)module->modules.size(); i++) { - newId = fill(module->modules[i], splitId, i); + newId = fill(module->modules[i], addedModules, splitId, i); net.connect(newId, 0, mergeId, i); } + addedModules.push_back(std::make_pair(mergeId, module)); return mergeId; } else if (module->thName == "Parallel") @@ -664,19 +848,92 @@ struct TorchImporter : public ::cv::dnn::Importer for (int i = 0; i < (int)module->modules.size(); i++) { net.connect(splitId, i, reshapeId, i); - newId = fill(module->modules[i], reshapeId, i); + newId = fill(module->modules[i], addedModules, reshapeId, i); net.connect(newId, 0, mergeId, i); } + addedModules.push_back(std::make_pair(mergeId, module)); + return mergeId; + } + else if (module->thName == "ConcatTable") { + int newId, splitId; + LayerParams splitParams; + + splitId = net.addLayer(generateLayerName("torchSplit"), "Split", splitParams); + net.connect(prevLayerId, prevOutNum, splitId, 0); + + addedModules.push_back(std::make_pair(splitId, module)); + + for (int i = 0; i < (int)module->modules.size(); i++) + { + newId = fill(module->modules[i], addedModules, splitId, i); + } + + return newId; + } + else if (module->thName == "JoinTable") { + std::vector ids = net.getUnconnectedOutLayers(); + + int mergeId; + LayerParams mergeParams; + mergeParams.set("axis", module->params.get("dimension") - 1); + + mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams); + addedModules.push_back(std::make_pair(mergeId, module)); + + for (int i = 0; i < ids.size(); i++) + { + net.connect(ids[i], 0, mergeId, i); + } + return mergeId; } + else if (module->thName == "CAddTable") { + String name = generateLayerName("torchCAddTable"); + std::vector ids = net.getUnconnectedOutLayers(); + LayerParams params; + params.set("operation", "sum"); + + + int id = net.addLayer(name, "Eltwise", params); + + for (int i = 0; i < ids.size(); i++) + { + net.connect(ids[i], 0, id, i); + } + + addedModules.push_back(std::make_pair(id, module)); + return id; + } + else if (module->thName == "SpatialMaxUnpooling") { + String name = generateLayerName("torchMaxUnpooling"); + int id = net.addLayer(name, "MaxUnpool", module->params); + net.connect(prevLayerId, 0, id, 0); + + CV_Assert(module->params.has("indices_blob_id")); + + int indicesBlobId = module->params.get("indices_blob_id"); + + for(int i = 0; i < addedModules.size(); i++) + { + if (addedModules[i].second->apiType == "Pooling" && + addedModules[i].second->params.has("indices_blob_id") && + addedModules[i].second->params.get("indices_blob_id") == indicesBlobId) + { + net.connect(addedModules[i].first, 1, id, 1); + break; + } + } + + return id; + } } CV_Error(Error::StsInternal, "Unexpected torch container: " + module->thName); return -1; } - void populateNet(Net net) + void populateNet(Net 
net_) { if (rootModule == NULL) { @@ -687,8 +944,9 @@ struct TorchImporter : public ::cv::dnn::Importer readObject(); } - this->net = net; - fill(rootModule); + net = net_; + std::vector > addedModules; + fill(rootModule, addedModules); } }; @@ -707,20 +965,5 @@ Blob readTorchBlob(const String &filename, bool isBinary) return importer->tensors.begin()->second; } -#else //ENABLE_TORCH_IMPORTER - -Ptr createTorchImporter(const String&, bool) -{ - CV_Error(Error::StsNotImplemented, "Module was build without Torch importer"); - return Ptr(); -} - -Blob readTorchBlob(const String&, bool) -{ - CV_Error(Error::StsNotImplemented, "Module was build without Torch importer"); - return Blob(); -} - -#endif //ENABLE_TORCH_IMPORTER } } diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 6680de31c..9a5660b19 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -154,6 +154,7 @@ TEST(Layer_Test_DeConvolution, Accuracy) { OCL_OFF(testLayerUsingCaffeModels("layer_deconvolution", true, false)); } + OCL_TEST(Layer_Test_DeConvolution, Accuracy) { OCL_ON(testLayerUsingCaffeModels("layer_deconvolution", true, false);); diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 9f5d08637..8832766e1 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -38,13 +38,12 @@ TEST(Test_TensorFlow, read_inception) resize(sample, input, Size(224, 224)); input -= 128; // mean sub - std::vector inpMats; - inpMats.push_back(input); + dnn::Blob inputBlob = dnn::Blob::fromImages(input); - net.setBlob("_input.input", Blob(inpMats)); + net.setBlob("_input.input", inputBlob); net.forward(); - Blob out = net.getBlob("output"); + Blob out = net.getBlob("softmax2"); std::cout << out.dims() << std::endl; }
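
A minimal end-to-end sketch of how the pieces introduced by this patch fit together, condensed
from the torch_enet.cpp sample above (the file names, the ".0" input blob name and the output
blob name are placeholders taken from that sample, not fixed API values):

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgproc.hpp>
    #include <opencv2/highgui.hpp>
    #include <iostream>
    using namespace cv;
    using namespace cv::dnn;

    int main()
    {
        // Torch importer (requires the <module>_BUILD_TORCH_IMPORTER option, now ON by default).
        Ptr<Importer> importer = createTorchImporter("model_best.net");
        Net net;
        importer->populateNet(net);
        importer.release();

        // ENet expects a 512x512 RGB image scaled to [0, 1].
        Mat img = imread("street.png");
        resize(img, img, Size(512, 512));
        cvtColor(img, img, COLOR_BGR2RGB);
        img.convertTo(img, CV_32F, 1 / 255.0);

        net.setBlob(".0", Blob::fromImages(img));
        net.forward();

        // New in this patch: list layers whose outputs are not consumed by other layers,
        // useful for locating the output of an imported network.
        std::vector<int> outLayers = net.getUnconnectedOutLayers();
        std::cout << "Unconnected output layers: " << outLayers.size() << std::endl;

        Blob prob = net.getBlob("output_blob_name"); // placeholder, pass via -o_blob in the sample
        imshow("channel 0", prob.getPlane(0, 0));
        waitKey();
        return 0;
    }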