diff --git a/modules/dnn/include/opencv2/dnn/dict.hpp b/modules/dnn/include/opencv2/dnn/dict.hpp
index 25a87522f..889ce404b 100644
--- a/modules/dnn/include/opencv2/dnn/dict.hpp
+++ b/modules/dnn/include/opencv2/dnn/dict.hpp
@@ -122,13 +122,13 @@ inline unsigned DictValue::get() const
 template<>
 inline double DictValue::get() const
 {
-    if (type == cv::Param::FLOAT)
+    if (type == cv::Param::REAL)
         return d;
     else if (type == cv::Param::INT)
         return i;
     else
     {
-        CV_Assert(type == cv::Param::FLOAT || type == cv::Param::INT);
+        CV_Assert(type == cv::Param::REAL || type == cv::Param::INT);
         return 0;
     }
 }
diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index 1d747eef7..681da980d 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -32,8 +32,6 @@ namespace dnn
         void fill(InputArray in);
         void fill(int ndims, const int *sizes, int type, void *data, bool deepCopy = true);
 
-        bool empty() const;
-
         Mat& getMatRef();
         const Mat& getMatRef() const;
         Mat getMat();
@@ -42,18 +40,26 @@ namespace dnn
         //shape getters
         int cols() const;
         int rows() const;
-        Size size() const;
         int channels() const;
         int num() const;
+        Size size2() const;
         Vec4i shape() const;
-        size_t total() const;
+        int size(int index) const;
+        size_t total(int startAxis = 0, int endAxis = -1) const;
 
         uchar *rawPtr(int num = 0, int cn = 0, int row = 0, int col = 0);
 
         template<typename TFloat>
         TFloat *ptr(int num = 0, int cn = 0, int row = 0, int col = 0);
 
+        int type() const;
+        bool isFloat() const;
+        bool isDouble() const;
+
+    private:
+        const int *sizes() const;
+        int dims() const;
+
         Mat m;
     };
@@ -179,8 +185,8 @@
     };
 
     //registers layer on module load time
-    #define REGISTER_LAYER(type, constuctorFunc) \
-        static _LayerRegisterer __layerRegisterer_##type(#type, func);
+    #define REGISTER_LAYER_FUNC(type, constructorFunc) \
+        static _LayerRegisterer __layerRegisterer_##type(#type, constructorFunc);
 
     #define REGISTER_LAYER_CLASS(type, class) \
         Ptr<Layer> __layerRegisterer_func_##type(LayerParams &params) \
diff --git a/modules/dnn/include/opencv2/dnn/dnn.inl.hpp b/modules/dnn/include/opencv2/dnn/dnn.inl.hpp
index 3e13a9679..cf1fbfd71 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.inl.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.inl.hpp
@@ -40,7 +40,7 @@ namespace dnn
         return m.size[m.dims-2];
     }
 
-    inline Size Blob::size() const
+    inline Size Blob::size2() const
     {
         return Size(cols(), rows());
     }
@@ -63,10 +63,24 @@ namespace dnn
         return Vec4i(m.size.p);
     }
 
-    inline size_t Blob::total() const
+    inline int Blob::size(int index) const
     {
-        CV_DbgAssert(m.dims == 4);
-        return (size_t) m.size[0] * m.size[1] * m.size[2] * m.size[3];
+        CV_Assert(index >= 0 && index < dims());
+        return sizes()[index];
+    }
+
+    inline size_t Blob::total(int startAxis, int endAxis) const
+    {
+        if (endAxis == -1)
+            endAxis = dims();
+
+        CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims());
+
+        size_t size = 1; //assume that blob isn't empty
+        for (int i = startAxis; i < endAxis; i++)
+            size *= (size_t) sizes()[i];
+
+        return size;
     }
 
     inline uchar* Blob::rawPtr(int num, int cn, int row, int col)
@@ -82,6 +96,33 @@ namespace dnn
         return (TFloat*) rawPtr(num, cn, row, col);
     }
 
+    inline int Blob::type() const
+    {
+        return m.depth();
+    }
+
+    inline bool Blob::isFloat() const
+    {
+        return (type() == CV_32F);
+    }
+
+    inline bool Blob::isDouble() const
+    {
+        return (type() == CV_64F);
+    }
+
+    inline const int * Blob::sizes() const
+    {
+        return &m.size[0];
+    }
+
+    inline int Blob::dims() const
+    {
+        return m.dims;
+    }
+
+
 }
 }
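Note on the Blob interface changes above: size() now returns a single dimension, the old Size accessor survives as size2(), and total(startAxis, endAxis) multiplies dimension sizes over the half-open range [startAxis, endAxis), with endAxis == -1 standing for dims(), mirroring Caffe's Blob::count(). A minimal sketch of how a layer can decompose a blob around an arbitrary axis with these getters; the header path, helper name, and assertion are illustrative, not part of the patch:

    #include <opencv2/dnn.hpp>

    // Split a blob into (outer, axis, inner) parts the way an axis-generic
    // layer would; element (o, a, i) sits at offset (o * axisSz + a) * inner + i.
    static void splitAroundAxis(const cv::dnn::Blob &blob, int ax)
    {
        size_t outer  = blob.total(0, ax);       // product of dims before ax
        size_t axisSz = blob.size(ax);           // the dimension at ax itself
        size_t inner  = blob.total(ax + 1, -1);  // product of dims after ax
        CV_Assert(outer * axisSz * inner == blob.total());
    }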
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index 3f8b1f20d..4cfd608e5 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -133,13 +133,6 @@ struct LayerOutId
 
     LayerOutId() {}
    LayerOutId(int layerId, int outputId, const String &outputName = String()) : lid(layerId), oid(outputId), name(outputName) {}
-
-    struct UnaryMatchName
-    {
-        const String &name;
-        UnaryMatchName(const String &_name) : name(_name) {}
-        bool operator()(const String &other) { return name == other; }
-    };
 };
 
 struct LayerData
@@ -247,7 +240,7 @@ struct Net::Impl
         for (size_t oi = 0; oi < ld.outputNames.size() && count < maxCount; oi++)
         {
             if (ld.outputNames[oi] == name)
-                found[count++] = LayerOutId(lid, oi);
+                found[count++] = LayerOutId(lid, (int)oi);
         }
     }
 
@@ -261,7 +254,6 @@ struct Net::Impl
         MapIdToLayerData::iterator it;
         for (it = layers.begin(); it != layers.end(); it++)
         {
-            int lid = it->first;
             LayerData &ld = it->second;
 
             ld.inputBlobs.resize(ld.inputNames.size());
@@ -402,6 +394,8 @@ struct Net::Impl
         }
 
         LayerData &ld = layers[layerId];
+
+        //forward parents
         for (set<int>::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
        {
             LayerData &ild = layers[*i];
@@ -410,9 +404,17 @@ struct Net::Impl
             {
                 if (ild.layerInstance)
                     ild.layerInstance->forward(ild.inputBlobs, ild.outputBlobs);
-                ild.flag = true;
+                ild.flag = 1;
             }
         }
+
+        //forward itself
+        if (!ld.flag)
+        {
+            if (ld.layerInstance)
+                ld.layerInstance->forward(ld.inputBlobs, ld.outputBlobs);
+            ld.flag = 1;
+        }
     }
 
     void forwardAll()
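The forwardLayer() change above fixes a real gap: the old code forwarded a layer's parents but never the layer itself. With flag serving as a run-once marker, one pass over the graph computes each layer at most once. The same pattern reduced to a self-contained sketch; Node, run(), and the graph layout are hypothetical stand-ins, not the dnn types:

    #include <set>
    #include <vector>

    struct Node
    {
        std::set<int> parents; // ids of the nodes feeding this one
        int flag;              // non-zero once run in the current pass
        Node() : flag(0) {}
        void run() {}          // stands in for layerInstance->forward(...)
    };

    static void forwardNode(std::vector<Node> &graph, int id)
    {
        Node &nd = graph[id];

        // forward parents first, each at most once per pass
        for (std::set<int>::iterator i = nd.parents.begin(); i != nd.parents.end(); ++i)
        {
            Node &par = graph[*i];
            if (!par.flag)
            {
                par.run();
                par.flag = 1;
            }
        }

        // forward the node itself, the step the patch adds
        if (!nd.flag)
        {
            nd.run();
            nd.flag = 1;
        }
    }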
"Unknown region type \"" + nrmType + "\""); + + size = params.get("local_size", 5); + if (size % 2 != 1) + CV_Error(cv::Error::StsBadArg, "LRN layer only supports odd values for local_size"); + + alpha = params.get("alpha", 1); + beta = params.get("beta", 0.75); + } + + void LRNLayer::allocate(const std::vector &inputs, std::vector &outputs) + { + CV_Assert(inputs.size() == 1); + outputs.resize(1); + + Vec4i shape = inputs[0]->shape(); + outputs[0].create(shape); + + shape[1] = 1; //maybe make shape[0] = 1 too + bufBlob.create(shape); + } + + void LRNLayer::forward(std::vector &inputs, std::vector &outputs) + { + Blob &src = *inputs[0]; + Blob &dst = outputs[0]; + + switch (type) + { + case CHANNEL_NRM: + channelNoramlization(src, dst); + break; + case SPATIAL_NRM: + spatialNormalization(src, dst); + break; + default: + CV_Error(cv::Error::StsNotImplemented, "Unimplemented mode of LRN layer"); + break; + } + } + + void LRNLayer::channelNoramlization(Blob &srcBlob, Blob &dstBlob) + { + int num = srcBlob.num(); + int channels = srcBlob.channels(); + + for (int n = 0; n < num; n++) + { + Mat buf = bufBlob.getMat(n, 0); + Mat accum = dstBlob.getMat(n, 0); //memory saving + accum.setTo(0); + + for (int cn = 0; cn < channels; cn++) + { + cv::accumulateSquare(srcBlob.getMat(), accum); + } + + accum.convertTo(accum, accum.type(), alpha/channels, 1); + cv::pow(accum, beta, accum); + + for (int cn = channels - 1; cn >= 0; cn--) + { + cv::divide(srcBlob.getMat(n, cn), accum, dstBlob.getMat(n, cn)); + } + } + } + + void LRNLayer::spatialNormalization(Blob &srcBlob, Blob &dstBlob) + { + int num = srcBlob.num(); + int channels = srcBlob.channels(); + + for (int n = 0; n < num; n++) + { + for (int cn = 0; cn < channels; cn++) + { + Mat src = srcBlob.getMat(n, cn); + Mat dst = dstBlob.getMat(n, cn); + uchar *dataDst0 = dst.data; + + cv::pow(srcBlob.getMat(n, cn), 2, dst); + //TODO: check border type + cv::boxFilter(dst, dst, dst.depth(), cv::Size(size, size), cv::Point(-1, -1), false, cv::BORDER_CONSTANT); + dst.convertTo(dst, dst.type(), alpha/(size*size), 1); + cv::pow(dst, beta, dst); + cv::divide(src, dst, dst); + + CV_DbgAssert(dataDst0 == dst.data); + } + } + } + +} +} \ No newline at end of file diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp new file mode 100644 index 000000000..e24e84f33 --- /dev/null +++ b/modules/dnn/src/layers/softmax_layer.cpp @@ -0,0 +1,116 @@ +#include "../precomp.hpp" +#include "layers_common.hpp" +#include +#include +using std::max; + +namespace cv +{ +namespace dnn +{ + class SoftMaxLayer : public Layer + { + int axis; + Blob maxAggregator; + + public: + SoftMaxLayer(LayerParams ¶ms); + void allocate(const std::vector &inputs, std::vector &outputs); + void forward(std::vector &inputs, std::vector &outputs); + }; + + + REGISTER_LAYER_CLASS(Softmax, SoftMaxLayer); + + + SoftMaxLayer::SoftMaxLayer(LayerParams ¶ms) + { + axis = params.get("axis", 1); + CV_Assert(0 <= axis && axis < 4); + } + + void SoftMaxLayer::allocate(const std::vector &inputs, std::vector &outputs) + { + CV_Assert(inputs.size() == 1); + + Vec4i shape = inputs[0]->shape(); + outputs.resize(1); + outputs[0].create(shape); + + shape[axis] = 1; + maxAggregator.create(shape); + } + + void SoftMaxLayer::forward(std::vector &inputs, std::vector &outputs) + { + Blob &src = *inputs[0]; + Blob &dst = outputs[0]; + + float *srcPtr = src.ptr(); + float *dstPtr = dst.ptr(); + float *bufPtr = maxAggregator.ptr(); + + size_t totalSize = src.total(); + size_t outerSize = 
diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp
new file mode 100644
index 000000000..e24e84f33
--- /dev/null
+++ b/modules/dnn/src/layers/softmax_layer.cpp
@@ -0,0 +1,116 @@
+#include "../precomp.hpp"
+#include "layers_common.hpp"
+#include <algorithm>
+#include <cstring>
+using std::max;
+
+namespace cv
+{
+namespace dnn
+{
+    class SoftMaxLayer : public Layer
+    {
+        int axis;
+        Blob maxAggregator;
+
+    public:
+        SoftMaxLayer(LayerParams &params);
+        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
+        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
+    };
+
+
+    REGISTER_LAYER_CLASS(Softmax, SoftMaxLayer);
+
+
+    SoftMaxLayer::SoftMaxLayer(LayerParams &params)
+    {
+        axis = params.get<int>("axis", 1);
+        CV_Assert(0 <= axis && axis < 4);
+    }
+
+    void SoftMaxLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    {
+        CV_Assert(inputs.size() == 1);
+
+        Vec4i shape = inputs[0]->shape();
+        outputs.resize(1);
+        outputs[0].create(shape);
+
+        shape[axis] = 1;
+        maxAggregator.create(shape);
+    }
+
+    void SoftMaxLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    {
+        Blob &src = *inputs[0];
+        Blob &dst = outputs[0];
+
+        float *srcPtr = src.ptr<float>();
+        float *dstPtr = dst.ptr<float>();
+        float *bufPtr = maxAggregator.ptr<float>();
+
+        size_t outerSize = src.total(0, axis);
+        size_t channels = src.size(axis);
+        size_t innerSize = src.total(axis + 1, -1);
+
+        size_t outerStep = src.total(axis);
+        size_t cnStep = src.total(axis + 1);
+
+        //compute max along axis
+        for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
+        {
+            size_t srcOffset = outerDim * outerStep;
+            size_t bufOffset = outerDim * cnStep;
+
+            memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize * sizeof(float));
+
+            for (size_t cnDim = 1; cnDim < channels; cnDim++)
+            {
+                for (size_t i = 0; i < innerSize; i++)
+                    bufPtr[bufOffset + i] = std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]);
+            }
+        }
+
+        //subtract max
+        for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
+        {
+            size_t srcOffset = outerDim * outerStep;
+            size_t bufOffset = outerDim * cnStep;
+
+            for (size_t cnDim = 0; cnDim < channels; cnDim++)
+            {
+                for (size_t i = 0; i < innerSize; i++)
+                    dstPtr[srcOffset + cnDim * cnStep + i] = srcPtr[srcOffset + cnDim * cnStep + i] - bufPtr[bufOffset + i];
+            }
+        }
+
+        cv::exp(dst.getMat(), dst.getMat());
+
+        for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
+        {
+            size_t srcOffset = outerDim * outerStep;
+            size_t bufOffset = outerDim * cnStep;
+
+            //sum exp along axis
+            for (size_t i = 0; i < innerSize; i++)
+                bufPtr[bufOffset + i] = 0.f;
+
+            for (size_t cnDim = 0; cnDim < channels; cnDim++)
+            {
+                for (size_t i = 0; i < innerSize; i++)
+                    bufPtr[bufOffset + i] += dstPtr[srcOffset + cnDim * cnStep + i];
+            }
+
+            //divide by computed sum
+            for (size_t cnDim = 0; cnDim < channels; cnDim++)
+            {
+                for (size_t i = 0; i < innerSize; i++)
+                    dstPtr[srcOffset + cnDim * cnStep + i] /= bufPtr[bufOffset + i];
+            }
+        }
+    }
+
+}
+}
\ No newline at end of file
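The strided loops above are easiest to check against a naive form. Below is a reference softmax over a raw float buffer decomposed into (outerSize, channels, innerSize), matching the layer's offsets (outerStep = channels * innerSize, cnStep = innerSize) and its max subtraction for numerical stability; function and parameter names are illustrative:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>

    static void softmaxAxisRef(const float *src, float *dst,
                               size_t outerSize, size_t channels, size_t innerSize)
    {
        for (size_t o = 0; o < outerSize; o++)
            for (size_t i = 0; i < innerSize; i++)
            {
                const size_t base = o * channels * innerSize + i;

                float vmax = src[base];                  // max along the axis
                for (size_t c = 1; c < channels; c++)
                    vmax = std::max(vmax, src[base + c * innerSize]);

                float sum = 0.f;                         // exponentiate and accumulate
                for (size_t c = 0; c < channels; c++)
                {
                    const float e = std::exp(src[base + c * innerSize] - vmax);
                    dst[base + c * innerSize] = e;
                    sum += e;
                }

                for (size_t c = 0; c < channels; c++)    // normalize
                    dst[base + c * innerSize] /= sum;
            }
    }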