Merge pull request #10979 from dkurt:unite_dnn_samples

pull/11075/head
Alexander Alekhin 7 years ago
commit ab110c0ad1
  1. 53
      doc/tutorials/dnn/dnn_googlenet/dnn_googlenet.markdown
  2. 47
      doc/tutorials/dnn/dnn_halide/dnn_halide.markdown
  3. 30
      doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown
  4. 10
      modules/core/include/opencv2/core.hpp
  5. 6
      modules/core/src/command_line_parser.cpp
  6. 22
      modules/core/test/test_utils.cpp
  7. 2
      modules/dnn/include/opencv2/dnn/all_layers.hpp
  8. 25
      modules/dnn/include/opencv2/dnn/dnn.hpp
  9. 42
      modules/dnn/src/dnn.cpp
  10. 2
      modules/dnn/src/layers/recurrent_layers.cpp
  11. 18
      modules/dnn/test/test_misc.cpp
  12. 1
      samples/data/dnn/.gitignore
  13. 3102
      samples/data/dnn/MobileNetSSD_300x300.prototxt
  14. 1546
      samples/data/dnn/VGG_VOC0712_SSD_300x300_iter_60000.prototxt
  15. 2156
      samples/data/dnn/bvlc_googlenet.prototxt
  16. 1000
      samples/data/dnn/classification_classes_ILSVRC2012.txt
  17. 40
      samples/data/dnn/enet-classes.txt
  18. 502
      samples/data/dnn/fcn32s-heavy-pascal.prototxt
  19. 612
      samples/data/dnn/fcn8s-heavy-pascal.prototxt
  20. 90
      samples/data/dnn/object_detection_classes_coco.txt
  21. 20
      samples/data/dnn/object_detection_classes_pascal_voc.txt
  22. 21
      samples/data/dnn/pascal-classes.txt
  23. BIN
      samples/data/dnn/rgb.jpg
  24. BIN
      samples/data/dnn/space_shuttle.jpg
  25. 1000
      samples/data/dnn/synset_words.txt
  26. 33
      samples/dnn/README.md
  27. 181
      samples/dnn/caffe_googlenet.cpp
  28. 136
      samples/dnn/classification.cpp
  29. 86
      samples/dnn/classification.py
  30. 93
      samples/dnn/faster_rcnn.cpp
  31. 138
      samples/dnn/fcn_semsegm.cpp
  32. 24
      samples/dnn/googlenet_python.py
  33. 132
      samples/dnn/mobilenet_ssd_python.py
  34. 229
      samples/dnn/object_detection.cpp
  35. 164
      samples/dnn/object_detection.py
  36. 164
      samples/dnn/resnet_ssd_face.cpp
  37. 55
      samples/dnn/resnet_ssd_face_python.py
  38. 237
      samples/dnn/segmentation.cpp
  39. 125
      samples/dnn/segmentation.py
  40. 110
      samples/dnn/squeezenet_halide.cpp
  41. 187
      samples/dnn/ssd_mobilenet_object_detection.cpp
  42. 156
      samples/dnn/ssd_object_detection.cpp
  43. 154
      samples/dnn/tf_inception.cpp
  44. 175
      samples/dnn/torch_enet.cpp
  45. 185
      samples/dnn/yolo_object_detection.cpp

@ -13,50 +13,53 @@ We will demonstrate results of this example on the following picture.
Source Code
-----------
We will be using snippets from the example application, that can be downloaded [here](https://github.com/opencv/opencv/blob/master/samples/dnn/caffe_googlenet.cpp).
We will be using snippets from the example application, that can be downloaded [here](https://github.com/opencv/opencv/blob/master/samples/dnn/classification.cpp).
@include dnn/caffe_googlenet.cpp
@include dnn/classification.cpp
Explanation
-----------
-# Firstly, download GoogLeNet model files:
[bvlc_googlenet.prototxt ](https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/bvlc_googlenet.prototxt) and
[bvlc_googlenet.prototxt ](https://github.com/opencv/opencv_extra/blob/master/testdata/dnn/bvlc_googlenet.prototxt) and
[bvlc_googlenet.caffemodel](http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel)
You also need a file with the names of [ILSVRC2012](http://image-net.org/challenges/LSVRC/2012/browse-synsets) classes:
[synset_words.txt](https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/synset_words.txt).
[classification_classes_ILSVRC2012.txt](https://github.com/opencv/opencv/tree/master/samples/dnn/classification_classes_ILSVRC2012.txt).
Put these files into the working directory of this program example.
-# Read and initialize network using path to .prototxt and .caffemodel files
@snippet dnn/caffe_googlenet.cpp Read and initialize network
@snippet dnn/classification.cpp Read and initialize network
-# Check that network was read successfully
@snippet dnn/caffe_googlenet.cpp Check that network was read successfully
You can skip the `framework` argument if one of the `model` or `config` files has a
`.caffemodel` or `.prototxt` extension.
This way the function cv::dnn::readNet can automatically detect the model's format.
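For example, a minimal standalone sketch of this step (the file names are the GoogLeNet files downloaded above) might look like this:
@code{.cpp}
#include <opencv2/dnn.hpp>

int main()
{
    // The framework is detected from the file extensions, so the third
    // argument of cv::dnn::readNet can be omitted here.
    cv::dnn::Net net = cv::dnn::readNet("bvlc_googlenet.caffemodel",
                                        "bvlc_googlenet.prototxt");
    CV_Assert(!net.empty());
    return 0;
}
@endcode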
-# Read the input image and convert it to a blob acceptable by GoogLeNet
@snippet dnn/caffe_googlenet.cpp Prepare blob
We convert the image to a 4-dimensional blob (a so-called batch) with 1x3x224x224 shape after applying necessary pre-processing like resizing and mean subtraction using the cv::dnn::blobFromImage function.
@snippet dnn/classification.cpp Open a video file or an image file or a camera stream
-# Pass the blob to the network
@snippet dnn/caffe_googlenet.cpp Set input blob
In bvlc_googlenet.prototxt the network input blob is named "data", therefore this blob is labeled as ".data" in the opencv_dnn API.
cv::VideoCapture can load both images and videos.
@snippet dnn/classification.cpp Create a 4D blob from a frame
We convert the image to a 4-dimensional blob (a so-called batch) with `1x3x224x224` shape
after applying necessary pre-processing like resizing and mean subtraction
`(-104, -117, -123)` for the blue, green and red channels respectively, using the cv::dnn::blobFromImage function.
Other blobs are labeled as "name_of_layer.name_of_layer_output".
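A standalone sketch of this pre-processing step, assuming the sample image shipped with OpenCV (any BGR image works):
@code{.cpp}
#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
#include <iostream>

int main()
{
    cv::Mat img = cv::imread("space_shuttle.jpg");
    CV_Assert(!img.empty());
    // Resize to 224x224 and subtract the mean (104, 117, 123) while keeping
    // the BGR channel order; the result is a 4D blob of shape 1x3x224x224.
    cv::Mat blob = cv::dnn::blobFromImage(img, 1.0, cv::Size(224, 224),
                                          cv::Scalar(104, 117, 123),
                                          /*swapRB=*/false, /*crop=*/false);
    std::cout << "blob dims: " << blob.dims << std::endl; // prints 4
    return 0;
}
@endcode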
-# Pass the blob to the network
@snippet dnn/classification.cpp Set input blob
-# Make forward pass
@snippet dnn/caffe_googlenet.cpp Make forward pass
During the forward pass the output of each network layer is computed, but in this example we need only the output of the "prob" layer.
@snippet dnn/classification.cpp Make forward pass
During the forward pass the output of each network layer is computed, but in this example we need only the output of the last layer.
-# Determine the best class
@snippet dnn/caffe_googlenet.cpp Gather output
We put the output of "prob" layer, which contain probabilities for each of 1000 ILSVRC2012 image classes, to the `prob` blob.
And find the index of element with maximal value in this one. This index correspond to the class of the image.
-# Print results
@snippet dnn/caffe_googlenet.cpp Print results
For our image we get:
> Best class: #812 'space shuttle'
>
> Probability: 99.6378%
@snippet dnn/classification.cpp Get a class with a highest score
We put the output of the network, which contains probabilities for each of the 1000 ILSVRC2012 image classes, into the `prob` blob.
Then we find the index of the element with the maximal value; this index corresponds to the class of the image.
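Finding the maximal element is a single cv::minMaxLoc call over the flattened output; a self-contained sketch with a dummy score matrix in place of the real network output:
@code{.cpp}
#include <opencv2/core.hpp>
#include <iostream>

int main()
{
    // Dummy 1x1000 score matrix standing in for the real network output.
    cv::Mat prob(1, 1000, CV_32F, cv::Scalar(0));
    prob.at<float>(0, 812) = 0.99f;

    cv::Point classIdPoint;
    double confidence;
    cv::minMaxLoc(prob.reshape(1, 1), 0, &confidence, 0, &classIdPoint);
    int classId = classIdPoint.x;
    std::cout << "Best class: #" << classId << ", score: " << confidence << std::endl;
    return 0;
}
@endcode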
-# Run an example from command line
@code
./example_dnn_classification --model=bvlc_googlenet.caffemodel --config=bvlc_googlenet.prototxt --width=224 --height=224 --classes=classification_classes_ILSVRC2012.txt --input=space_shuttle.jpg --mean="104 117 123"
@endcode
For our image we get a prediction of the class `space shuttle` with more than 99% confidence.

@ -74,46 +74,7 @@ When you build OpenCV add the following configuration flags:
- `HALIDE_ROOT_DIR` - path to Halide build directory
## Sample
@include dnn/squeezenet_halide.cpp
## Explanation
Download Caffe model from SqueezeNet repository: [train_val.prototxt](https://github.com/DeepScale/SqueezeNet/blob/master/SqueezeNet_v1.1/train_val.prototxt) and [squeezenet_v1.1.caffemodel](https://github.com/DeepScale/SqueezeNet/blob/master/SqueezeNet_v1.1/squeezenet_v1.1.caffemodel).
You also need a file with the names of [ILSVRC2012](http://image-net.org/challenges/LSVRC/2012/browse-synsets) classes:
[synset_words.txt](https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/synset_words.txt).
Put these files into the working directory of this program example.
-# Read and initialize network using path to .prototxt and .caffemodel files
@snippet dnn/squeezenet_halide.cpp Read and initialize network
-# Check that network was read successfully
@snippet dnn/squeezenet_halide.cpp Check that network was read successfully
-# Read the input image and convert it to a 4-dimensional blob acceptable by SqueezeNet v1.1
@snippet dnn/squeezenet_halide.cpp Prepare blob
-# Pass the blob to the network
@snippet dnn/squeezenet_halide.cpp Set input blob
-# Enable Halide backend for layers where it is implemented
@snippet dnn/squeezenet_halide.cpp Enable Halide backend
-# Make forward pass
@snippet dnn/squeezenet_halide.cpp Make forward pass
Remember that the first forward pass after initialization requires noticeably more
time than the next ones because Halide pipelines are compiled at runtime
on the first invocation.
-# Determine the best class
@snippet dnn/squeezenet_halide.cpp Determine the best class
-# Print results
@snippet dnn/squeezenet_halide.cpp Print results
For our image we get:
> Best class: #812 'space shuttle'
>
> Probability: 97.9812%
## Set Halide as a preferable backend
@code
net.setPreferableBackend(DNN_BACKEND_HALIDE);
@endcode
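Put together with the steps above, a minimal sketch of the whole pipeline might look like this (the file names are the SqueezeNet files mentioned earlier; a 227x227 zero-mean BGR input is assumed):
@code{.cpp}
#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    using namespace cv;
    using namespace cv::dnn;
    Net net = readNetFromCaffe("train_val.prototxt", "squeezenet_v1.1.caffemodel");
    // Layers without a Halide implementation fall back to the default backend.
    net.setPreferableBackend(DNN_BACKEND_HALIDE);

    Mat img = imread("space_shuttle.jpg");
    Mat blob = blobFromImage(img, 1.0, Size(227, 227), Scalar(),
                             /*swapRB=*/false, /*crop=*/false);
    net.setInput(blob);
    // The first pass is slower: Halide pipelines are compiled at this point.
    Mat out = net.forward();
    return 0;
}
@endcode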

@ -18,40 +18,26 @@ VIDEO DEMO:
Source Code
-----------
The latest version of sample source code can be downloaded [here](https://github.com/opencv/opencv/blob/master/samples/dnn/yolo_object_detection.cpp).
Use the universal sample for object detection models, written
[in C++](https://github.com/opencv/opencv/blob/master/samples/dnn/object_detection.cpp) and
[in Python](https://github.com/opencv/opencv/blob/master/samples/dnn/object_detection.py).
@include dnn/yolo_object_detection.cpp
How to compile in command line with pkg-config
----------------------------------------------
@code{.bash}
# g++ `pkg-config --cflags opencv` `pkg-config --libs opencv` yolo_object_detection.cpp -o yolo_object_detection
@endcode
Usage examples
--------------
Execute with a webcam:
@code{.bash}
$ yolo_object_detection -camera_device=0 -cfg=[PATH-TO-DARKNET]/cfg/yolo.cfg -model=[PATH-TO-DARKNET]/yolo.weights -class_names=[PATH-TO-DARKNET]/data/coco.names
@endcode
Execute with image:
@code{.bash}
$ yolo_object_detection -source=[PATH-IMAGE] -cfg=[PATH-TO-DARKNET]/cfg/yolo.cfg -model=[PATH-TO-DARKNET]/yolo.weights -class_names=[PATH-TO-DARKNET]/data/coco.names
$ example_dnn_object_detection --config=[PATH-TO-DARKNET]/cfg/yolo.cfg --model=[PATH-TO-DARKNET]/yolo.weights --classes=object_detection_classes_pascal_voc.txt --width=416 --height=416 --scale=0.00392
@endcode
Execute in video file:
Execute with image or video file:
@code{.bash}
$ yolo_object_detection -source=[PATH-TO-VIDEO] -cfg=[PATH-TO-DARKNET]/cfg/yolo.cfg -model=[PATH-TO-DARKNET]/yolo.weights -class_names=[PATH-TO-DARKNET]/data/coco.names
$ example_dnn_object_detection --config=[PATH-TO-DARKNET]/cfg/yolo.cfg --model=[PATH-TO-DARKNET]/yolo.weights --classes=object_detection_classes_pascal_voc.txt --width=416 --height=416 --scale=0.00392 --input=[PATH-TO-IMAGE-OR-VIDEO-FILE]
@endcode
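Under the hood the universal sample prepares the Darknet network roughly as follows; a minimal sketch (the cfg/weights paths and the input image are placeholders):
@code{.cpp}
#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    using namespace cv;
    using namespace cv::dnn;
    // Placeholders: point these to your Darknet configuration and weights.
    Net net = readNetFromDarknet("yolo.cfg", "yolo.weights");

    Mat frame = imread("example.jpg");
    // YOLO expects a 416x416 RGB input scaled to [0, 1] (1/255 ~ 0.00392).
    Mat blob = blobFromImage(frame, 0.00392, Size(416, 416), Scalar(),
                             /*swapRB=*/true, /*crop=*/false);
    net.setInput(blob);
    Mat detections = net.forward();
    return 0;
}
@endcode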

@ -3159,7 +3159,7 @@ protected:
struct Param {
enum { INT=0, BOOLEAN=1, REAL=2, STRING=3, MAT=4, MAT_VECTOR=5, ALGORITHM=6, FLOAT=7,
UNSIGNED_INT=8, UINT64=9, UCHAR=11 };
UNSIGNED_INT=8, UINT64=9, UCHAR=11, SCALAR=12 };
};
@ -3252,6 +3252,14 @@ template<> struct ParamType<uchar>
enum { type = Param::UCHAR };
};
template<> struct ParamType<Scalar>
{
typedef const Scalar& const_param_type;
typedef Scalar member_type;
enum { type = Param::SCALAR };
};
//! @} core_basic
} //namespace cv
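// Standalone usage sketch: the new Param::SCALAR mapping above lets
// CommandLineParser::get<Scalar>() parse up to four space-delimited numbers;
// components that are not given default to zero (e.g. "--mean=104 117 123"
// yields Scalar(104, 117, 123, 0)).
#include <opencv2/core.hpp>
#include <iostream>

int main(int argc, char** argv)
{
    cv::CommandLineParser parser(argc, argv, "{ mean | 0 0 0 | mean values }");
    cv::Scalar mean = parser.get<cv::Scalar>("mean");
    std::cout << mean[0] << " " << mean[1] << " "
              << mean[2] << " " << mean[3] << std::endl;
    return 0;
}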

@ -104,6 +104,12 @@ static void from_str(const String& str, int type, void* dst)
ss >> *(double*)dst;
else if( type == Param::STRING )
*(String*)dst = str;
else if( type == Param::SCALAR)
{
Scalar& scalar = *(Scalar*)dst;
for (int i = 0; i < 4 && !ss.eof(); ++i)
ss >> scalar[i];
}
else
CV_Error(Error::StsBadArg, "unknown/unsupported parameter type");

@ -261,4 +261,26 @@ TEST(AutoBuffer, allocate_test)
EXPECT_EQ(6u, abuf.size());
}
TEST(CommandLineParser, testScalar)
{
static const char * const keys3 =
"{ s0 | 3 4 5 | default scalar }"
"{ s1 | | single value scalar }"
"{ s2 | | two values scalar (default with zeros) }"
"{ s3 | | three values scalar }"
"{ s4 | | four values scalar }"
"{ s5 | | five values scalar }";
const char* argv[] = {"<bin>", "--s1=1.1", "--s3=1.1 2.2 3",
"--s4=-4.2 1 0 3", "--s5=5 -4 3 2 1"};
const int argc = 5;
CommandLineParser parser(argc, argv, keys3);
EXPECT_EQ(parser.get<Scalar>("s0"), Scalar(3, 4, 5));
EXPECT_EQ(parser.get<Scalar>("s1"), Scalar(1.1));
EXPECT_EQ(parser.get<Scalar>("s2"), Scalar(0));
EXPECT_EQ(parser.get<Scalar>("s3"), Scalar(1.1, 2.2, 3));
EXPECT_EQ(parser.get<Scalar>("s4"), Scalar(-4.2, 1, 0, 3));
EXPECT_EQ(parser.get<Scalar>("s5"), Scalar(5, -4, 3, 2));
}
}} // namespace

@ -153,7 +153,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
*/
int inputNameToIndex(String inputName);
int outputNameToIndex(String outputName);
int outputNameToIndex(const String& outputName);
};
/** @brief Classical recurrent layer

@ -222,7 +222,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
/** @brief Returns index of output blob in output array.
* @see inputNameToIndex()
*/
virtual int outputNameToIndex(String outputName);
CV_WRAP virtual int outputNameToIndex(const String& outputName);
/**
* @brief Ask layer if it support specific backend for doing computations.
@ -683,6 +683,29 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
*/
CV_EXPORTS_W Net readNetFromTorch(const String &model, bool isBinary = true);
/**
* @brief Read deep learning network represented in one of the supported formats.
* @param[in] model Binary file containing trained weights. The following file
* extensions are expected for models from different frameworks:
* * `*.caffemodel` (Caffe, http://caffe.berkeleyvision.org/)
* * `*.pb` (TensorFlow, https://www.tensorflow.org/)
* * `*.t7` | `*.net` (Torch, http://torch.ch/)
* * `*.weights` (Darknet, https://pjreddie.com/darknet/)
* @param[in] config Text file containing network configuration. It could be a
* file with the following extensions:
* * `*.prototxt` (Caffe, http://caffe.berkeleyvision.org/)
* * `*.pbtxt` (TensorFlow, https://www.tensorflow.org/)
* * `*.cfg` (Darknet, https://pjreddie.com/darknet/)
* @param[in] framework Explicit framework name tag to determine a format.
* @returns Net object.
*
* This function automatically detects the origin framework of the trained model
* and calls an appropriate function such as @ref readNetFromCaffe, @ref readNetFromTensorflow,
* @ref readNetFromTorch or @ref readNetFromDarknet. The order of @p model and @p config
* arguments does not matter.
*/
CV_EXPORTS_W Net readNet(const String& model, const String& config = "", const String& framework = "");
/** @brief Loads blob which was serialized as torch.Tensor object of Torch7 framework.
* @warning This function has the same limitations as readNetFromTorch().
*/

@ -399,7 +399,7 @@ struct DataLayer : public Layer
void forward(std::vector<Mat*>&, std::vector<Mat>&, std::vector<Mat> &) {}
void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) {}
int outputNameToIndex(String tgtName)
int outputNameToIndex(const String& tgtName)
{
int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin());
return (idx < (int)outNames.size()) ? idx : -1;
@ -2521,7 +2521,7 @@ int Layer::inputNameToIndex(String)
return -1;
}
int Layer::outputNameToIndex(String)
int Layer::outputNameToIndex(const String&)
{
return -1;
}
@ -2813,5 +2813,43 @@ BackendWrapper::BackendWrapper(const Ptr<BackendWrapper>& base, const MatShape&
BackendWrapper::~BackendWrapper() {}
Net readNet(const String& _model, const String& _config, const String& _framework)
{
String framework = _framework.toLowerCase();
String model = _model;
String config = _config;
const std::string modelExt = model.substr(model.rfind('.') + 1);
const std::string configExt = config.substr(config.rfind('.') + 1);
if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" ||
modelExt == "prototxt" || configExt == "prototxt")
{
if (modelExt == "prototxt" || configExt == "caffemodel")
std::swap(model, config);
return readNetFromCaffe(config, model);
}
if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" ||
modelExt == "pbtxt" || configExt == "pbtxt")
{
if (modelExt == "pbtxt" || configExt == "pb")
std::swap(model, config);
return readNetFromTensorflow(model, config);
}
if (framework == "torch" || modelExt == "t7" || modelExt == "net" ||
configExt == "t7" || configExt == "net")
{
return readNetFromTorch(model.empty() ? config : model);
}
if (framework == "darknet" || modelExt == "weights" || configExt == "weights" ||
modelExt == "cfg" || configExt == "cfg")
{
if (modelExt == "cfg" || configExt == "weights")
std::swap(model, config);
return readNetFromDarknet(config, model);
}
CV_Error(Error::StsError, "Cannot determine an origin framework of files: " +
model + (config.empty() ? "" : ", " + config));
return Net();
}
CV__DNN_EXPERIMENTAL_NS_END
}} // namespace

@ -355,7 +355,7 @@ int LSTMLayer::inputNameToIndex(String inputName)
return -1;
}
int LSTMLayer::outputNameToIndex(String outputName)
int LSTMLayer::outputNameToIndex(const String& outputName)
{
if (outputName.toLowerCase() == "h")
return 0;

@ -57,4 +57,22 @@ TEST(imagesFromBlob, Regression)
}
}
TEST(readNet, Regression)
{
Net net = readNet(findDataFile("dnn/squeezenet_v1.1.prototxt", false),
findDataFile("dnn/squeezenet_v1.1.caffemodel", false));
EXPECT_FALSE(net.empty());
net = readNet(findDataFile("dnn/opencv_face_detector.caffemodel", false),
findDataFile("dnn/opencv_face_detector.prototxt", false));
EXPECT_FALSE(net.empty());
net = readNet(findDataFile("dnn/openface_nn4.small2.v1.t7", false));
EXPECT_FALSE(net.empty());
net = readNet(findDataFile("dnn/tiny-yolo-voc.cfg", false),
findDataFile("dnn/tiny-yolo-voc.weights", false));
EXPECT_FALSE(net.empty());
net = readNet(findDataFile("dnn/ssd_mobilenet_v1_coco.pbtxt", false),
findDataFile("dnn/ssd_mobilenet_v1_coco.pb", false));
EXPECT_FALSE(net.empty());
}
}} // namespace

@ -1 +0,0 @@
*.caffemodel

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -1,20 +1,20 @@
Unlabeled 0 0 0
Road 128 64 128
Sidewalk 244 35 232
Building 70 70 70
Wall 102 102 156
Fence 190 153 153
Pole 153 153 153
TrafficLight 250 170 30
TrafficSign 220 220 0
Vegetation 107 142 35
Terrain 152 251 152
Sky 70 130 180
Person 220 20 60
Rider 255 0 0
Car 0 0 142
Truck 0 0 70
Bus 0 60 100
Train 0 80 100
Motorcycle 0 0 230
Bicycle 119 11 32
Unlabeled
Road
Sidewalk
Building
Wall
Fence
Pole
TrafficLight
TrafficSign
Vegetation
Terrain
Sky
Person
Rider
Car
Truck
Bus
Train
Motorcycle
Bicycle

@ -1,502 +0,0 @@
#
# This prototxt is based on voc-fcn32s/val.prototxt file from
# https://github.com/shelhamer/fcn.berkeleyvision.org, which is distributed under
# Caffe (BSD) license:
# http://caffe.berkeleyvision.org/model_zoo.html#bvlc-model-license
#
name: "voc-fcn32s"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 500
input_dim: 500
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 100
kernel_size: 3
stride: 1
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2"
type: "Convolution"
bottom: "conv1_1"
top: "conv1_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1"
top: "conv2_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "conv3_2"
top: "conv3_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4_1"
type: "Convolution"
bottom: "pool3"
top: "conv4_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3"
type: "Convolution"
bottom: "conv4_2"
top: "conv4_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4_3"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5_1"
type: "Convolution"
bottom: "pool4"
top: "conv5_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2"
type: "Convolution"
bottom: "conv5_1"
top: "conv5_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3"
type: "Convolution"
bottom: "conv5_2"
top: "conv5_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_3"
type: "ReLU"
bottom: "conv5_3"
top: "conv5_3"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5_3"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "fc6"
type: "Convolution"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 4096
pad: 0
kernel_size: 7
stride: 1
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "fc7"
type: "Convolution"
bottom: "fc6"
top: "fc7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 4096
pad: 0
kernel_size: 1
stride: 1
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "score_fr"
type: "Convolution"
bottom: "fc7"
top: "score_fr"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 21
pad: 0
kernel_size: 1
}
}
layer {
name: "upscore"
type: "Deconvolution"
bottom: "score_fr"
top: "upscore"
param {
lr_mult: 0
}
convolution_param {
num_output: 21
bias_term: false
kernel_size: 64
stride: 32
}
}
layer {
name: "score"
type: "Crop"
bottom: "upscore"
bottom: "data"
top: "score"
crop_param {
axis: 2
offset: 19
}
}

@ -1,612 +0,0 @@
#
# This prototxt is based on voc-fcn8s/val.prototxt file from
# https://github.com/shelhamer/fcn.berkeleyvision.org, which is distributed under
# Caffe (BSD) license:
# http://caffe.berkeleyvision.org/model_zoo.html#bvlc-model-license
#
name: "voc-fcn8s"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 500
input_dim: 500
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 100
kernel_size: 3
stride: 1
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2"
type: "Convolution"
bottom: "conv1_1"
top: "conv1_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1"
top: "conv2_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "conv3_2"
top: "conv3_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4_1"
type: "Convolution"
bottom: "pool3"
top: "conv4_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3"
type: "Convolution"
bottom: "conv4_2"
top: "conv4_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4_3"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5_1"
type: "Convolution"
bottom: "pool4"
top: "conv5_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2"
type: "Convolution"
bottom: "conv5_1"
top: "conv5_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3"
type: "Convolution"
bottom: "conv5_2"
top: "conv5_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_3"
type: "ReLU"
bottom: "conv5_3"
top: "conv5_3"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5_3"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "fc6"
type: "Convolution"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 4096
pad: 0
kernel_size: 7
stride: 1
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "fc7"
type: "Convolution"
bottom: "fc6"
top: "fc7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 4096
pad: 0
kernel_size: 1
stride: 1
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "score_fr"
type: "Convolution"
bottom: "fc7"
top: "score_fr"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 21
pad: 0
kernel_size: 1
}
}
layer {
name: "upscore2"
type: "Deconvolution"
bottom: "score_fr"
top: "upscore2"
param {
lr_mult: 0
}
convolution_param {
num_output: 21
bias_term: false
kernel_size: 4
stride: 2
}
}
layer {
name: "score_pool4"
type: "Convolution"
bottom: "pool4"
top: "score_pool4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 21
pad: 0
kernel_size: 1
}
}
layer {
name: "score_pool4c"
type: "Crop"
bottom: "score_pool4"
bottom: "upscore2"
top: "score_pool4c"
crop_param {
axis: 2
offset: 5
}
}
layer {
name: "fuse_pool4"
type: "Eltwise"
bottom: "upscore2"
bottom: "score_pool4c"
top: "fuse_pool4"
eltwise_param {
operation: SUM
}
}
layer {
name: "upscore_pool4"
type: "Deconvolution"
bottom: "fuse_pool4"
top: "upscore_pool4"
param {
lr_mult: 0
}
convolution_param {
num_output: 21
bias_term: false
kernel_size: 4
stride: 2
}
}
layer {
name: "score_pool3"
type: "Convolution"
bottom: "pool3"
top: "score_pool3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 21
pad: 0
kernel_size: 1
}
}
layer {
name: "score_pool3c"
type: "Crop"
bottom: "score_pool3"
bottom: "upscore_pool4"
top: "score_pool3c"
crop_param {
axis: 2
offset: 9
}
}
layer {
name: "fuse_pool3"
type: "Eltwise"
bottom: "upscore_pool4"
bottom: "score_pool3c"
top: "fuse_pool3"
eltwise_param {
operation: SUM
}
}
layer {
name: "upscore8"
type: "Deconvolution"
bottom: "fuse_pool3"
top: "upscore8"
param {
lr_mult: 0
}
convolution_param {
num_output: 21
bias_term: false
kernel_size: 16
stride: 8
}
}
layer {
name: "score"
type: "Crop"
bottom: "upscore8"
bottom: "data"
top: "score"
crop_param {
axis: 2
offset: 31
}
}

@ -0,0 +1,90 @@
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush

@ -0,0 +1,20 @@
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor

@ -1,21 +0,0 @@
background 0 0 0
aeroplane 128 0 0
bicycle 0 128 0
bird 128 128 0
boat 0 0 128
bottle 128 0 128
bus 0 128 128
car 128 128 128
cat 64 0 0
chair 192 0 0
cow 64 128 0
diningtable 192 128 0
dog 64 0 128
horse 192 0 128
motorbike 64 128 128
person 192 128 128
pottedplant 0 64 0
sheep 128 64 0
sofa 0 192 0
train 128 192 0
tvmonitor 0 64 128

Binary file not shown (before: 46 KiB).

Binary file not shown (before: 27 KiB).

File diff suppressed because it is too large

@ -0,0 +1,33 @@
# OpenCV deep learning module samples
## Model Zoo
### Object detection
| Model | Scale | Size WxH| Mean subtraction | Channels order |
|---------------|-------|-----------|--------------------|-------|
| [MobileNet-SSD, Caffe](https://github.com/chuanqi305/MobileNet-SSD/) | `0.00784 (2/255)` | `300x300` | `127.5 127.5 127.5` | BGR |
| [OpenCV face detector](https://github.com/opencv/opencv/tree/master/samples/dnn/face_detector) | `1.0` | `300x300` | `104 177 123` | BGR |
| [SSDs from TensorFlow](https://github.com/tensorflow/models/tree/master/research/object_detection/) | `0.00784 (2/255)` | `300x300` | `127.5 127.5 127.5` | RGB |
| [YOLO](https://pjreddie.com/darknet/yolo/) | `0.00392 (1/255)` | `416x416` | `0 0 0` | RGB |
| [VGG16-SSD](https://github.com/weiliu89/caffe/tree/ssd) | `1.0` | `300x300` | `104 117 123` | BGR |
| [Faster-RCNN](https://github.com/rbgirshick/py-faster-rcnn) | `1.0` | `800x600` | `102.9801 115.9465 122.7717` | BGR |
| [R-FCN](https://github.com/YuwenXiong/py-R-FCN) | `1.0` | `800x600` | `102.9801 115.9465 122.7717` | BGR |
### Classification
| Model | Scale | Size WxH| Mean subtraction | Channels order |
|---------------|-------|-----------|--------------------|-------|
| GoogLeNet | `1.0` | `224x224` | `104 117 123` | BGR |
| [SqueezeNet](https://github.com/DeepScale/SqueezeNet) | `1.0` | `227x227` | `0 0 0` | BGR |
### Semantic segmentation
| Model | Scale | Size WxH| Mean subtraction | Channels order |
|---------------|-------|-----------|--------------------|-------|
| [ENet](https://github.com/e-lab/ENet-training) | `0.00392 (1/255)` | `1024x512` | `0 0 0` | RGB |
| FCN8s | `1.0` | `500x500` | `0 0 0` | BGR |
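Each table row maps directly onto the pre-processing arguments of `cv::dnn::blobFromImage`. A minimal sketch for the MobileNet-SSD row (the file and image paths are placeholders):

```cpp
#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    using namespace cv;
    using namespace cv::dnn;
    // Placeholders: the MobileNet-SSD deploy files from the Model Zoo link above.
    Net net = readNet("MobileNetSSD_deploy.caffemodel", "MobileNetSSD_deploy.prototxt");

    Mat frame = imread("example.jpg");
    // Scale 0.00784 (2/255), size 300x300, mean 127.5 per channel,
    // BGR channel order (swapRB=false), as listed in the table.
    Mat blob = blobFromImage(frame, 0.00784, Size(300, 300),
                             Scalar(127.5, 127.5, 127.5),
                             /*swapRB=*/false, /*crop=*/false);
    net.setInput(blob);
    Mat detections = net.forward();
    return 0;
}
```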
## References
* [Models downloading script](https://github.com/opencv/opencv_extra/blob/master/testdata/dnn/download_models.py)
* [Configuration files adopted for OpenCV](https://github.com/opencv/opencv_extra/tree/master/testdata/dnn)
* [How to import models from TensorFlow Object Detection API](https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API)
* [Names of classes from different datasets](https://github.com/opencv/opencv/tree/master/samples/data/dnn)

@ -1,181 +0,0 @@
/**M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core/utils/trace.hpp>
using namespace cv;
using namespace cv::dnn;
#include <fstream>
#include <iostream>
#include <cstdlib>
using namespace std;
/* Find best class for the blob (i. e. class with maximal probability) */
static void getMaxClass(const Mat &probBlob, int *classId, double *classProb)
{
Mat probMat = probBlob.reshape(1, 1); //reshape the blob to 1x1000 matrix
Point classNumber;
minMaxLoc(probMat, NULL, classProb, NULL, &classNumber);
*classId = classNumber.x;
}
static std::vector<String> readClassNames(const char *filename )
{
std::vector<String> classNames;
std::ifstream fp(filename);
if (!fp.is_open())
{
std::cerr << "File with classes labels not found: " << filename << std::endl;
exit(-1);
}
std::string name;
while (!fp.eof())
{
std::getline(fp, name);
if (name.length())
classNames.push_back( name.substr(name.find(' ')+1) );
}
fp.close();
return classNames;
}
const char* params
= "{ help | false | Sample app for loading googlenet model }"
"{ proto | bvlc_googlenet.prototxt | model configuration }"
"{ model | bvlc_googlenet.caffemodel | model weights }"
"{ label | synset_words.txt | names of ILSVRC2012 classes }"
"{ image | space_shuttle.jpg | path to image file }"
"{ opencl | false | enable OpenCL }"
;
int main(int argc, char **argv)
{
CV_TRACE_FUNCTION();
CommandLineParser parser(argc, argv, params);
if (parser.get<bool>("help"))
{
parser.printMessage();
return 0;
}
String modelTxt = parser.get<string>("proto");
String modelBin = parser.get<string>("model");
String imageFile = parser.get<String>("image");
String classNameFile = parser.get<String>("label");
Net net;
try {
//! [Read and initialize network]
net = dnn::readNetFromCaffe(modelTxt, modelBin);
//! [Read and initialize network]
}
catch (const cv::Exception& e) {
std::cerr << "Exception: " << e.what() << std::endl;
//! [Check that network was read successfully]
if (net.empty())
{
std::cerr << "Can't load network by using the following files: " << std::endl;
std::cerr << "prototxt: " << modelTxt << std::endl;
std::cerr << "caffemodel: " << modelBin << std::endl;
std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl;
std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
exit(-1);
}
//! [Check that network was read successfully]
}
if (parser.get<bool>("opencl"))
{
net.setPreferableTarget(DNN_TARGET_OPENCL);
}
//! [Prepare blob]
Mat img = imread(imageFile);
if (img.empty())
{
std::cerr << "Can't read image from the file: " << imageFile << std::endl;
exit(-1);
}
//GoogLeNet accepts only 224x224 BGR-images
Mat inputBlob = blobFromImage(img, 1.0f, Size(224, 224),
Scalar(104, 117, 123), false); //Convert Mat to batch of images
//! [Prepare blob]
net.setInput(inputBlob, "data"); //set the network input
Mat prob = net.forward("prob"); //compute output
cv::TickMeter t;
for (int i = 0; i < 10; i++)
{
CV_TRACE_REGION("forward");
//! [Set input blob]
net.setInput(inputBlob, "data"); //set the network input
//! [Set input blob]
t.start();
//! [Make forward pass]
prob = net.forward("prob"); //compute output
//! [Make forward pass]
t.stop();
}
//! [Gather output]
int classId;
double classProb;
getMaxClass(prob, &classId, &classProb);//find the best class
//! [Gather output]
//! [Print results]
std::vector<String> classNames = readClassNames(classNameFile.c_str());
std::cout << "Best class: #" << classId << " '" << classNames.at(classId) << "'" << std::endl;
std::cout << "Probability: " << classProb * 100 << "%" << std::endl;
//! [Print results]
std::cout << "Time: " << (double)t.getTimeMilli() / t.getCounter() << " ms (average from " << t.getCounter() << " iterations)" << std::endl;
return 0;
} //main

@ -0,0 +1,136 @@
#include <fstream>
#include <sstream>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
const char* keys =
"{ help h | | Print help message. }"
"{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}"
"{ model m | | Path to a binary file of model contains trained weights. "
"It could be a file with extensions .caffemodel (Caffe), "
".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet) }"
"{ config c | | Path to a text file of model contains network configuration. "
"It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet) }"
"{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }"
"{ classes | | Optional path to a text file with names of classes. }"
"{ mean | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }"
"{ scale | 1 | Preprocess input image by multiplying on a scale factor. }"
"{ width | | Preprocess input image by resizing to a specific width. }"
"{ height | | Preprocess input image by resizing to a specific height. }"
"{ rgb | | Indicate that model works with RGB input images instead BGR ones. }"
"{ backend | 0 | Choose one of computation backends: "
"0: default C++ backend, "
"1: Halide language (http://halide-lang.org/), "
"2: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)}"
"{ target | 0 | Choose one of target computation devices: "
"0: CPU target (by default),"
"1: OpenCL }";
using namespace cv;
using namespace dnn;
std::vector<std::string> classes;
int main(int argc, char** argv)
{
CommandLineParser parser(argc, argv, keys);
parser.about("Use this script to run classification deep learning networks using OpenCV.");
if (argc == 1 || parser.has("help"))
{
parser.printMessage();
return 0;
}
float scale = parser.get<float>("scale");
Scalar mean = parser.get<Scalar>("mean");
bool swapRB = parser.get<bool>("rgb");
CV_Assert(parser.has("width"), parser.has("height"));
int inpWidth = parser.get<int>("width");
int inpHeight = parser.get<int>("height");
String model = parser.get<String>("model");
String config = parser.get<String>("config");
String framework = parser.get<String>("framework");
int backendId = parser.get<int>("backend");
int targetId = parser.get<int>("target");
// Open file with classes names.
if (parser.has("classes"))
{
std::string file = parser.get<String>("classes");
std::ifstream ifs(file.c_str());
if (!ifs.is_open())
CV_Error(Error::StsError, "File " + file + " not found");
std::string line;
while (std::getline(ifs, line))
{
classes.push_back(line);
}
}
CV_Assert(parser.has("model"));
//! [Read and initialize network]
Net net = readNet(model, config, framework);
net.setPreferableBackend(backendId);
net.setPreferableTarget(targetId);
//! [Read and initialize network]
// Create a window
static const std::string kWinName = "Deep learning image classification in OpenCV";
namedWindow(kWinName, WINDOW_NORMAL);
//! [Open a video file or an image file or a camera stream]
VideoCapture cap;
if (parser.has("input"))
cap.open(parser.get<String>("input"));
else
cap.open(0);
//! [Open a video file or an image file or a camera stream]
// Process frames.
Mat frame, blob;
while (waitKey(1) < 0)
{
cap >> frame;
if (frame.empty())
{
waitKey();
break;
}
//! [Create a 4D blob from a frame]
blobFromImage(frame, blob, scale, Size(inpWidth, inpHeight), mean, swapRB, false);
//! [Create a 4D blob from a frame]
//! [Set input blob]
net.setInput(blob);
//! [Set input blob]
//! [Make forward pass]
Mat prob = net.forward();
//! [Make forward pass]
//! [Get a class with a highest score]
Point classIdPoint;
double confidence;
minMaxLoc(prob.reshape(1, 1), 0, &confidence, 0, &classIdPoint);
int classId = classIdPoint.x;
//! [Get a class with a highest score]
// Put efficiency information.
std::vector<double> layersTimes;
double freq = getTickFrequency() / 1000;
double t = net.getPerfProfile(layersTimes) / freq;
std::string label = format("Inference time: %.2f ms", t);
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
// Print predicted class.
label = format("%s: %.4f", (classes.empty() ? format("Class #%d", classId).c_str() :
classes[classId].c_str()),
confidence);
putText(frame, label, Point(0, 40), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
imshow(kWinName, frame);
}
return 0;
}

@ -0,0 +1,86 @@
import cv2 as cv
import argparse
import numpy as np
import sys
backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE)
targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL)
parser = argparse.ArgumentParser(description='Use this script to run classification deep learning networks using OpenCV.')
parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.')
parser.add_argument('--model', required=True,
help='Path to a binary file of model contains trained weights. '
'It could be a file with extensions .caffemodel (Caffe), '
'.pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet)')
parser.add_argument('--config',
help='Path to a text file of model contains network configuration. '
'It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet)')
parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet'],
help='Optional name of an origin framework of the model. '
'Detect it automatically if it does not set.')
parser.add_argument('--classes', help='Optional path to a text file with names of classes.')
parser.add_argument('--mean', nargs='+', type=float, default=[0, 0, 0],
help='Preprocess input image by subtracting mean values. '
'Mean values should be in BGR order.')
parser.add_argument('--scale', type=float, default=1.0,
help='Preprocess input image by multiplying on a scale factor.')
parser.add_argument('--width', type=int, required=True,
help='Preprocess input image by resizing to a specific width.')
parser.add_argument('--height', type=int, required=True,
help='Preprocess input image by resizing to a specific height.')
parser.add_argument('--rgb', action='store_true',
help='Indicate that model works with RGB input images instead BGR ones.')
parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
help="Choose one of computation backends: "
"%d: default C++ backend, "
"%d: Halide language (http://halide-lang.org/), "
"%d: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)" % backends)
parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
help='Choose one of target computation devices: '
'%d: CPU target (by default), '
'%d: OpenCL' % targets)
args = parser.parse_args()
# Load names of classes
classes = None
if args.classes:
with open(args.classes, 'rt') as f:
classes = f.read().rstrip('\n').split('\n')
# Load a network
net = cv.dnn.readNet(args.model, args.config, args.framework)
net.setPreferableBackend(args.backend)
net.setPreferableTarget(args.target)
winName = 'Deep learning image classification in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
cap = cv.VideoCapture(args.input if args.input else 0)
while cv.waitKey(1) < 0:
hasFrame, frame = cap.read()
if not hasFrame:
cv.waitKey()
break
# Create a 4D blob from a frame.
blob = cv.dnn.blobFromImage(frame, args.scale, (args.width, args.height), args.mean, args.rgb, crop=False)
# Run a model
net.setInput(blob)
out = net.forward()
# Get a class with a highest score.
out = out.flatten()
classId = np.argmax(out)
confidence = out[classId]
# Put efficiency information.
t, _ = net.getPerfProfile()
label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
# Print predicted class.
label = '%s: %.4f' % (classes[classId] if classes else 'Class #%d' % classId, confidence)
cv.putText(frame, label, (0, 40), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
cv.imshow(winName, frame)

@ -1,93 +0,0 @@
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace dnn;
const char* keys =
"{ help h | | print help message }"
"{ proto p | | path to .prototxt }"
"{ model m | | path to .caffemodel }"
"{ image i | | path to input image }"
"{ conf c | 0.8 | minimal confidence }";
const char* classNames[] = {
"__background__",
"aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair",
"cow", "diningtable", "dog", "horse",
"motorbike", "person", "pottedplant",
"sheep", "sofa", "train", "tvmonitor"
};
static const int kInpWidth = 800;
static const int kInpHeight = 600;
int main(int argc, char** argv)
{
// Parse command line arguments.
CommandLineParser parser(argc, argv, keys);
parser.about("This sample is used to run Faster-RCNN and R-FCN object detection "
"models with OpenCV. You can get required models from "
"https://github.com/rbgirshick/py-faster-rcnn (Faster-RCNN) and from "
"https://github.com/YuwenXiong/py-R-FCN (R-FCN). Corresponding .prototxt "
"files may be found at https://github.com/opencv/opencv_extra/tree/master/testdata/dnn.");
if (argc == 1 || parser.has("help"))
{
parser.printMessage();
return 0;
}
String protoPath = parser.get<String>("proto");
String modelPath = parser.get<String>("model");
String imagePath = parser.get<String>("image");
float confThreshold = parser.get<float>("conf");
CV_Assert(!protoPath.empty(), !modelPath.empty(), !imagePath.empty());
// Load a model.
Net net = readNetFromCaffe(protoPath, modelPath);
Mat img = imread(imagePath);
resize(img, img, Size(kInpWidth, kInpHeight));
Mat blob = blobFromImage(img, 1.0, Size(), Scalar(102.9801, 115.9465, 122.7717), false, false);
Mat imInfo = (Mat_<float>(1, 3) << img.rows, img.cols, 1.6f);
net.setInput(blob, "data");
net.setInput(imInfo, "im_info");
// Draw detections.
Mat detections = net.forward();
const float* data = (float*)detections.data;
for (size_t i = 0; i < detections.total(); i += 7)
{
// An every detection is a vector [id, classId, confidence, left, top, right, bottom]
float confidence = data[i + 2];
if (confidence > confThreshold)
{
int classId = (int)data[i + 1];
int left = max(0, min((int)data[i + 3], img.cols - 1));
int top = max(0, min((int)data[i + 4], img.rows - 1));
int right = max(0, min((int)data[i + 5], img.cols - 1));
int bottom = max(0, min((int)data[i + 6], img.rows - 1));
// Draw a bounding box.
rectangle(img, Point(left, top), Point(right, bottom), Scalar(0, 255, 0));
// Put a label with a class name and confidence.
String label = cv::format("%s, %.3f", classNames[classId], confidence);
int baseLine;
Size labelSize = cv::getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
top = max(top, labelSize.height);
rectangle(img, Point(left, top - labelSize.height),
Point(left + labelSize.width, top + baseLine),
Scalar(255, 255, 255), FILLED);
putText(img, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));
}
}
imshow("frame", img);
waitKey();
return 0;
}

@ -1,138 +0,0 @@
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace cv::dnn;
#include <fstream>
#include <iostream>
#include <cstdlib>
using namespace std;
static const string fcnType = "fcn8s";
static vector<cv::Vec3b> readColors(const string &filename = "pascal-classes.txt")
{
vector<cv::Vec3b> colors;
ifstream fp(filename.c_str());
if (!fp.is_open())
{
cerr << "File with colors not found: " << filename << endl;
exit(-1);
}
string line;
while (!fp.eof())
{
getline(fp, line);
if (line.length())
{
stringstream ss(line);
string name; ss >> name;
int temp;
cv::Vec3b color;
ss >> temp; color[0] = (uchar)temp;
ss >> temp; color[1] = (uchar)temp;
ss >> temp; color[2] = (uchar)temp;
colors.push_back(color);
}
}
fp.close();
return colors;
}
static void colorizeSegmentation(const Mat &score, const vector<cv::Vec3b> &colors, cv::Mat &segm)
{
const int rows = score.size[2];
const int cols = score.size[3];
const int chns = score.size[1];
cv::Mat maxCl=cv::Mat::zeros(rows, cols, CV_8UC1);
cv::Mat maxVal(rows, cols, CV_32FC1, cv::Scalar(-FLT_MAX));
for (int ch = 0; ch < chns; ch++)
{
for (int row = 0; row < rows; row++)
{
const float *ptrScore = score.ptr<float>(0, ch, row);
uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
float *ptrMaxVal = maxVal.ptr<float>(row);
for (int col = 0; col < cols; col++)
{
if (ptrScore[col] > ptrMaxVal[col])
{
ptrMaxVal[col] = ptrScore[col];
ptrMaxCl[col] = (uchar)ch;
}
}
}
}
segm.create(rows, cols, CV_8UC3);
for (int row = 0; row < rows; row++)
{
const uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
cv::Vec3b *ptrSegm = segm.ptr<cv::Vec3b>(row);
for (int col = 0; col < cols; col++)
{
ptrSegm[col] = colors[ptrMaxCl[col]];
}
}
}
int main(int argc, char **argv)
{
String modelTxt = fcnType + "-heavy-pascal.prototxt";
String modelBin = fcnType + "-heavy-pascal.caffemodel";
String imageFile = (argc > 1) ? argv[1] : "rgb.jpg";
vector<cv::Vec3b> colors = readColors();
//! [Initialize network]
dnn::Net net = readNetFromCaffe(modelTxt, modelBin);
//! [Initialize network]
if (net.empty())
{
cerr << "Can't load network by using the following files: " << endl;
cerr << "prototxt: " << modelTxt << endl;
cerr << "caffemodel: " << modelBin << endl;
cerr << fcnType << "-heavy-pascal.caffemodel can be downloaded here:" << endl;
cerr << "http://dl.caffe.berkeleyvision.org/" << fcnType << "-heavy-pascal.caffemodel" << endl;
exit(-1);
}
//! [Prepare blob]
Mat img = imread(imageFile);
if (img.empty())
{
cerr << "Can't read image from the file: " << imageFile << endl;
exit(-1);
}
resize(img, img, Size(500, 500), 0, 0, INTER_LINEAR_EXACT); //FCN accepts 500x500 BGR-images
Mat inputBlob = blobFromImage(img, 1, Size(), Scalar(), false); //Convert Mat to batch of images
//! [Prepare blob]
//! [Set input blob]
net.setInput(inputBlob, "data"); //set the network input
//! [Set input blob]
//! [Make forward pass]
double t = (double)cv::getTickCount();
Mat score = net.forward("score"); //compute output
t = (double)cv::getTickCount() - t;
printf("processing time: %.1fms\n", t*1000./getTickFrequency());
//! [Make forward pass]
Mat colorize;
colorizeSegmentation(score, colors, colorize);
Mat show;
addWeighted(img, 0.4, colorize, 0.6, 0.0, show);
imshow("show", show);
waitKey(0);
return 0;
} //main

@ -1,24 +0,0 @@
from __future__ import print_function
import numpy as np
import cv2 as cv
from cv2 import dnn
import timeit
def timeit_forward(net):
print("Runtime:", timeit.timeit(lambda: net.forward(), number=10))
def get_class_list():
with open('synset_words.txt', 'rt') as f:
return [x[x.find(" ") + 1:] for x in f]
blob = dnn.blobFromImage(cv.imread('space_shuttle.jpg'), 1, (224, 224), (104, 117, 123), False)
print("Input:", blob.shape, blob.dtype)
net = dnn.readNetFromCaffe('bvlc_googlenet.prototxt', 'bvlc_googlenet.caffemodel')
net.setInput(blob)
prob = net.forward()
#timeit_forward(net) #Uncomment to check performance
print("Output:", prob.shape, prob.dtype)
classes = get_class_list()
print("Best match", classes[prob.argmax()])

@ -1,132 +0,0 @@
# This script is used to demonstrate MobileNet-SSD network using OpenCV deep learning module.
#
# It works with model taken from https://github.com/chuanqi305/MobileNet-SSD/ that
# was trained in Caffe-SSD framework, https://github.com/weiliu89/caffe/tree/ssd.
# Model detects objects from 20 classes.
#
# Also TensorFlow model from TensorFlow object detection model zoo may be used to
# detect objects from 90 classes:
# http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_11_06_2017.tar.gz
# Text graph definition must be taken from opencv_extra:
# https://github.com/opencv/opencv_extra/tree/master/testdata/dnn/ssd_mobilenet_v1_coco.pbtxt
import numpy as np
import argparse
try:
import cv2 as cv
except ImportError:
raise ImportError('Can\'t find OpenCV Python module. If you\'ve built it from sources without installation, '
'configure environment variable PYTHONPATH to "opencv_build_dir/lib" directory (with "python3" subdirectory if required)')
inWidth = 300
inHeight = 300
WHRatio = inWidth / float(inHeight)
inScaleFactor = 0.007843
meanVal = 127.5
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description='Script to run MobileNet-SSD object detection network '
'trained either in Caffe or TensorFlow frameworks.')
parser.add_argument("--video", help="path to video file. If empty, camera's stream will be used")
parser.add_argument("--prototxt", default="MobileNetSSD_deploy.prototxt",
help='Path to text network file: '
'MobileNetSSD_deploy.prototxt for Caffe model or '
'ssd_mobilenet_v1_coco.pbtxt from opencv_extra for TensorFlow model')
parser.add_argument("--weights", default="MobileNetSSD_deploy.caffemodel",
help='Path to weights: '
'MobileNetSSD_deploy.caffemodel for Caffe model or '
'frozen_inference_graph.pb from TensorFlow.')
parser.add_argument("--num_classes", default=20, type=int,
help="Number of classes. It's 20 for Caffe model from "
"https://github.com/chuanqi305/MobileNet-SSD/ and 90 for "
"TensorFlow model from http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_11_06_2017.tar.gz")
parser.add_argument("--thr", default=0.2, type=float, help="confidence threshold to filter out weak detections")
args = parser.parse_args()
if args.num_classes == 20:
net = cv.dnn.readNetFromCaffe(args.prototxt, args.weights)
swapRB = False
classNames = { 0: 'background',
1: 'aeroplane', 2: 'bicycle', 3: 'bird', 4: 'boat',
5: 'bottle', 6: 'bus', 7: 'car', 8: 'cat', 9: 'chair',
10: 'cow', 11: 'diningtable', 12: 'dog', 13: 'horse',
14: 'motorbike', 15: 'person', 16: 'pottedplant',
17: 'sheep', 18: 'sofa', 19: 'train', 20: 'tvmonitor' }
else:
assert(args.num_classes == 90)
net = cv.dnn.readNetFromTensorflow(args.weights, args.prototxt)
swapRB = True
classNames = { 0: 'background',
1: 'person', 2: 'bicycle', 3: 'car', 4: 'motorcycle', 5: 'airplane', 6: 'bus',
7: 'train', 8: 'truck', 9: 'boat', 10: 'traffic light', 11: 'fire hydrant',
13: 'stop sign', 14: 'parking meter', 15: 'bench', 16: 'bird', 17: 'cat',
18: 'dog', 19: 'horse', 20: 'sheep', 21: 'cow', 22: 'elephant', 23: 'bear',
24: 'zebra', 25: 'giraffe', 27: 'backpack', 28: 'umbrella', 31: 'handbag',
32: 'tie', 33: 'suitcase', 34: 'frisbee', 35: 'skis', 36: 'snowboard',
37: 'sports ball', 38: 'kite', 39: 'baseball bat', 40: 'baseball glove',
41: 'skateboard', 42: 'surfboard', 43: 'tennis racket', 44: 'bottle',
46: 'wine glass', 47: 'cup', 48: 'fork', 49: 'knife', 50: 'spoon',
51: 'bowl', 52: 'banana', 53: 'apple', 54: 'sandwich', 55: 'orange',
56: 'broccoli', 57: 'carrot', 58: 'hot dog', 59: 'pizza', 60: 'donut',
61: 'cake', 62: 'chair', 63: 'couch', 64: 'potted plant', 65: 'bed',
67: 'dining table', 70: 'toilet', 72: 'tv', 73: 'laptop', 74: 'mouse',
75: 'remote', 76: 'keyboard', 77: 'cell phone', 78: 'microwave', 79: 'oven',
80: 'toaster', 81: 'sink', 82: 'refrigerator', 84: 'book', 85: 'clock',
86: 'vase', 87: 'scissors', 88: 'teddy bear', 89: 'hair drier', 90: 'toothbrush' }
if args.video:
cap = cv.VideoCapture(args.video)
else:
cap = cv.VideoCapture(0)
while True:
# Capture frame-by-frame
ret, frame = cap.read()
if not ret:
break
blob = cv.dnn.blobFromImage(frame, inScaleFactor, (inWidth, inHeight), (meanVal, meanVal, meanVal), swapRB)
net.setInput(blob)
detections = net.forward()
cols = frame.shape[1]
rows = frame.shape[0]
if cols / float(rows) > WHRatio:
cropSize = (int(rows * WHRatio), rows)
else:
cropSize = (cols, int(cols / WHRatio))
y1 = int((rows - cropSize[1]) / 2)
y2 = y1 + cropSize[1]
x1 = int((cols - cropSize[0]) / 2)
x2 = x1 + cropSize[0]
frame = frame[y1:y2, x1:x2]
cols = frame.shape[1]
rows = frame.shape[0]
for i in range(detections.shape[2]):
confidence = detections[0, 0, i, 2]
if confidence > args.thr:
class_id = int(detections[0, 0, i, 1])
xLeftBottom = int(detections[0, 0, i, 3] * cols)
yLeftBottom = int(detections[0, 0, i, 4] * rows)
xRightTop = int(detections[0, 0, i, 5] * cols)
yRightTop = int(detections[0, 0, i, 6] * rows)
cv.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),
(0, 255, 0))
if class_id in classNames:
label = classNames[class_id] + ": " + str(confidence)
labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
yLeftBottom = max(yLeftBottom, labelSize[1])
cv.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
(xLeftBottom + labelSize[0], yLeftBottom + baseLine),
(255, 255, 255), cv.FILLED)
cv.putText(frame, label, (xLeftBottom, yLeftBottom),
cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
cv.imshow("detections", frame)
if cv.waitKey(1) >= 0:
break

@ -0,0 +1,229 @@
#include <fstream>
#include <sstream>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
const char* keys =
"{ help h | | Print help message. }"
"{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}"
"{ model m | | Path to a binary file of model contains trained weights. "
"It could be a file with extensions .caffemodel (Caffe), "
".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet) }"
"{ config c | | Path to a text file of model contains network configuration. "
"It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet) }"
"{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }"
"{ classes | | Optional path to a text file with names of classes to label detected objects. }"
"{ mean | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }"
"{ scale | 1 | Preprocess input image by multiplying on a scale factor. }"
"{ width | -1 | Preprocess input image by resizing to a specific width. }"
"{ height | -1 | Preprocess input image by resizing to a specific height. }"
"{ rgb | | Indicate that model works with RGB input images instead BGR ones. }"
"{ thr | .5 | Confidence threshold. }"
"{ backend | 0 | Choose one of computation backends: "
"0: default C++ backend, "
"1: Halide language (http://halide-lang.org/), "
"2: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)}"
"{ target | 0 | Choose one of target computation devices: "
"0: CPU target (by default),"
"1: OpenCL }";
using namespace cv;
using namespace dnn;
float confThreshold;
std::vector<std::string> classes;
void postprocess(Mat& frame, const Mat& out, Net& net);
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);
void callback(int pos, void* userdata);
int main(int argc, char** argv)
{
CommandLineParser parser(argc, argv, keys);
parser.about("Use this script to run object detection deep learning networks using OpenCV.");
if (argc == 1 || parser.has("help"))
{
parser.printMessage();
return 0;
}
confThreshold = parser.get<float>("thr");
float scale = parser.get<float>("scale");
Scalar mean = parser.get<Scalar>("mean");
bool swapRB = parser.get<bool>("rgb");
int inpWidth = parser.get<int>("width");
int inpHeight = parser.get<int>("height");
// Open file with classes names.
if (parser.has("classes"))
{
std::string file = parser.get<String>("classes");
std::ifstream ifs(file.c_str());
if (!ifs.is_open())
CV_Error(Error::StsError, "File " + file + " not found");
std::string line;
while (std::getline(ifs, line))
{
classes.push_back(line);
}
}
// Load a model.
CV_Assert(parser.has("model"));
Net net = readNet(parser.get<String>("model"), parser.get<String>("config"), parser.get<String>("framework"));
net.setPreferableBackend(parser.get<int>("backend"));
net.setPreferableTarget(parser.get<int>("target"));
// Create a window
static const std::string kWinName = "Deep learning object detection in OpenCV";
namedWindow(kWinName, WINDOW_NORMAL);
int initialConf = (int)(confThreshold * 100);
createTrackbar("Confidence threshold, %", kWinName, &initialConf, 99, callback);
// Open a video file or an image file or a camera stream.
VideoCapture cap;
if (parser.has("input"))
cap.open(parser.get<String>("input"));
else
cap.open(0);
// Process frames.
Mat frame, blob;
while (waitKey(1) < 0)
{
cap >> frame;
if (frame.empty())
{
waitKey();
break;
}
// Create a 4D blob from a frame.
Size inpSize(inpWidth > 0 ? inpWidth : frame.cols,
inpHeight > 0 ? inpHeight : frame.rows);
blobFromImage(frame, blob, scale, inpSize, mean, swapRB, false);
// Run a model.
net.setInput(blob);
if (net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN
{
resize(frame, frame, inpSize);
Mat imInfo = (Mat_<float>(1, 3) << inpSize.height, inpSize.width, 1.6f);
net.setInput(imInfo, "im_info");
}
Mat out = net.forward();
postprocess(frame, out, net);
// Put efficiency information.
std::vector<double> layersTimes;
double freq = getTickFrequency() / 1000;
double t = net.getPerfProfile(layersTimes) / freq;
std::string label = format("Inference time: %.2f ms", t);
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
imshow(kWinName, frame);
}
return 0;
}
void postprocess(Mat& frame, const Mat& out, Net& net)
{
static std::vector<int> outLayers = net.getUnconnectedOutLayers();
static std::string outLayerType = net.getLayer(outLayers[0])->type;
float* data = (float*)out.data;
if (net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN
{
// Network produces an output blob with a shape 1x1xNx7 where N is the number of
// detections and every detection is a vector of values
// [batchId, classId, confidence, left, top, right, bottom]
for (size_t i = 0; i < out.total(); i += 7)
{
float confidence = data[i + 2];
if (confidence > confThreshold)
{
int left = (int)data[i + 3];
int top = (int)data[i + 4];
int right = (int)data[i + 5];
int bottom = (int)data[i + 6];
int classId = (int)(data[i + 1]) - 1; // Skip 0th background class id.
drawPred(classId, confidence, left, top, right, bottom, frame);
}
}
}
else if (outLayerType == "DetectionOutput")
{
// Network produces an output blob with a shape 1x1xNx7 where N is the number of
// detections and every detection is a vector of values
// [batchId, classId, confidence, left, top, right, bottom]
for (size_t i = 0; i < out.total(); i += 7)
{
float confidence = data[i + 2];
if (confidence > confThreshold)
{
int left = (int)(data[i + 3] * frame.cols);
int top = (int)(data[i + 4] * frame.rows);
int right = (int)(data[i + 5] * frame.cols);
int bottom = (int)(data[i + 6] * frame.rows);
int classId = (int)(data[i + 1]) - 1; // Skip 0th background class id.
drawPred(classId, confidence, left, top, right, bottom, frame);
}
}
}
else if (outLayerType == "Region")
{
// Network produces an output blob with a shape NxC where N is the number of
// detected objects and C is the number of classes + 4, where the first 4
// numbers are [center_x, center_y, width, height]
for (int i = 0; i < out.rows; ++i, data += out.cols)
{
Mat confidences = out.row(i).colRange(5, out.cols);
Point classIdPoint;
double confidence;
minMaxLoc(confidences, 0, &confidence, 0, &classIdPoint);
if (confidence > confThreshold)
{
int classId = classIdPoint.x;
int centerX = (int)(data[0] * frame.cols);
int centerY = (int)(data[1] * frame.rows);
int width = (int)(data[2] * frame.cols);
int height = (int)(data[3] * frame.rows);
int left = centerX - width / 2;
int top = centerY - height / 2;
drawPred(classId, (float)confidence, left, top, left + width, top + height, frame);
}
}
}
else
CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
}
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)
{
rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 255, 0));
std::string label = format("%.2f", conf);
if (!classes.empty())
{
CV_Assert(classId < (int)classes.size());
label = classes[classId] + ": " + label;
}
int baseLine;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
top = max(top, labelSize.height);
rectangle(frame, Point(left, top - labelSize.height),
Point(left + labelSize.width, top + baseLine), Scalar::all(255), FILLED);
putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.5, Scalar());
}
void callback(int pos, void*)
{
confThreshold = pos * 0.01f;
}

@ -0,0 +1,164 @@
import cv2 as cv
import argparse
import sys
import numpy as np
backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE)
targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL)
parser = argparse.ArgumentParser(description='Use this script to run object detection deep learning networks using OpenCV.')
parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.')
parser.add_argument('--model', required=True,
help='Path to a binary file of the model that contains trained weights. '
'It could be a file with extensions .caffemodel (Caffe), '
'.pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet)')
parser.add_argument('--config',
help='Path to a text file of the model that contains the network configuration. '
'It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet)')
parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet'],
help='Optional name of the origin framework of the model. '
'It is detected automatically if not set.')
parser.add_argument('--classes', help='Optional path to a text file with names of classes to label detected objects.')
parser.add_argument('--mean', nargs='+', type=float, default=[0, 0, 0],
help='Preprocess input image by subtracting mean values. '
'Mean values should be in BGR order.')
parser.add_argument('--scale', type=float, default=1.0,
help='Preprocess input image by multiplying by a scale factor.')
parser.add_argument('--width', type=int,
help='Preprocess input image by resizing to a specific width.')
parser.add_argument('--height', type=int,
help='Preprocess input image by resizing to a specific height.')
parser.add_argument('--rgb', action='store_true',
help='Indicate that the model works with RGB input images instead of BGR ones.')
parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold')
parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
help="Choose one of computation backends: "
"%d: default C++ backend, "
"%d: Halide language (http://halide-lang.org/), "
"%d: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)" % backends)
parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
help='Choose one of target computation devices: '
'%d: CPU target (by default), '
'%d: OpenCL' % targets)
args = parser.parse_args()
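# Example invocation (a sketch; the MobileNet-SSD file names and preprocessing values match the
# other samples in this directory, adjust the paths to your local copies):
#   python object_detection.py --model MobileNetSSD_deploy.caffemodel \
#       --config MobileNetSSD_deploy.prototxt --width 300 --height 300 \
#       --scale 0.007843 --mean 127.5 127.5 127.5 --thr 0.2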
# Load names of classes
classes = None
if args.classes:
with open(args.classes, 'rt') as f:
classes = f.read().rstrip('\n').split('\n')
# Load a network
net = cv.dnn.readNet(args.model, args.config, args.framework)
net.setPreferableBackend(args.backend)
net.setPreferableTarget(args.target)
confThreshold = args.thr
def postprocess(frame, out):
frameHeight = frame.shape[0]
frameWidth = frame.shape[1]
def drawPred(classId, conf, left, top, right, bottom):
# Draw a bounding box.
cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0))
label = '%.2f' % conf
# Print a label of class.
if classes:
assert(classId < len(classes))
label = '%s: %s' % (classes[classId], label)
labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
top = max(top, labelSize[1])
cv.rectangle(frame, (left, top - labelSize[1]), (left + labelSize[0], top + baseLine), (255, 255, 255), cv.FILLED)
cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
layerNames = net.getLayerNames()
lastLayerId = net.getLayerId(layerNames[-1])
lastLayer = net.getLayer(lastLayerId)
if net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN
# Network produces an output blob with a shape 1x1xNx7 where N is the number of
# detections and every detection is a vector of values
# [batchId, classId, confidence, left, top, right, bottom]
for detection in out[0, 0]:
confidence = detection[2]
if confidence > confThreshold:
left = int(detection[3])
top = int(detection[4])
right = int(detection[5])
bottom = int(detection[6])
classId = int(detection[1]) - 1 # Skip background label
drawPred(classId, confidence, left, top, right, bottom)
elif lastLayer.type == 'DetectionOutput':
# Network produces an output blob with a shape 1x1xNx7 where N is the number of
# detections and every detection is a vector of values
# [batchId, classId, confidence, left, top, right, bottom]
for detection in out[0, 0]:
confidence = detection[2]
if confidence > confThreshold:
left = int(detection[3] * frameWidth)
top = int(detection[4] * frameHeight)
right = int(detection[5] * frameWidth)
bottom = int(detection[6] * frameHeight)
classId = int(detection[1]) - 1 # Skip background label
drawPred(classId, confidence, left, top, right, bottom)
elif lastLayer.type == 'Region':
# Network produces an output blob with a shape NxC where N is the number of
# detected objects and C is the number of classes + 4, where the first 4
# numbers are [center_x, center_y, width, height]
for detection in out:
confidences = detection[5:]
classId = np.argmax(confidences)
confidence = confidences[classId]
if confidence > confThreshold:
center_x = int(detection[0] * frameWidth)
center_y = int(detection[1] * frameHeight)
width = int(detection[2] * frameWidth)
height = int(detection[3] * frameHeight)
left = int(center_x - width / 2)
top = int(center_y - height / 2)
drawPred(classId, confidence, left, top, left + width, top + height)
# Process inputs
winName = 'Deep learning object detection in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
def callback(pos):
global confThreshold
confThreshold = pos / 100.0
cv.createTrackbar('Confidence threshold, %', winName, int(confThreshold * 100), 99, callback)
cap = cv.VideoCapture(args.input if args.input else 0)
while cv.waitKey(1) < 0:
hasFrame, frame = cap.read()
if not hasFrame:
cv.waitKey()
break
frameHeight = frame.shape[0]
frameWidth = frame.shape[1]
# Create a 4D blob from a frame.
inpWidth = args.width if args.width else frameWidth
inpHeight = args.height if args.height else frameHeight
blob = cv.dnn.blobFromImage(frame, args.scale, (inpWidth, inpHeight), args.mean, args.rgb, crop=False)
# Run a model
net.setInput(blob)
if net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN
frame = cv.resize(frame, (inpWidth, inpHeight))
net.setInput(np.array([inpHeight, inpWidth, 1.6], dtype=np.float32), 'im_info')
out = net.forward()
postprocess(frame, out)
# Put efficiency information.
t, _ = net.getPerfProfile()
label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
cv.imshow(winName, frame)

@ -1,164 +0,0 @@
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <iostream>
using namespace cv;
using namespace std;
using namespace cv::dnn;
const size_t inWidth = 300;
const size_t inHeight = 300;
const double inScaleFactor = 1.0;
const Scalar meanVal(104.0, 177.0, 123.0);
const char* about = "This sample uses Single-Shot Detector "
"(https://arxiv.org/abs/1512.02325) "
"with ResNet-10 architecture to detect faces on camera/video/image.\n"
"More information about the training is available here: "
"<OPENCV_SRC_DIR>/samples/dnn/face_detector/how_to_train_face_detector.txt\n"
".caffemodel model's file is available here: "
"<OPENCV_SRC_DIR>/samples/dnn/face_detector/res10_300x300_ssd_iter_140000.caffemodel\n"
".prototxt file is available here: "
"<OPENCV_SRC_DIR>/samples/dnn/face_detector/deploy.prototxt\n";
const char* params
= "{ help | false | print usage }"
"{ proto | | model configuration (deploy.prototxt) }"
"{ model | | model weights (res10_300x300_ssd_iter_140000.caffemodel) }"
"{ camera_device | 0 | camera device number }"
"{ video | | video or image for detection }"
"{ opencl | false | enable OpenCL }"
"{ min_confidence | 0.5 | min confidence }";
int main(int argc, char** argv)
{
CommandLineParser parser(argc, argv, params);
if (parser.get<bool>("help"))
{
cout << about << endl;
parser.printMessage();
return 0;
}
String modelConfiguration = parser.get<string>("proto");
String modelBinary = parser.get<string>("model");
//! [Initialize network]
dnn::Net net = readNetFromCaffe(modelConfiguration, modelBinary);
//! [Initialize network]
if (net.empty())
{
cerr << "Can't load network by using the following files: " << endl;
cerr << "prototxt: " << modelConfiguration << endl;
cerr << "caffemodel: " << modelBinary << endl;
cerr << "Models are available here:" << endl;
cerr << "<OPENCV_SRC_DIR>/samples/dnn/face_detector" << endl;
cerr << "or here:" << endl;
cerr << "https://github.com/opencv/opencv/tree/master/samples/dnn/face_detector" << endl;
exit(-1);
}
if (parser.get<bool>("opencl"))
{
net.setPreferableTarget(DNN_TARGET_OPENCL);
}
VideoCapture cap;
if (parser.get<String>("video").empty())
{
int cameraDevice = parser.get<int>("camera_device");
cap = VideoCapture(cameraDevice);
if(!cap.isOpened())
{
cout << "Couldn't find camera: " << cameraDevice << endl;
return -1;
}
}
else
{
cap.open(parser.get<String>("video"));
if(!cap.isOpened())
{
cout << "Couldn't open image or video: " << parser.get<String>("video") << endl;
return -1;
}
}
for(;;)
{
Mat frame;
cap >> frame; // get a new frame from camera/video or read image
if (frame.empty())
{
waitKey();
break;
}
if (frame.channels() == 4)
cvtColor(frame, frame, COLOR_BGRA2BGR);
//! [Prepare blob]
Mat inputBlob = blobFromImage(frame, inScaleFactor,
Size(inWidth, inHeight), meanVal, false, false); //Convert Mat to batch of images
//! [Prepare blob]
//! [Set input blob]
net.setInput(inputBlob, "data"); //set the network input
//! [Set input blob]
//! [Make forward pass]
Mat detection = net.forward("detection_out"); //compute output
//! [Make forward pass]
vector<double> layersTimings;
double freq = getTickFrequency() / 1000;
double time = net.getPerfProfile(layersTimings) / freq;
Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
ostringstream ss;
ss << "FPS: " << 1000/time << " ; time: " << time << " ms";
putText(frame, ss.str(), Point(20,20), 0, 0.5, Scalar(0,0,255));
float confidenceThreshold = parser.get<float>("min_confidence");
for(int i = 0; i < detectionMat.rows; i++)
{
float confidence = detectionMat.at<float>(i, 2);
if(confidence > confidenceThreshold)
{
int xLeftBottom = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols);
int yLeftBottom = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows);
int xRightTop = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols);
int yRightTop = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows);
Rect object((int)xLeftBottom, (int)yLeftBottom,
(int)(xRightTop - xLeftBottom),
(int)(yRightTop - yLeftBottom));
rectangle(frame, object, Scalar(0, 255, 0));
ss.str("");
ss << confidence;
String conf(ss.str());
String label = "Face: " + conf;
int baseLine = 0;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
rectangle(frame, Rect(Point(xLeftBottom, yLeftBottom - labelSize.height),
Size(labelSize.width, labelSize.height + baseLine)),
Scalar(255, 255, 255), FILLED);
putText(frame, label, Point(xLeftBottom, yLeftBottom),
FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0));
}
}
imshow("detections", frame);
if (waitKey(1) >= 0) break;
}
return 0;
} // main

@ -1,55 +0,0 @@
import numpy as np
import argparse
try:
import cv2 as cv
except ImportError:
raise ImportError('Can\'t find OpenCV Python module. If you\'ve built it from sources without installation, '
'configure environment variable PYTHONPATH to "opencv_build_dir/lib" directory (with "python3" subdirectory if required)')
from cv2 import dnn
inWidth = 300
inHeight = 300
confThreshold = 0.5
prototxt = 'face_detector/deploy.prototxt'
caffemodel = 'face_detector/res10_300x300_ssd_iter_140000.caffemodel'
if __name__ == '__main__':
net = dnn.readNetFromCaffe(prototxt, caffemodel)
cap = cv.VideoCapture(0)
while True:
ret, frame = cap.read()
if not ret:
break
cols = frame.shape[1]
rows = frame.shape[0]
net.setInput(dnn.blobFromImage(frame, 1.0, (inWidth, inHeight), (104.0, 177.0, 123.0), False, False))
detections = net.forward()
perf_stats = net.getPerfProfile()
print('Inference time, ms: %.2f' % (perf_stats[0] / cv.getTickFrequency() * 1000))
for i in range(detections.shape[2]):
confidence = detections[0, 0, i, 2]
if confidence > confThreshold:
xLeftBottom = int(detections[0, 0, i, 3] * cols)
yLeftBottom = int(detections[0, 0, i, 4] * rows)
xRightTop = int(detections[0, 0, i, 5] * cols)
yRightTop = int(detections[0, 0, i, 6] * rows)
cv.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),
(0, 255, 0))
label = "face: %.4f" % confidence
labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
cv.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
(xLeftBottom + labelSize[0], yLeftBottom + baseLine),
(255, 255, 255), cv.FILLED)
cv.putText(frame, label, (xLeftBottom, yLeftBottom),
cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
cv.imshow("detections", frame)
if cv.waitKey(1) != -1:
break

@ -0,0 +1,237 @@
#include <fstream>
#include <sstream>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
const char* keys =
"{ help h | | Print help message. }"
"{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}"
"{ model m | | Path to a binary file of model contains trained weights. "
"It could be a file with extensions .caffemodel (Caffe), "
".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet) }"
"{ config c | | Path to a text file of model contains network configuration. "
"It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet) }"
"{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }"
"{ classes | | Optional path to a text file with names of classes. }"
"{ colors | | Optional path to a text file with colors for an every class. "
"An every color is represented with three values from 0 to 255 in BGR channels order. }"
"{ mean | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }"
"{ scale | 1 | Preprocess input image by multiplying on a scale factor. }"
"{ width | | Preprocess input image by resizing to a specific width. }"
"{ height | | Preprocess input image by resizing to a specific height. }"
"{ rgb | | Indicate that model works with RGB input images instead BGR ones. }"
"{ backend | 0 | Choose one of computation backends: "
"0: default C++ backend, "
"1: Halide language (http://halide-lang.org/), "
"2: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)}"
"{ target | 0 | Choose one of target computation devices: "
"0: CPU target (by default),"
"1: OpenCL }";
using namespace cv;
using namespace dnn;
std::vector<std::string> classes;
std::vector<Vec3b> colors;
void showLegend();
void colorizeSegmentation(const Mat &score, Mat &segm);
int main(int argc, char** argv)
{
CommandLineParser parser(argc, argv, keys);
parser.about("Use this script to run semantic segmentation deep learning networks using OpenCV.");
if (argc == 1 || parser.has("help"))
{
parser.printMessage();
return 0;
}
float scale = parser.get<float>("scale");
Scalar mean = parser.get<Scalar>("mean");
bool swapRB = parser.get<bool>("rgb");
CV_Assert(parser.has("width"), parser.has("height"));
int inpWidth = parser.get<int>("width");
int inpHeight = parser.get<int>("height");
String model = parser.get<String>("model");
String config = parser.get<String>("config");
String framework = parser.get<String>("framework");
int backendId = parser.get<int>("backend");
int targetId = parser.get<int>("target");
// Open file with classes names.
if (parser.has("classes"))
{
std::string file = parser.get<String>("classes");
std::ifstream ifs(file.c_str());
if (!ifs.is_open())
CV_Error(Error::StsError, "File " + file + " not found");
std::string line;
while (std::getline(ifs, line))
{
classes.push_back(line);
}
}
// Open file with colors.
if (parser.has("colors"))
{
std::string file = parser.get<String>("colors");
std::ifstream ifs(file.c_str());
if (!ifs.is_open())
CV_Error(Error::StsError, "File " + file + " not found");
std::string line;
while (std::getline(ifs, line))
{
std::istringstream colorStr(line.c_str());
Vec3b color;
for (int i = 0; i < 3 && !colorStr.eof(); ++i)
colorStr >> color[i];
colors.push_back(color);
}
}
CV_Assert(parser.has("model"));
//! [Read and initialize network]
Net net = readNet(model, config, framework);
net.setPreferableBackend(backendId);
net.setPreferableTarget(targetId);
//! [Read and initialize network]
// Create a window
static const std::string kWinName = "Deep learning semantic segmentation in OpenCV";
namedWindow(kWinName, WINDOW_NORMAL);
//! [Open a video file or an image file or a camera stream]
VideoCapture cap;
if (parser.has("input"))
cap.open(parser.get<String>("input"));
else
cap.open(0);
//! [Open a video file or an image file or a camera stream]
// Process frames.
Mat frame, blob;
while (waitKey(1) < 0)
{
cap >> frame;
if (frame.empty())
{
waitKey();
break;
}
//! [Create a 4D blob from a frame]
blobFromImage(frame, blob, scale, Size(inpWidth, inpHeight), mean, swapRB, false);
//! [Create a 4D blob from a frame]
//! [Set input blob]
net.setInput(blob);
//! [Set input blob]
//! [Make forward pass]
Mat score = net.forward();
//! [Make forward pass]
Mat segm;
colorizeSegmentation(score, segm);
resize(segm, segm, frame.size(), 0, 0, INTER_NEAREST);
addWeighted(frame, 0.1, segm, 0.9, 0.0, frame);
// Put efficiency information.
std::vector<double> layersTimes;
double freq = getTickFrequency() / 1000;
double t = net.getPerfProfile(layersTimes) / freq;
std::string label = format("Inference time: %.2f ms", t);
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
imshow(kWinName, frame);
if (!classes.empty())
showLegend();
}
return 0;
}
void colorizeSegmentation(const Mat &score, Mat &segm)
{
const int rows = score.size[2];
const int cols = score.size[3];
const int chns = score.size[1];
if (colors.empty())
{
// Generate colors.
colors.push_back(Vec3b());
for (int i = 1; i < chns; ++i)
{
Vec3b color;
for (int j = 0; j < 3; ++j)
color[j] = (colors[i - 1][j] + rand() % 256) / 2;
colors.push_back(color);
}
}
else if (chns != (int)colors.size())
{
CV_Error(Error::StsError, format("Number of output classes does not match "
"number of colors (%d != %d)", chns, colors.size()));
}
Mat maxCl = Mat::zeros(rows, cols, CV_8UC1);
Mat maxVal(rows, cols, CV_32FC1, score.data);
for (int ch = 1; ch < chns; ch++)
{
for (int row = 0; row < rows; row++)
{
const float *ptrScore = score.ptr<float>(0, ch, row);
uint8_t *ptrMaxCl = maxCl.ptr<uint8_t>(row);
float *ptrMaxVal = maxVal.ptr<float>(row);
for (int col = 0; col < cols; col++)
{
if (ptrScore[col] > ptrMaxVal[col])
{
ptrMaxVal[col] = ptrScore[col];
ptrMaxCl[col] = (uchar)ch;
}
}
}
}
segm.create(rows, cols, CV_8UC3);
for (int row = 0; row < rows; row++)
{
const uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
Vec3b *ptrSegm = segm.ptr<Vec3b>(row);
for (int col = 0; col < cols; col++)
{
ptrSegm[col] = colors[ptrMaxCl[col]];
}
}
}
void showLegend()
{
static const int kBlockHeight = 30;
static Mat legend;
if (legend.empty())
{
const int numClasses = (int)classes.size();
if ((int)colors.size() != numClasses)
{
CV_Error(Error::StsError, format("Number of colors does not match "
"number of classes (%d != %d)", (int)colors.size(), (int)classes.size()));
}
legend.create(kBlockHeight * numClasses, 200, CV_8UC3);
for (int i = 0; i < numClasses; i++)
{
Mat block = legend.rowRange(i * kBlockHeight, (i + 1) * kBlockHeight);
block.setTo(colors[i]);
putText(block, classes[i], Point(0, kBlockHeight / 2), FONT_HERSHEY_SIMPLEX, 0.5, Vec3b(255, 255, 255));
}
namedWindow("Legend", WINDOW_NORMAL);
imshow("Legend", legend);
}
}

@ -0,0 +1,125 @@
import cv2 as cv
import argparse
import numpy as np
import sys
backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE)
targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL)
parser = argparse.ArgumentParser(description='Use this script to run semantic segmentation deep learning networks using OpenCV.')
parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.')
parser.add_argument('--model', required=True,
help='Path to a binary file of the model that contains trained weights. '
'It could be a file with extensions .caffemodel (Caffe), '
'.pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet)')
parser.add_argument('--config',
help='Path to a text file of the model that contains the network configuration. '
'It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet)')
parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet'],
help='Optional name of the origin framework of the model. '
'It is detected automatically if not set.')
parser.add_argument('--classes', help='Optional path to a text file with names of classes.')
parser.add_argument('--colors', help='Optional path to a text file with colors for every class. '
'Each color is represented with three values from 0 to 255 in BGR channels order.')
parser.add_argument('--mean', nargs='+', type=float, default=[0, 0, 0],
help='Preprocess input image by subtracting mean values. '
'Mean values should be in BGR order.')
parser.add_argument('--scale', type=float, default=1.0,
help='Preprocess input image by multiplying by a scale factor.')
parser.add_argument('--width', type=int, required=True,
help='Preprocess input image by resizing to a specific width.')
parser.add_argument('--height', type=int, required=True,
help='Preprocess input image by resizing to a specific height.')
parser.add_argument('--rgb', action='store_true',
help='Indicate that the model works with RGB input images instead of BGR ones.')
parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
help="Choose one of computation backends: "
"%d: default C++ backend, "
"%d: Halide language (http://halide-lang.org/), "
"%d: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)" % backends)
parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
help='Choose one of target computation devices: '
'%d: CPU target (by default), '
'%d: OpenCL' % targets)
args = parser.parse_args()
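# Example invocation (a sketch; FCN8s is just one possible model, downloadable from
# http://dl.caffe.berkeleyvision.org/fcn8s-heavy-pascal.caffemodel, adjust the paths to your files):
#   python segmentation.py --model fcn8s-heavy-pascal.caffemodel \
#       --config fcn8s-heavy-pascal.prototxt --width 500 --height 500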
np.random.seed(324)
# Load names of classes
classes = None
if args.classes:
with open(args.classes, 'rt') as f:
classes = f.read().rstrip('\n').split('\n')
# Load colors
colors = None
if args.colors:
with open(args.colors, 'rt') as f:
colors = [np.array(color.split(' '), np.uint8) for color in f.read().rstrip('\n').split('\n')]
legend = None
def showLegend(classes):
global legend
if not classes is None and legend is None:
blockHeight = 30
assert(len(classes) == len(colors))
legend = np.zeros((blockHeight * len(colors), 200, 3), np.uint8)
for i in range(len(classes)):
block = legend[i * blockHeight:(i + 1) * blockHeight]
block[:,:] = colors[i]
cv.putText(block, classes[i], (0, blockHeight // 2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
cv.namedWindow('Legend', cv.WINDOW_NORMAL)
cv.imshow('Legend', legend)
classes = None
# Load a network
net = cv.dnn.readNet(args.model, args.config, args.framework)
net.setPreferableBackend(args.backend)
net.setPreferableTarget(args.target)
winName = 'Deep learning semantic segmentation in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
cap = cv.VideoCapture(args.input if args.input else 0)
legend = None
while cv.waitKey(1) < 0:
hasFrame, frame = cap.read()
if not hasFrame:
cv.waitKey()
break
# Create a 4D blob from a frame.
blob = cv.dnn.blobFromImage(frame, args.scale, (args.width, args.height), args.mean, args.rgb, crop=False)
# Run a model
net.setInput(blob)
score = net.forward()
numClasses = score.shape[1]
height = score.shape[2]
width = score.shape[3]
# Draw segmentation
if not colors:
# Generate colors
colors = [np.array([0, 0, 0], np.uint8)]
for i in range(1, numClasses):
colors.append((colors[i - 1] + np.random.randint(0, 256, [3], np.uint8)) / 2)
classIds = np.argmax(score[0], axis=0)
segm = np.stack([colors[idx] for idx in classIds.flatten()])
segm = segm.reshape(height, width, 3)
segm = cv.resize(segm, (frame.shape[1], frame.shape[0]), interpolation=cv.INTER_NEAREST)
frame = (0.1 * frame + 0.9 * segm).astype(np.uint8)
# Put efficiency information.
t, _ = net.getPerfProfile()
label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
showLegend(classes)
cv.imshow(winName, frame)

@ -1,110 +0,0 @@
// Sample of using Halide backend in OpenCV deep learning module.
// Based on caffe_googlenet.cpp.
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace cv::dnn;
#include <fstream>
#include <iostream>
#include <cstdlib>
/* Find best class for the blob (i. e. class with maximal probability) */
static void getMaxClass(const Mat &probBlob, int *classId, double *classProb)
{
Mat probMat = probBlob.reshape(1, 1); //reshape the blob to 1x1000 matrix
Point classNumber;
minMaxLoc(probMat, NULL, classProb, NULL, &classNumber);
*classId = classNumber.x;
}
static std::vector<std::string> readClassNames(const char *filename = "synset_words.txt")
{
std::vector<std::string> classNames;
std::ifstream fp(filename);
if (!fp.is_open())
{
std::cerr << "File with classes labels not found: " << filename << std::endl;
exit(-1);
}
std::string name;
while (!fp.eof())
{
std::getline(fp, name);
if (name.length())
classNames.push_back( name.substr(name.find(' ')+1) );
}
fp.close();
return classNames;
}
int main(int argc, char **argv)
{
std::string modelTxt = "train_val.prototxt";
std::string modelBin = "squeezenet_v1.1.caffemodel";
std::string imageFile = (argc > 1) ? argv[1] : "space_shuttle.jpg";
//! [Read and initialize network]
Net net = dnn::readNetFromCaffe(modelTxt, modelBin);
//! [Read and initialize network]
//! [Check that network was read successfully]
if (net.empty())
{
std::cerr << "Can't load network by using the following files: " << std::endl;
std::cerr << "prototxt: " << modelTxt << std::endl;
std::cerr << "caffemodel: " << modelBin << std::endl;
std::cerr << "SqueezeNet v1.1 can be downloaded from:" << std::endl;
std::cerr << "https://github.com/DeepScale/SqueezeNet/tree/master/SqueezeNet_v1.1" << std::endl;
exit(-1);
}
//! [Check that network was read successfully]
//! [Prepare blob]
Mat img = imread(imageFile);
if (img.empty())
{
std::cerr << "Can't read image from the file: " << imageFile << std::endl;
exit(-1);
}
if (img.channels() != 3)
{
std::cerr << "Image " << imageFile << " isn't 3-channel" << std::endl;
exit(-1);
}
Mat inputBlob = blobFromImage(img, 1.0, Size(227, 227), Scalar(), false, false); // Convert Mat to 4-dimensional batch.
//! [Prepare blob]
//! [Set input blob]
net.setInput(inputBlob); // Set the network input.
//! [Set input blob]
//! [Enable Halide backend]
net.setPreferableBackend(DNN_BACKEND_HALIDE); // Tell the engine to use Halide where possible.
//! [Enable Halide backend]
//! [Make forward pass]
Mat prob = net.forward("prob"); // Compute output.
//! [Make forward pass]
//! [Determine the best class]
int classId;
double classProb;
getMaxClass(prob, &classId, &classProb); // Find the best class.
//! [Determine the best class]
//! [Print results]
std::vector<std::string> classNames = readClassNames();
std::cout << "Best class: #" << classId << " '" << classNames.at(classId) << "'" << std::endl;
std::cout << "Probability: " << classProb * 100 << "%" << std::endl;
//! [Print results]
return 0;
} //main

@ -1,187 +0,0 @@
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <iostream>
using namespace cv;
using namespace std;
using namespace cv::dnn;
const size_t inWidth = 300;
const size_t inHeight = 300;
const float inScaleFactor = 0.007843f;
const float meanVal = 127.5;
const char* classNames[] = {"background",
"aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair",
"cow", "diningtable", "dog", "horse",
"motorbike", "person", "pottedplant",
"sheep", "sofa", "train", "tvmonitor"};
const String keys
= "{ help | false | print usage }"
"{ proto | MobileNetSSD_deploy.prototxt | model configuration }"
"{ model | MobileNetSSD_deploy.caffemodel | model weights }"
"{ camera_device | 0 | camera device number }"
"{ camera_width | 640 | camera device width }"
"{ camera_height | 480 | camera device height }"
"{ video | | video or image for detection}"
"{ out | | path to output video file}"
"{ min_confidence | 0.2 | min confidence }"
"{ opencl | false | enable OpenCL }"
;
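// Example invocation (a sketch; with no arguments the defaults above are used, so the two
// MobileNet-SSD files must sit next to the binary, whose name depends on how the samples were built):
//   ./ssd_mobilenet_object_detection --video=<path to a video or image> --min_confidence=0.2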
int main(int argc, char** argv)
{
CommandLineParser parser(argc, argv, keys);
parser.about("This sample uses MobileNet Single-Shot Detector "
"(https://arxiv.org/abs/1704.04861) "
"to detect objects on camera/video/image.\n"
".caffemodel model's file is available here: "
"https://github.com/chuanqi305/MobileNet-SSD\n"
"Default network is 300x300 and 20-classes VOC.\n");
if (parser.get<bool>("help"))
{
parser.printMessage();
return 0;
}
String modelConfiguration = parser.get<String>("proto");
String modelBinary = parser.get<String>("model");
CV_Assert(!modelConfiguration.empty() && !modelBinary.empty());
//! [Initialize network]
dnn::Net net = readNetFromCaffe(modelConfiguration, modelBinary);
//! [Initialize network]
if (parser.get<bool>("opencl"))
{
net.setPreferableTarget(DNN_TARGET_OPENCL);
}
if (net.empty())
{
cerr << "Can't load network by using the following files: " << endl;
cerr << "prototxt: " << modelConfiguration << endl;
cerr << "caffemodel: " << modelBinary << endl;
cerr << "Models can be downloaded here:" << endl;
cerr << "https://github.com/chuanqi305/MobileNet-SSD" << endl;
exit(-1);
}
VideoCapture cap;
if (!parser.has("video"))
{
int cameraDevice = parser.get<int>("camera_device");
cap = VideoCapture(cameraDevice);
if(!cap.isOpened())
{
cout << "Couldn't find camera: " << cameraDevice << endl;
return -1;
}
cap.set(CAP_PROP_FRAME_WIDTH, parser.get<int>("camera_width"));
cap.set(CAP_PROP_FRAME_HEIGHT, parser.get<int>("camera_height"));
}
else
{
cap.open(parser.get<String>("video"));
if(!cap.isOpened())
{
cout << "Couldn't open image or video: " << parser.get<String>("video") << endl;
return -1;
}
}
//Acquire input size
Size inVideoSize((int) cap.get(CAP_PROP_FRAME_WIDTH),
(int) cap.get(CAP_PROP_FRAME_HEIGHT));
double fps = cap.get(CAP_PROP_FPS);
int fourcc = static_cast<int>(cap.get(CAP_PROP_FOURCC));
VideoWriter outputVideo;
outputVideo.open(parser.get<String>("out") ,
(fourcc != 0 ? fourcc : VideoWriter::fourcc('M','J','P','G')),
(fps != 0 ? fps : 10.0), inVideoSize, true);
for(;;)
{
Mat frame;
cap >> frame; // get a new frame from camera/video or read image
if (frame.empty())
{
waitKey();
break;
}
if (frame.channels() == 4)
cvtColor(frame, frame, COLOR_BGRA2BGR);
//! [Prepare blob]
Mat inputBlob = blobFromImage(frame, inScaleFactor,
Size(inWidth, inHeight),
Scalar(meanVal, meanVal, meanVal),
false, false); //Convert Mat to batch of images
//! [Prepare blob]
//! [Set input blob]
net.setInput(inputBlob); //set the network input
//! [Set input blob]
//! [Make forward pass]
Mat detection = net.forward(); //compute output
//! [Make forward pass]
vector<double> layersTimings;
double freq = getTickFrequency() / 1000;
double time = net.getPerfProfile(layersTimings) / freq;
Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
if (!outputVideo.isOpened())
{
putText(frame, format("FPS: %.2f ; time: %.2f ms", 1000.f/time, time),
Point(20,20), 0, 0.5, Scalar(0,0,255));
}
else
cout << "Inference time, ms: " << time << endl;
float confidenceThreshold = parser.get<float>("min_confidence");
for(int i = 0; i < detectionMat.rows; i++)
{
float confidence = detectionMat.at<float>(i, 2);
if(confidence > confidenceThreshold)
{
size_t objectClass = (size_t)(detectionMat.at<float>(i, 1));
int left = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols);
int top = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows);
int right = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols);
int bottom = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows);
rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 255, 0));
String label = format("%s: %.2f", classNames[objectClass], confidence);
int baseLine = 0;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
top = max(top, labelSize.height);
rectangle(frame, Point(left, top - labelSize.height),
Point(left + labelSize.width, top + baseLine),
Scalar(255, 255, 255), FILLED);
putText(frame, label, Point(left, top),
FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0));
}
}
if (outputVideo.isOpened())
outputVideo << frame;
imshow("detections", frame);
if (waitKey(1) >= 0) break;
}
return 0;
} // main

@ -1,156 +0,0 @@
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <iostream>
using namespace cv;
using namespace std;
using namespace cv::dnn;
const char* classNames[] = {"background",
"aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair",
"cow", "diningtable", "dog", "horse",
"motorbike", "person", "pottedplant",
"sheep", "sofa", "train", "tvmonitor"};
const char* about = "This sample uses Single-Shot Detector "
"(https://arxiv.org/abs/1512.02325) "
"to detect objects on camera/video/image.\n"
".caffemodel model's file is available here: "
"https://github.com/weiliu89/caffe/tree/ssd#models\n"
"Default network is 300x300 and 20-classes VOC.\n";
const char* params
= "{ help | false | print usage }"
"{ proto | | model configuration }"
"{ model | | model weights }"
"{ camera_device | 0 | camera device number}"
"{ video | | video or image for detection}"
"{ min_confidence | 0.5 | min confidence }";
int main(int argc, char** argv)
{
cv::CommandLineParser parser(argc, argv, params);
if (parser.get<bool>("help"))
{
cout << about << endl;
parser.printMessage();
return 0;
}
String modelConfiguration = parser.get<string>("proto");
String modelBinary = parser.get<string>("model");
//! [Initialize network]
dnn::Net net = readNetFromCaffe(modelConfiguration, modelBinary);
//! [Initialize network]
if (net.empty())
{
cerr << "Can't load network by using the following files: " << endl;
cerr << "prototxt: " << modelConfiguration << endl;
cerr << "caffemodel: " << modelBinary << endl;
cerr << "Models can be downloaded here:" << endl;
cerr << "https://github.com/weiliu89/caffe/tree/ssd#models" << endl;
exit(-1);
}
VideoCapture cap;
if (parser.get<String>("video").empty())
{
int cameraDevice = parser.get<int>("camera_device");
cap = VideoCapture(cameraDevice);
if(!cap.isOpened())
{
cout << "Couldn't find camera: " << cameraDevice << endl;
return -1;
}
}
else
{
cap.open(parser.get<String>("video"));
if(!cap.isOpened())
{
cout << "Couldn't open image or video: " << parser.get<String>("video") << endl;
return -1;
}
}
for (;;)
{
cv::Mat frame;
cap >> frame; // get a new frame from camera/video or read image
if (frame.empty())
{
waitKey();
break;
}
if (frame.channels() == 4)
cvtColor(frame, frame, COLOR_BGRA2BGR);
//! [Prepare blob]
Mat inputBlob = blobFromImage(frame, 1.0f, Size(300, 300), Scalar(104, 117, 123), false, false); //Convert Mat to batch of images
//! [Prepare blob]
//! [Set input blob]
net.setInput(inputBlob, "data"); //set the network input
//! [Set input blob]
//! [Make forward pass]
Mat detection = net.forward("detection_out"); //compute output
//! [Make forward pass]
vector<double> layersTimings;
double freq = getTickFrequency() / 1000;
double time = net.getPerfProfile(layersTimings) / freq;
ostringstream ss;
ss << "FPS: " << 1000/time << " ; time: " << time << " ms";
putText(frame, ss.str(), Point(20,20), 0, 0.5, Scalar(0,0,255));
Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
float confidenceThreshold = parser.get<float>("min_confidence");
for(int i = 0; i < detectionMat.rows; i++)
{
float confidence = detectionMat.at<float>(i, 2);
if(confidence > confidenceThreshold)
{
size_t objectClass = (size_t)(detectionMat.at<float>(i, 1));
int xLeftBottom = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols);
int yLeftBottom = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows);
int xRightTop = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols);
int yRightTop = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows);
ss.str("");
ss << confidence;
String conf(ss.str());
Rect object(xLeftBottom, yLeftBottom,
xRightTop - xLeftBottom,
yRightTop - yLeftBottom);
rectangle(frame, object, Scalar(0, 255, 0));
String label = String(classNames[objectClass]) + ": " + conf;
int baseLine = 0;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
rectangle(frame, Rect(Point(xLeftBottom, yLeftBottom - labelSize.height),
Size(labelSize.width, labelSize.height + baseLine)),
Scalar(255, 255, 255), FILLED);
putText(frame, label, Point(xLeftBottom, yLeftBottom),
FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0));
}
}
imshow("detections", frame);
if (waitKey(1) >= 0) break;
}
return 0;
} // main

@ -1,154 +0,0 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
/*
Sample of using OpenCV dnn module with Tensorflow Inception model.
*/
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace cv::dnn;
#include <fstream>
#include <iostream>
#include <cstdlib>
using namespace std;
const String keys =
"{help h || Sample app for loading Inception TensorFlow model. "
"The model and class names list can be downloaded here: "
"https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip }"
"{model m |tensorflow_inception_graph.pb| path to TensorFlow .pb model file }"
"{image i || path to image file }"
"{i_blob | input | input blob name) }"
"{o_blob | softmax2 | output blob name) }"
"{c_names c | imagenet_comp_graph_label_strings.txt | path to file with classnames for class id }"
"{result r || path to save output blob (optional, binary format, NCHW order) }"
;
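// Example invocation (a sketch; the model and class names files are the defaults above, taken from
// the inception5h.zip archive, and the binary name depends on how the samples were built):
//   ./tf_inception --model=tensorflow_inception_graph.pb \
//       --c_names=imagenet_comp_graph_label_strings.txt --image=space_shuttle.jpg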
void getMaxClass(const Mat &probBlob, int *classId, double *classProb);
std::vector<String> readClassNames(const char *filename);
int main(int argc, char **argv)
{
cv::CommandLineParser parser(argc, argv, keys);
if (parser.has("help"))
{
parser.printMessage();
return 0;
}
String modelFile = parser.get<String>("model");
String imageFile = parser.get<String>("image");
String inBlobName = parser.get<String>("i_blob");
String outBlobName = parser.get<String>("o_blob");
if (!parser.check())
{
parser.printErrors();
return 0;
}
String classNamesFile = parser.get<String>("c_names");
String resultFile = parser.get<String>("result");
//! [Initialize network]
dnn::Net net = readNetFromTensorflow(modelFile);
//! [Initialize network]
if (net.empty())
{
std::cerr << "Can't load network by using the mode file: " << std::endl;
std::cerr << modelFile << std::endl;
exit(-1);
}
//! [Prepare blob]
Mat img = imread(imageFile);
if (img.empty())
{
std::cerr << "Can't read image from the file: " << imageFile << std::endl;
exit(-1);
}
Mat inputBlob = blobFromImage(img, 1.0f, Size(224, 224), Scalar(), true, false); //Convert Mat to batch of images
//! [Prepare blob]
inputBlob -= 117.0;
//! [Set input blob]
net.setInput(inputBlob, inBlobName); //set the network input
//! [Set input blob]
cv::TickMeter tm;
tm.start();
//! [Make forward pass]
Mat result = net.forward(outBlobName); //compute output
//! [Make forward pass]
tm.stop();
if (!resultFile.empty()) {
CV_Assert(result.isContinuous());
ofstream fout(resultFile.c_str(), ios::out | ios::binary);
fout.write((char*)result.data, result.total() * sizeof(float));
fout.close();
}
std::cout << "Output blob shape " << result.size[0] << " x " << result.size[1] << " x " << result.size[2] << " x " << result.size[3] << std::endl;
std::cout << "Inference time, ms: " << tm.getTimeMilli() << std::endl;
if (!classNamesFile.empty()) {
std::vector<String> classNames = readClassNames(classNamesFile.c_str());
int classId;
double classProb;
getMaxClass(result, &classId, &classProb);//find the best class
//! [Print results]
std::cout << "Best class: #" << classId << " '" << classNames.at(classId) << "'" << std::endl;
std::cout << "Probability: " << classProb * 100 << "%" << std::endl;
}
return 0;
} //main
/* Find best class for the blob (i. e. class with maximal probability) */
void getMaxClass(const Mat &probBlob, int *classId, double *classProb)
{
Mat probMat = probBlob.reshape(1, 1); //reshape the blob to 1x1000 matrix
Point classNumber;
minMaxLoc(probMat, NULL, classProb, NULL, &classNumber);
*classId = classNumber.x;
}
std::vector<String> readClassNames(const char *filename)
{
std::vector<String> classNames;
std::ifstream fp(filename);
if (!fp.is_open())
{
std::cerr << "File with classes labels not found: " << filename << std::endl;
exit(-1);
}
std::string name;
while (!fp.eof())
{
std::getline(fp, name);
if (name.length())
classNames.push_back( name );
}
fp.close();
return classNames;
}

@ -1,175 +0,0 @@
/*
Sample of using OpenCV dnn module with Torch ENet model.
*/
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace cv::dnn;
#include <fstream>
#include <iostream>
#include <cstdlib>
#include <sstream>
using namespace std;
const String keys =
"{help h || Sample app for loading ENet Torch model. "
"The model and class names list can be downloaded here: "
"https://www.dropbox.com/sh/dywzk3gyb12hpe5/AAD5YkUa8XgMpHs2gCRgmCVCa }"
"{model m || path to Torch .net model file (model_best.net) }"
"{image i || path to image file }"
"{result r || path to save output blob (optional, binary format, NCHW order) }"
"{show s || whether to show all output channels or not}"
"{o_blob || output blob's name. If empty, last blob's name in net is used}";
static const int kNumClasses = 20;
static const String classes[] = {
"Background", "Road", "Sidewalk", "Building", "Wall", "Fence", "Pole",
"TrafficLight", "TrafficSign", "Vegetation", "Terrain", "Sky", "Person",
"Rider", "Car", "Truck", "Bus", "Train", "Motorcycle", "Bicycle"
};
static const Vec3b colors[] = {
Vec3b(0, 0, 0), Vec3b(244, 126, 205), Vec3b(254, 83, 132), Vec3b(192, 200, 189),
Vec3b(50, 56, 251), Vec3b(65, 199, 228), Vec3b(240, 178, 193), Vec3b(201, 67, 188),
Vec3b(85, 32, 33), Vec3b(116, 25, 18), Vec3b(162, 33, 72), Vec3b(101, 150, 210),
Vec3b(237, 19, 16), Vec3b(149, 197, 72), Vec3b(80, 182, 21), Vec3b(141, 5, 207),
Vec3b(189, 156, 39), Vec3b(235, 170, 186), Vec3b(133, 109, 144), Vec3b(231, 160, 96)
};
static void showLegend();
static void colorizeSegmentation(const Mat &score, Mat &segm);
int main(int argc, char **argv)
{
CommandLineParser parser(argc, argv, keys);
if (parser.has("help") || argc == 1)
{
parser.printMessage();
return 0;
}
String modelFile = parser.get<String>("model");
String imageFile = parser.get<String>("image");
if (!parser.check())
{
parser.printErrors();
return 0;
}
String resultFile = parser.get<String>("result");
//! [Read model and initialize network]
dnn::Net net = dnn::readNetFromTorch(modelFile);
//! [Prepare blob]
Mat img = imread(imageFile);
if (img.empty())
{
std::cerr << "Can't read image from the file: " << imageFile << std::endl;
exit(-1);
}
Mat inputBlob = blobFromImage(img, 1./255, Size(1024, 512), Scalar(), true, false); //Convert Mat to batch of images
//! [Prepare blob]
//! [Set input blob]
net.setInput(inputBlob); //set the network input
//! [Set input blob]
TickMeter tm;
String oBlob = net.getLayerNames().back();
if (!parser.get<String>("o_blob").empty())
{
oBlob = parser.get<String>("o_blob");
}
//! [Make forward pass]
tm.start();
Mat result = net.forward(oBlob);
tm.stop();
if (!resultFile.empty()) {
CV_Assert(result.isContinuous());
ofstream fout(resultFile.c_str(), ios::out | ios::binary);
fout.write((char*)result.data, result.total() * sizeof(float));
fout.close();
}
std::cout << "Output blob: " << result.size[0] << " x " << result.size[1] << " x " << result.size[2] << " x " << result.size[3] << "\n";
std::cout << "Inference time, ms: " << tm.getTimeMilli() << std::endl;
if (parser.has("show"))
{
Mat segm, show;
colorizeSegmentation(result, segm);
showLegend();
cv::resize(segm, segm, img.size(), 0, 0, cv::INTER_NEAREST);
addWeighted(img, 0.1, segm, 0.9, 0.0, show);
imshow("Result", show);
waitKey();
}
return 0;
} //main
static void showLegend()
{
static const int kBlockHeight = 30;
cv::Mat legend(kBlockHeight * kNumClasses, 200, CV_8UC3);
for(int i = 0; i < kNumClasses; i++)
{
cv::Mat block = legend.rowRange(i * kBlockHeight, (i + 1) * kBlockHeight);
block.setTo(colors[i]);
putText(block, classes[i], Point(0, kBlockHeight / 2), FONT_HERSHEY_SIMPLEX, 0.5, Vec3b(255, 255, 255));
}
imshow("Legend", legend);
}
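// Convert the 1 x C x H x W score blob into a BGR image: for every pixel, pick the
// class with the highest score and paint it with that class' legend color.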
static void colorizeSegmentation(const Mat &score, Mat &segm)
{
const int rows = score.size[2];
const int cols = score.size[3];
const int chns = score.size[1];
Mat maxCl = Mat::zeros(rows, cols, CV_8UC1);
Mat maxVal(rows, cols, CV_32FC1, score.data);
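// Channel 0 of the score blob serves as the initial per-pixel maximum; this header
// shares memory with 'score' and is overwritten while the remaining channels are scanned.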
for (int ch = 1; ch < chns; ch++)
{
for (int row = 0; row < rows; row++)
{
const float *ptrScore = score.ptr<float>(0, ch, row);
uint8_t *ptrMaxCl = maxCl.ptr<uint8_t>(row);
float *ptrMaxVal = maxVal.ptr<float>(row);
for (int col = 0; col < cols; col++)
{
if (ptrScore[col] > ptrMaxVal[col])
{
ptrMaxVal[col] = ptrScore[col];
ptrMaxCl[col] = (uchar)ch;
}
}
}
}
segm.create(rows, cols, CV_8UC3);
for (int row = 0; row < rows; row++)
{
const uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
Vec3b *ptrSegm = segm.ptr<Vec3b>(row);
for (int col = 0; col < cols; col++)
{
ptrSegm[col] = colors[ptrMaxCl[col]];
}
}
}

@ -1,185 +0,0 @@
// Sample of using the OpenCV dnn module for real-time detection with a camera device, a video file or an image.
// VIDEO DEMO: https://www.youtube.com/watch?v=NHtRlndE2cg
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <fstream>
#include <iostream>
using namespace std;
using namespace cv;
using namespace cv::dnn;
static const char* about =
"This sample uses You only look once (YOLO)-Detector (https://arxiv.org/abs/1612.08242) to detect objects on camera/video/image.\n"
"Models can be downloaded here: https://pjreddie.com/darknet/yolo/\n"
"Default network is 416x416.\n"
"Class names can be downloaded here: https://github.com/pjreddie/darknet/tree/master/data\n";
static const char* params =
"{ help | false | print usage }"
"{ cfg | | model configuration }"
"{ model | | model weights }"
"{ camera_device | 0 | camera device number}"
"{ source | | video or image for detection}"
"{ out | | path to output video file}"
"{ fps | 3 | frame per second }"
"{ style | box | box or line style draw }"
"{ min_confidence | 0.24 | min confidence }"
"{ class_names | | File with class names, [PATH-TO-DARKNET]/data/coco.names }";
int main(int argc, char** argv)
{
CommandLineParser parser(argc, argv, params);
if (parser.get<bool>("help"))
{
cout << about << endl;
parser.printMessage();
return 0;
}
String modelConfiguration = parser.get<String>("cfg");
String modelBinary = parser.get<String>("model");
//! [Initialize network]
dnn::Net net = readNetFromDarknet(modelConfiguration, modelBinary);
//! [Initialize network]
if (net.empty())
{
cerr << "Can't load network by using the following files: " << endl;
cerr << "cfg-file: " << modelConfiguration << endl;
cerr << "weights-file: " << modelBinary << endl;
cerr << "Models can be downloaded here:" << endl;
cerr << "https://pjreddie.com/darknet/yolo/" << endl;
exit(-1);
}
VideoCapture cap;
VideoWriter writer;
int codec = VideoWriter::fourcc('M', 'J', 'P', 'G');
double fps = parser.get<float>("fps");
if (parser.get<String>("source").empty())
{
int cameraDevice = parser.get<int>("camera_device");
cap = VideoCapture(cameraDevice);
if(!cap.isOpened())
{
cout << "Couldn't find camera: " << cameraDevice << endl;
return -1;
}
}
else
{
cap.open(parser.get<String>("source"));
if(!cap.isOpened())
{
cout << "Couldn't open image or video: " << parser.get<String>("video") << endl;
return -1;
}
}
if(!parser.get<String>("out").empty())
{
writer.open(parser.get<String>("out"), codec, fps, Size((int)cap.get(CAP_PROP_FRAME_WIDTH),(int)cap.get(CAP_PROP_FRAME_HEIGHT)), 1);
}
vector<String> classNamesVec;
ifstream classNamesFile(parser.get<String>("class_names").c_str());
if (classNamesFile.is_open())
{
string className = "";
while (std::getline(classNamesFile, className))
classNamesVec.push_back(className);
}
String object_roi_style = parser.get<String>("style");
for(;;)
{
Mat frame;
cap >> frame; // get a new frame from camera/video or read image
if (frame.empty())
{
waitKey();
break;
}
if (frame.channels() == 4)
cvtColor(frame, frame, COLOR_BGRA2BGR);
//! [Prepare blob]
Mat inputBlob = blobFromImage(frame, 1 / 255.F, Size(416, 416), Scalar(), true, false); // scale pixels to [0, 1], resize to the 416x416 network input, swap R and B channels
//! [Prepare blob]
//! [Set input blob]
net.setInput(inputBlob, "data"); //set the network input
//! [Set input blob]
//! [Make forward pass]
Mat detectionMat = net.forward("detection_out"); //compute output
//! [Make forward pass]
vector<double> layersTimings;
double tick_freq = getTickFrequency();
double time_ms = net.getPerfProfile(layersTimings) / tick_freq * 1000;
putText(frame, format("FPS: %.2f ; time: %.2f ms", 1000.f / time_ms, time_ms),
Point(20, 20), 0, 0.5, Scalar(0, 0, 255));
float confidenceThreshold = parser.get<float>("min_confidence");
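// Each row of detectionMat holds [x_center, y_center, width, height, objectness, class scores...],
// with box coordinates given relative to the frame size; column 4 (objectness) is not used here.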
for (int i = 0; i < detectionMat.rows; i++)
{
const int probability_index = 5;
const int probability_size = detectionMat.cols - probability_index;
float *prob_array_ptr = &detectionMat.at<float>(i, probability_index);
size_t objectClass = max_element(prob_array_ptr, prob_array_ptr + probability_size) - prob_array_ptr;
float confidence = detectionMat.at<float>(i, (int)objectClass + probability_index);
if (confidence > confidenceThreshold)
{
float x_center = detectionMat.at<float>(i, 0) * frame.cols;
float y_center = detectionMat.at<float>(i, 1) * frame.rows;
float width = detectionMat.at<float>(i, 2) * frame.cols;
float height = detectionMat.at<float>(i, 3) * frame.rows;
Point p1(cvRound(x_center - width / 2), cvRound(y_center - height / 2));
Point p2(cvRound(x_center + width / 2), cvRound(y_center + height / 2));
Rect object(p1, p2);
Scalar object_roi_color(0, 255, 0);
if (object_roi_style == "box")
{
rectangle(frame, object, object_roi_color);
}
else
{
Point p_center(cvRound(x_center), cvRound(y_center));
line(frame, object.tl(), p_center, object_roi_color, 1);
}
String className = objectClass < classNamesVec.size() ? classNamesVec[objectClass] : cv::format("unknown(%d)", (int)objectClass);
String label = format("%s: %.2f", className.c_str(), confidence);
int baseLine = 0;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
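// Draw a filled background rectangle behind the label so the text stays readable.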
rectangle(frame, Rect(p1, Size(labelSize.width, labelSize.height + baseLine)),
object_roi_color, FILLED);
putText(frame, label, p1 + Point(0, labelSize.height),
FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0));
}
}
if(writer.isOpened())
{
writer.write(frame);
}
imshow("YOLO: Detections", frame);
if (waitKey(1) >= 0) break;
}
return 0;
} // main