Merge pull request #16223 from l-bat:lip_jppnet

5 years ago · 5429b1f5ff
parent 02f8a9470a 24166ac402
commit 5429b1f5ff
9 changed files with 364 additions and 40 deletions
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@ -250,7 +250,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
        std::vector<size_t> pads_begin, pads_end;
        CV_DEPRECATED_EXTERNAL Size kernel, stride, pad;
        CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b;
-        bool globalPooling;
+        bool globalPooling; //!< Flag is true if at least one of the axes is global pooled.
+        std::vector<bool> isGlobalPooling;
        bool computeMaxIdx;
        String padMode;
        bool ceilMode;
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@ -47,9 +47,9 @@
 #include "opencv2/core/async.hpp"

 #if !defined CV_DOXYGEN && !defined CV_STATIC_ANALYSIS && !defined CV_DNN_DONT_ADD_EXPERIMENTAL_NS
-#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_34_v15 {
+#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_34_v16 {
 #define CV__DNN_EXPERIMENTAL_NS_END }
-namespace cv { namespace dnn { namespace experimental_dnn_34_v15 { } using namespace experimental_dnn_34_v15; }}
+namespace cv { namespace dnn { namespace experimental_dnn_34_v16 { } using namespace experimental_dnn_34_v16; }}
 #else
 #define CV__DNN_EXPERIMENTAL_NS_BEGIN
 #define CV__DNN_EXPERIMENTAL_NS_END
--- a/modules/dnn/src/layers/layers_common.cpp
+++ b/modules/dnn/src/layers/layers_common.cpp
@ -144,26 +144,37 @@ void getStrideAndPadding(const LayerParams &params, std::vector<size_t>& pads_be
 }
 }

-void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, bool &globalPooling,
+void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<bool>& globalPooling,
                            std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end,
                            std::vector<size_t>& strides, cv::String &padMode)
 {
-    globalPooling = params.has("global_pooling") &&
-                    params.get<bool>("global_pooling");
+    bool is_global = params.get<bool>("global_pooling", false);
+    globalPooling.resize(3);
+    globalPooling[0] = params.get<bool>("global_pooling_d", is_global);
+    globalPooling[1] = params.get<bool>("global_pooling_h", is_global);
+    globalPooling[2] = params.get<bool>("global_pooling_w", is_global);

-    if (globalPooling)
+    if (globalPooling[0] || globalPooling[1] || globalPooling[2])
    {
        util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode);
-        if(params.has("kernel_h") || params.has("kernel_w") || params.has("kernel_size"))
-        {
+        if ((globalPooling[0] && params.has("kernel_d")) ||
+            (globalPooling[1] && params.has("kernel_h")) ||
+            (globalPooling[2] && params.has("kernel_w")) ||
+            params.has("kernel_size")) {
            CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified");
        }
-        for (int i = 0; i < pads_begin.size(); i++) {
-            if (pads_begin[i] != 0 || pads_end[i] != 0)
+
+        kernel.resize(3);
+        kernel[0] = params.get<int>("kernel_d", 1);
+        kernel[1] = params.get<int>("kernel_h", 1);
+        kernel[2] = params.get<int>("kernel_w", 1);
+
+        for (int i = 0, j = globalPooling.size() - pads_begin.size(); i < pads_begin.size(); i++, j++) {
+            if ((pads_begin[i] != 0 || pads_end[i] != 0) && globalPooling[j])
                CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0");
        }
-        for (int i = 0; i < strides.size(); i++) {
-            if (strides[i] != 1)
+        for (int i = 0, j = globalPooling.size() - strides.size(); i < strides.size(); i++, j++) {
+            if (strides[i] != 1 && globalPooling[j])
                CV_Error(cv::Error::StsBadArg, "In global_pooling mode, strides must be = 1");
        }
    }
--- a/modules/dnn/src/layers/layers_common.hpp
+++ b/modules/dnn/src/layers/layers_common.hpp
@ -63,7 +63,7 @@ void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>&
                                std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations,
                                cv::String &padMode, std::vector<size_t>& adjust_pads);

-void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, bool &globalPooling,
+void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<bool>& globalPooling,
                            std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end, std::vector<size_t>& strides, cv::String &padMode);

 void getConvPoolOutParams(const std::vector<int>& inp, const std::vector<size_t>& kernel,
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@ -79,6 +79,7 @@ public:
    {
        computeMaxIdx = true;
        globalPooling = false;
+        isGlobalPooling = std::vector<bool>(3, false);
        stride = Size(1, 1);
        pad_t = pad_l = pad_b = pad_r = 0;

@ -95,7 +96,8 @@ public:
            else
                CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");

-            getPoolingKernelParams(params, kernel_size, globalPooling, pads_begin, pads_end, strides, padMode);
+            getPoolingKernelParams(params, kernel_size, isGlobalPooling, pads_begin, pads_end, strides, padMode);
+            globalPooling = isGlobalPooling[0] || isGlobalPooling[1] || isGlobalPooling[2];
            if (kernel_size.size() == 2) {
                kernel = Size(kernel_size[1], kernel_size[0]);
                stride = Size(strides[1], strides[0]);
@ -147,9 +149,14 @@ public:
            out.push_back(outputs[0].size[i]);
        }
        if (globalPooling) {
-            kernel = Size(inp[1], inp[0]);
-            kernel_size = std::vector<size_t>(inp.begin(), inp.end());
-        }
+            std::vector<size_t> finalKernel;
+            for (int i = 0; i < inp.size(); i++) {
+                int idx = isGlobalPooling.size() - inp.size() + i;
+                finalKernel.push_back(isGlobalPooling[idx] ? inp[i] : kernel_size[idx]);
+             }
+             kernel_size = finalKernel;
+             kernel = Size(kernel_size[1], kernel_size[0]);
+         }

        getConvPoolPaddings(inp, kernel_size, strides, padMode, pads_begin, pads_end);
        if (pads_begin.size() == 2) {
@ -995,20 +1002,25 @@ virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inp
        std::vector<int> inpShape(inputs[0].begin() + 2, inputs[0].end());
        std::vector<int> outShape(inputs[0].begin(), inputs[0].begin() + 2);

-        if (globalPooling)
-        {
-            outShape.push_back(1);
-            outShape.push_back(1);
+        std::vector<size_t> local_kernel;
+        if (globalPooling) {
+            for (int i = 0; i < inpShape.size(); i++) {
+                int idx = isGlobalPooling.size() - inpShape.size() + i;
+                local_kernel.push_back(isGlobalPooling[idx] ? inpShape[i] : kernel_size[idx]);
+            }
+        } else {
+            local_kernel = kernel_size;
        }
-        else if (type == ROI || type == PSROI)
+
+        if (type == ROI || type == PSROI)
        {
            outShape.push_back(pooledSize.height);
            outShape.push_back(pooledSize.width);
        }
        else if (padMode.empty())
        {
-            for (int i = 0; i < kernel_size.size(); i++) {
-                float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - kernel_size[i]) / strides[i];
+            for (int i = 0; i < local_kernel.size(); i++) {
+                float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i];
                outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst)));
            }

@ -1023,7 +1035,7 @@ virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inp
        }
        else
        {
-            getConvPoolOutParams(inpShape, kernel_size, strides, padMode, std::vector<size_t>(kernel_size.size(), 1), outShape);
+            getConvPoolOutParams(inpShape, local_kernel, strides, padMode, std::vector<size_t>(local_kernel.size(), 1), outShape);
        }
        if (type == ROI)
        {
--- a/modules/dnn/src/layers/slice_layer.cpp
+++ b/modules/dnn/src/layers/slice_layer.cpp
@ -114,7 +114,8 @@ public:
    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
-               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) &&
+               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && sliceRanges.size() == 1) ||
+               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 &&
 #ifdef HAVE_INF_ENGINE
                INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) &&
 #endif
--- a/modules/dnn/src/tensorflow/tf_importer.cpp
+++ b/modules/dnn/src/tensorflow/tf_importer.cpp
@ -1936,20 +1936,22 @@ void TFImporter::populateNet(Net dstNet)
        }
        else if (type == "Mean")
        {
+            // Computes the mean of elements across dimensions of a tensor.
+            // If keepdims is false (default) reduces input_tensor along the dimensions given in axis,
+            // else the reduced dimensions are retained with length 1.
+            // if indices = [1, 2] in NHWC layout we use global pooling: NxCxHxW --Pooling--> NxCx1x1
+            // if keepdims is false we use Flatten after Pooling: out_shape = NxC
+            // if indices = [0] we use a global pooling by indices.
+            // To return correct shape, we use Reshape after Pooling. To determine input shape use Slice for input,
+            // if keepdims is false we use Flatten after Slice.
+            // Example: input_shape = NxCxHxW
+            // determine out shape: NxCxHxW --Slice--> 1xCxHxW
+            //                      out_shape = 1xCxHxW if keepDims else (1xCxHxW --Flatten--> CxHxW)
+            // global pool: NxCxHxW --Flatten--> Nx(C*H*W) --Reshape--> 1x1xNx(C*H*W) --Pooling--> 1x1x1x(C*H*W) --Reshape--> out_shape
+
            Mat indices = getTensorContent(getConstBlob(layer, value_id, 1));
            CV_Assert(indices.type() == CV_32SC1);

-            if (indices.total() != 2 || indices.at<int>(0) != 1 || indices.at<int>(1) != 2)
-                CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation.");
-
-            layerParams.set("pool", "ave");
-            layerParams.set("global_pooling", true);
-
-            int id = dstNet.addLayer(name, "Pooling", layerParams);
-            layer_id[name] = id;
-
-            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
-
            // There are two attributes, "keepdims" and a deprecated "keep_dims".
            bool keepDims = false;
            if (hasLayerAttr(layer, "keepdims"))
@ -1957,16 +1959,128 @@ void TFImporter::populateNet(Net dstNet)
            else if (hasLayerAttr(layer, "keep_dims"))
                keepDims = getLayerAttr(layer, "keep_dims").b();

-            if (!keepDims)
+            if (indices.total() == 1 && indices.at<int>(0) == 0)
            {
                LayerParams flattenLp;
                std::string flattenName = name + "/flatten";
                CV_Assert(layer_id.find(flattenName) == layer_id.end());
                int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp);
                layer_id[flattenName] = flattenId;
-                connect(layer_id, dstNet, Pin(name), flattenId, 0);
+                connect(layer_id, dstNet, parsePin(layer.input(0)), flattenId, 0);
+
+                LayerParams reshapeLp;
+                std::string reshapeName = name + "/reshape";
+                CV_Assert(layer_id.find(reshapeName) == layer_id.end());
+                reshapeLp.set("axis", 0);
+                reshapeLp.set("num_axes", 1);
+                int newShape[] = {1, 1, -1};
+                reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 3));
+
+                int reshapeId = dstNet.addLayer(reshapeName, "Reshape", reshapeLp);
+                layer_id[reshapeName] = reshapeId;
+                connect(layer_id, dstNet, Pin(flattenName), reshapeId, 0);
+
+                LayerParams avgLp;
+                std::string avgName = name + "/avg";
+                CV_Assert(layer_id.find(avgName) == layer_id.end());
+                avgLp.set("pool", "ave");
+                // pooling kernel H x 1
+                avgLp.set("global_pooling_h", true);
+                avgLp.set("kernel_w", 1);
+                int avgId = dstNet.addLayer(avgName, "Pooling", avgLp);
+                layer_id[avgName] = avgId;
+                connect(layer_id, dstNet, Pin(reshapeName), avgId, 0);
+
+                LayerParams sliceLp;
+                std::string layerShapeName = name + "/slice";
+                CV_Assert(layer_id.find(layerShapeName) == layer_id.end());
+                sliceLp.set("axis", 0);
+                int begin[] = {0};
+                int size[] = {1};
+                sliceLp.set("begin", DictValue::arrayInt(&begin[0], 1));
+                sliceLp.set("size", DictValue::arrayInt(&size[0], 1));
+                int sliceId = dstNet.addLayer(layerShapeName, "Slice", sliceLp);
+                layer_id[layerShapeName] = sliceId;
+                connect(layer_id, dstNet, Pin(layer.input(0)), sliceId, 0);
+
+                if (!keepDims)
+                {
+                    LayerParams squeezeLp;
+                    std::string squeezeName = name + "/squeeze";
+                    CV_Assert(layer_id.find(squeezeName) == layer_id.end());
+                    squeezeLp.set("axis", 0);
+                    squeezeLp.set("end_axis", 1);
+                    int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp);
+                    layer_id[squeezeName] = squeezeId;
+                    connect(layer_id, dstNet, Pin(layerShapeName), squeezeId, 0);
+                    layerShapeName = squeezeName;
+                }
+
+                int id = dstNet.addLayer(name, "Reshape", layerParams);
+                layer_id[name] = id;
+                connect(layer_id, dstNet, Pin(avgName), id, 0);
+                connect(layer_id, dstNet, Pin(layerShapeName), id, 1);
+            } else {
+                if (indices.total() != 2 || indices.at<int>(0) != 1 || indices.at<int>(1) != 2)
+                    CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation.");
+
+                layerParams.set("pool", "ave");
+                layerParams.set("global_pooling", true);
+                int id = dstNet.addLayer(name, "Pooling", layerParams);
+                layer_id[name] = id;
+                connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
+
+                if (!keepDims)
+                {
+                    LayerParams flattenLp;
+                    std::string flattenName = name + "/flatten";
+                    CV_Assert(layer_id.find(flattenName) == layer_id.end());
+                    int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp);
+                    layer_id[flattenName] = flattenId;
+                    connect(layer_id, dstNet, Pin(name), flattenId, 0);
+                }
            }
        }
+        else if (type == "Pack")
+        {
+            // op: tf.stack(list of tensors, axis=0)
+            // Join a list of inputs along a new axis.
+            // The "axis" specifies the index of the new axis in the dimensions of the output.
+            // Example: given a list with "N" tensors of shape (C, H, W):
+            // if axis == 0 then the output tensor will have the shape (N, C, H, W),
+            // if axis == 1 then the output tensor will have the shape (C, N, H, W).
+            CV_Assert(hasLayerAttr(layer, "axis"));
+            int dim = (int)getLayerAttr(layer, "axis").i();
+            if (dim != 0)
+                CV_Error(Error::StsNotImplemented, "Unsupported mode of pack operation.");
+
+            CV_Assert(hasLayerAttr(layer, "N"));
+            int num = (int)getLayerAttr(layer, "N").i();
+            CV_Assert(layer.input_size() == num);
+            std::string base_name = name + "/reshape_";
+            std::vector<int> reshape_ids;
+            for (int i = 0; i < num; i++) {
+                std::ostringstream ss;
+                ss << i;
+                std::string reshape_name = base_name + ss.str();
+                LayerParams reshapeLP;
+                reshapeLP.set("axis", dim);
+                reshapeLP.set("num_axes", 1);
+                int outShape[] = {1, -1};
+                reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], 2));
+                int id = dstNet.addLayer(reshape_name, "Reshape", reshapeLP);
+                layer_id[reshape_name] = id;
+                reshape_ids.push_back(id);
+                connect(layer_id, dstNet, parsePin(layer.input(i)), id, 0);
+            }
+
+            layerParams.set("axis", dim);
+            int id = dstNet.addLayer(name, "Concat", layerParams);
+            layer_id[name] = id;
+
+            for (int li = 0; li < num; li++)
+                dstNet.connect(reshape_ids[li], 0, id, li);
+        }
        else if (type == "ClipByValue")
        {
            // op: "ClipByValue"
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@ -121,6 +121,13 @@ public:
    }
 };

+TEST_P(Test_TensorFlow_layers, reduce_mean)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    runTensorFlowNet("global_pool_by_axis");
+}
+
 TEST_P(Test_TensorFlow_layers, conv)
 {
    runTensorFlowNet("single_conv");
--- a/samples/dnn/human_parsing.py
+++ b/samples/dnn/human_parsing.py
@ -0,0 +1,178 @@
+#!/usr/bin/env python
+'''
+You can download the converted pb model from https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0
+or convert the model yourself.
+
+Follow these steps if you want to convert the original model yourself:
+    To get original .meta pre-trained model download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view
+    For correct convert .meta to .pb model download original repository https://github.com/Engineering-Course/LIP_JPPNet
+    Change script evaluate_parsing_JPPNet-s2.py for human parsing
+    1. Remove preprocessing to create image_batch_origin:
+        with tf.name_scope("create_inputs"):
+        ...
+    Add
+        image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input')
+
+    2. Create input
+        image = cv2.imread(path/to/image)
+        image_rev = np.flip(image, axis=1)
+        input = np.stack([image, image_rev], axis=0)
+
+    3. Hardcode image_h and image_w shapes to determine output shapes.
+       We use default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py.
+        parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE),
+                                                tf.image.resize_images(parsing_out1_075, INPUT_SIZE),
+                                                tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0)
+       Do similarly with parsing_out2, parsing_out3
+    4. Remove postprocessing. Last net operation:
+        raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0)
+       Change:
+        parsing_ = sess.run(raw_output, feed_dict={'input:0': input})
+
+    5. To save model after sess.run(...) add:
+        input_graph_def = tf.get_default_graph().as_graph_def()
+        output_node = "Mean_3"
+        output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node)
+
+        output_graph = "LIP_JPPNet.pb"
+        with tf.gfile.GFile(output_graph, "wb") as f:
+            f.write(output_graph_def.SerializeToString())'
+'''
+
+import argparse
+import numpy as np
+import cv2 as cv
+
+
+backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV)
+targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD)
+
+
+def preprocess(image_path):
+    """
+    Create 4-dimensional blob from image and flip image
+    :param image_path: path to input image
+    """
+    image = cv.imread(image_path)
+    image_rev = np.flip(image, axis=1)
+    input = cv.dnn.blobFromImages([image, image_rev], mean=(104.00698793, 116.66876762, 122.67891434))
+    return input
+
+
+def run_net(input, model_path, backend, target):
+    """
+    Read network and infer model
+    :param model_path: path to JPPNet model
+    :param backend: computation backend
+    :param target: computation device
+    """
+    net = cv.dnn.readNet(model_path)
+    net.setPreferableBackend(backend)
+    net.setPreferableTarget(target)
+    net.setInput(input)
+    out = net.forward()
+    return out
+
+
+def postprocess(out, input_shape):
+    """
+    Create a grayscale human segmentation
+    :param out: network output
+    :param input_shape: input image width and height
+    """
+    # LIP classes
+    # 0 Background
+    # 1 Hat
+    # 2 Hair
+    # 3 Glove
+    # 4 Sunglasses
+    # 5 UpperClothes
+    # 6 Dress
+    # 7 Coat
+    # 8 Socks
+    # 9 Pants
+    # 10 Jumpsuits
+    # 11 Scarf
+    # 12 Skirt
+    # 13 Face
+    # 14 LeftArm
+    # 15 RightArm
+    # 16 LeftLeg
+    # 17 RightLeg
+    # 18 LeftShoe
+    # 19 RightShoe
+    head_output, tail_output = np.split(out, indices_or_sections=[1], axis=0)
+    head_output = head_output.squeeze(0)
+    tail_output = tail_output.squeeze(0)
+
+    head_output = np.stack([cv.resize(img, dsize=input_shape) for img in head_output[:, ...]])
+    tail_output = np.stack([cv.resize(img, dsize=input_shape) for img in tail_output[:, ...]])
+
+    tail_list = np.split(tail_output, indices_or_sections=list(range(1, 20)), axis=0)
+    tail_list = [arr.squeeze(0) for arr in tail_list]
+    tail_list_rev = [tail_list[i] for i in range(14)]
+    tail_list_rev.extend([tail_list[15], tail_list[14], tail_list[17], tail_list[16], tail_list[19], tail_list[18]])
+    tail_output_rev = np.stack(tail_list_rev, axis=0)
+    tail_output_rev = np.flip(tail_output_rev, axis=2)
+    raw_output_all = np.mean(np.stack([head_output, tail_output_rev], axis=0), axis=0, keepdims=True)
+    raw_output_all = np.argmax(raw_output_all, axis=1)
+    raw_output_all = raw_output_all.transpose(1, 2, 0)
+    return raw_output_all
+
+
+def decode_labels(gray_image):
+    """
+    Colorize image according to labels
+    :param gray_image: grayscale human segmentation result
+    """
+    height, width, _ = gray_image.shape
+    colors = [(0, 0, 0), (128, 0, 0), (255, 0, 0), (0, 85, 0), (170, 0, 51), (255, 85, 0),
+              (0, 0, 85), (0, 119, 221), (85, 85, 0), (0, 85, 85), (85, 51, 0), (52, 86, 128),
+              (0, 128, 0), (0, 0, 255), (51, 170, 221), (0, 255, 255),(85, 255, 170),
+              (170, 255, 85), (255, 255, 0), (255, 170, 0)]
+
+    segm = np.stack([colors[idx] for idx in gray_image.flatten()])
+    segm = segm.reshape(height, width, 3).astype(np.uint8)
+    segm = cv.cvtColor(segm, cv.COLOR_BGR2RGB)
+    return segm
+
+
+def parse_human(image_path, model_path, backend=cv.dnn.DNN_BACKEND_OPENCV, target=cv.dnn.DNN_TARGET_CPU):
+    """
+    Prepare input for execution, run net and postprocess output to parse human.
+    :param image_path: path to input image
+    :param model_path: path to JPPNet model
+    :param backend: name of computation backend
+    :param target: name of computation target
+    """
+    input = preprocess(image_path)
+    input_h, input_w = input.shape[2:]
+    output = run_net(input, model_path, backend, target)
+    grayscale_out = postprocess(output, (input_w, input_h))
+    segmentation = decode_labels(grayscale_out)
+    return segmentation
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet',
+                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--input', '-i', required=True, help='Path to input image.')
+    parser.add_argument('--model', '-m', required=True, help='Path to pb model.')
+    parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
+                        help="Choose one of computation backends: "
+                             "%d: automatically (by default), "
+                             "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
+                             "%d: OpenCV implementation" % backends)
+    parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
+                        help='Choose one of target computation devices: '
+                             '%d: CPU target (by default), '
+                             '%d: OpenCL, '
+                             '%d: OpenCL fp16 (half-float precision), '
+                             '%d: VPU' % targets)
+    args, _ = parser.parse_known_args()
+
+    output = parse_human(args.input, args.model, args.backend, args.target)
+    winName = 'Deep learning human parsing in OpenCV'
+    cv.namedWindow(winName, cv.WINDOW_AUTOSIZE)
+    cv.imshow(winName, output)
+    cv.waitKey()