From ad5898224dedec79a10a3fd68548048ac2ce1549 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Thu, 20 Sep 2018 17:59:04 +0300 Subject: [PATCH 1/6] Add a file with preprocessing parameters for deep learning networks --- .../core/include/opencv2/core/persistence.hpp | 5 + modules/core/src/persistence_cpp.cpp | 14 +++ samples/dnn/classification.cpp | 27 ++-- samples/dnn/classification.py | 39 +++--- samples/dnn/common.hpp | 94 ++++++++++++++ samples/dnn/common.py | 108 ++++++++++++++++ samples/dnn/models.yml | 117 ++++++++++++++++++ samples/dnn/object_detection.cpp | 29 +++-- samples/dnn/object_detection.py | 34 ++--- samples/dnn/segmentation.cpp | 27 ++-- samples/dnn/segmentation.py | 43 ++++--- samples/dnn/tf_text_graph_common.py | 2 + 12 files changed, 435 insertions(+), 104 deletions(-) create mode 100644 samples/dnn/common.hpp create mode 100644 samples/dnn/common.py create mode 100644 samples/dnn/models.yml diff --git a/modules/core/include/opencv2/core/persistence.hpp b/modules/core/include/opencv2/core/persistence.hpp index a82235b7c3..126393fae1 100644 --- a/modules/core/include/opencv2/core/persistence.hpp +++ b/modules/core/include/opencv2/core/persistence.hpp @@ -542,6 +542,11 @@ public: */ CV_WRAP_AS(at) FileNode operator[](int i) const; + /** @brief Returns keys of a mapping node. + @returns Keys of a mapping node. + */ + CV_WRAP std::vector keys() const; + /** @brief Returns type of the node. @returns Type of the node. See FileNode::Type */ diff --git a/modules/core/src/persistence_cpp.cpp b/modules/core/src/persistence_cpp.cpp index b1cef6be95..334c787c98 100644 --- a/modules/core/src/persistence_cpp.cpp +++ b/modules/core/src/persistence_cpp.cpp @@ -269,6 +269,20 @@ FileNode FileNode::operator[](int i) const i == 0 ? *this : FileNode(); } +std::vector FileNode::keys() const +{ + std::vector res; + if (isMap()) + { + res.reserve(size()); + for (FileNodeIterator it = begin(); it != end(); ++it) + { + res.push_back((*it).name()); + } + } + return res; +} + String FileNode::name() const { const char* str; diff --git a/samples/dnn/classification.cpp b/samples/dnn/classification.cpp index 42bdc20dd2..0ae9e6ed94 100644 --- a/samples/dnn/classification.cpp +++ b/samples/dnn/classification.cpp @@ -5,21 +5,15 @@ #include #include -const char* keys = +#include "common.hpp" + +std::string keys = "{ help h | | Print help message. }" + "{ @alias | | An alias name of model to extract preprocessing parameters from models.yml file. }" + "{ zoo | models.yml | An optional path to file with preprocessing parameters }" "{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}" - "{ model m | | Path to a binary file of model contains trained weights. " - "It could be a file with extensions .caffemodel (Caffe), " - ".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet) }" - "{ config c | | Path to a text file of model contains network configuration. " - "It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet) }" "{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }" "{ classes | | Optional path to a text file with names of classes. }" - "{ mean | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }" - "{ scale | 1 | Preprocess input image by multiplying on a scale factor. }" - "{ width | | Preprocess input image by resizing to a specific width. }" - "{ height | | Preprocess input image by resizing to a specific height. }" - "{ rgb | | Indicate that model works with RGB input images instead BGR ones. }" "{ backend | 0 | Choose one of computation backends: " "0: automatically (by default), " "1: Halide language (http://halide-lang.org/), " @@ -39,6 +33,13 @@ std::vector classes; int main(int argc, char** argv) { CommandLineParser parser(argc, argv, keys); + + const std::string modelName = parser.get("@alias"); + const std::string zooFile = parser.get("zoo"); + + keys += genPreprocArguments(modelName, zooFile); + + parser = CommandLineParser(argc, argv, keys); parser.about("Use this script to run classification deep learning networks using OpenCV."); if (argc == 1 || parser.has("help")) { @@ -51,8 +52,8 @@ int main(int argc, char** argv) bool swapRB = parser.get("rgb"); int inpWidth = parser.get("width"); int inpHeight = parser.get("height"); - String model = parser.get("model"); - String config = parser.get("config"); + String model = findFile(parser.get("model")); + String config = findFile(parser.get("config")); String framework = parser.get("framework"); int backendId = parser.get("backend"); int targetId = parser.get("target"); diff --git a/samples/dnn/classification.py b/samples/dnn/classification.py index 9a610d1ab7..5a2373d363 100644 --- a/samples/dnn/classification.py +++ b/samples/dnn/classification.py @@ -1,35 +1,19 @@ import cv2 as cv import argparse import numpy as np -import sys + +from common import * backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) -parser = argparse.ArgumentParser(description='Use this script to run classification deep learning networks using OpenCV.') +parser = argparse.ArgumentParser(add_help=False) +parser.add_argument('--zoo', default=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models.yml'), + help='An optional path to file with preprocessing parameters.') parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') -parser.add_argument('--model', required=True, - help='Path to a binary file of model contains trained weights. ' - 'It could be a file with extensions .caffemodel (Caffe), ' - '.pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet)') -parser.add_argument('--config', - help='Path to a text file of model contains network configuration. ' - 'It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet)') parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet'], help='Optional name of an origin framework of the model. ' 'Detect it automatically if it does not set.') -parser.add_argument('--classes', help='Optional path to a text file with names of classes.') -parser.add_argument('--mean', nargs='+', type=float, default=[0, 0, 0], - help='Preprocess input image by subtracting mean values. ' - 'Mean values should be in BGR order.') -parser.add_argument('--scale', type=float, default=1.0, - help='Preprocess input image by multiplying on a scale factor.') -parser.add_argument('--width', type=int, required=True, - help='Preprocess input image by resizing to a specific width.') -parser.add_argument('--height', type=int, required=True, - help='Preprocess input image by resizing to a specific height.') -parser.add_argument('--rgb', action='store_true', - help='Indicate that model works with RGB input images instead BGR ones.') parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, help="Choose one of computation backends: " "%d: automatically (by default), " @@ -42,8 +26,17 @@ parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, '%d: OpenCL, ' '%d: OpenCL fp16 (half-float precision), ' '%d: VPU' % targets) +args, _ = parser.parse_known_args() +add_preproc_args(args.zoo, parser, 'classification') +parser = argparse.ArgumentParser(parents=[parser], + description='Use this script to run classification deep learning networks using OpenCV.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) args = parser.parse_args() +args.model = findFile(args.model) +args.config = findFile(args.config) +args.classes = findFile(args.classes) + # Load names of classes classes = None if args.classes: @@ -66,7 +59,9 @@ while cv.waitKey(1) < 0: break # Create a 4D blob from a frame. - blob = cv.dnn.blobFromImage(frame, args.scale, (args.width, args.height), args.mean, args.rgb, crop=False) + inpWidth = args.width if args.width else frame.shape[1] + inpHeight = args.height if args.height else frame.shape[0] + blob = cv.dnn.blobFromImage(frame, args.scale, (inpWidth, inpHeight), args.mean, args.rgb, crop=False) # Run a model net.setInput(blob) diff --git a/samples/dnn/common.hpp b/samples/dnn/common.hpp new file mode 100644 index 0000000000..a0ca012645 --- /dev/null +++ b/samples/dnn/common.hpp @@ -0,0 +1,94 @@ +#include + +using namespace cv; + +std::string genArgument(const std::string& argName, const std::string& help, + const std::string& modelName, const std::string& zooFile, + char key = ' ', std::string defaultVal = ""); + +std::string genPreprocArguments(const std::string& modelName, const std::string& zooFile); + +std::string findFile(const std::string& filename); + +std::string genArgument(const std::string& argName, const std::string& help, + const std::string& modelName, const std::string& zooFile, + char key, std::string defaultVal) +{ + if (!modelName.empty()) + { + FileStorage fs(zooFile, FileStorage::READ); + if (fs.isOpened()) + { + FileNode node = fs[modelName]; + if (!node.empty()) + { + FileNode value = node[argName]; + if (!value.empty()) + { + if (value.isReal()) + defaultVal = format("%f", (float)value); + else if (value.isString()) + defaultVal = (std::string)value; + else if (value.isInt()) + defaultVal = format("%d", (int)value); + else if (value.isSeq()) + { + for (size_t i = 0; i < value.size(); ++i) + { + FileNode v = value[(int)i]; + if (v.isInt()) + defaultVal += format("%d ", (int)v); + else if (v.isReal()) + defaultVal += format("%f ", (float)v); + else + CV_Error(Error::StsNotImplemented, "Unexpected value format"); + } + } + else + CV_Error(Error::StsNotImplemented, "Unexpected field format"); + } + } + } + } + return "{ " + argName + " " + key + " | " + defaultVal + " | " + help + " }"; +} + +std::string findFile(const std::string& filename) +{ + if (filename.empty() || utils::fs::exists(filename)) + return filename; + + std::string extraPaths[] = {getenv("OPENCV_DNN_TEST_DATA_PATH"), + getenv("OPENCV_TEST_DATA_PATH")}; + for (int i = 0; i < 2; ++i) + { + std::string absPath = utils::fs::join(extraPaths[i], utils::fs::join("dnn", filename)); + if (utils::fs::exists(absPath)) + return absPath; + } + CV_Error(Error::StsObjectNotFound, "File " + filename + " not found! " + "Please specify a path to /opencv_extra/testdata in OPENCV_DNN_TEST_DATA_PATH " + "environment variable or pass a full path to model."); + return ""; +} + +std::string genPreprocArguments(const std::string& modelName, const std::string& zooFile) +{ + return genArgument("model", "Path to a binary file of model contains trained weights. " + "It could be a file with extensions .caffemodel (Caffe), " + ".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet), .bin (OpenVINO).", + modelName, zooFile, 'm') + + genArgument("config", "Path to a text file of model contains network configuration. " + "It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet), .xml (OpenVINO).", + modelName, zooFile, 'c') + + genArgument("mean", "Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces.", + modelName, zooFile) + + genArgument("scale", "Preprocess input image by multiplying on a scale factor.", + modelName, zooFile, ' ', "1.0") + + genArgument("width", "Preprocess input image by resizing to a specific width.", + modelName, zooFile, ' ', "-1") + + genArgument("height", "Preprocess input image by resizing to a specific height.", + modelName, zooFile, ' ', "-1") + + genArgument("rgb", "Indicate that model works with RGB input images instead BGR ones.", + modelName, zooFile); +} diff --git a/samples/dnn/common.py b/samples/dnn/common.py new file mode 100644 index 0000000000..feafdc9d02 --- /dev/null +++ b/samples/dnn/common.py @@ -0,0 +1,108 @@ +import sys +import os +import cv2 as cv + + +def add_argument(zoo, parser, name, help, required=False, default=None, type=None, action=None, nargs=None): + if len(sys.argv) <= 1: + return + + modelName = sys.argv[1] + + if os.path.isfile(zoo): + fs = cv.FileStorage(zoo, cv.FILE_STORAGE_READ) + node = fs.getNode(modelName) + if not node.empty(): + value = node.getNode(name) + if not value.empty(): + if value.isReal(): + default = value.real() + elif value.isString(): + default = value.string() + elif value.isInt(): + default = int(value.real()) + elif value.isSeq(): + default = [] + for i in range(value.size()): + v = value.at(i) + if v.isInt(): + default.append(int(v.real())) + elif v.isReal(): + default.append(v.real()) + else: + print('Unexpected value format') + exit(0) + else: + print('Unexpected field format') + exit(0) + required = False + + if action == 'store_true': + default = 1 if default == 'true' else (0 if default == 'false' else default) + assert(default is None or default == 0 or default == 1) + parser.add_argument('--' + name, required=required, help=help, default=bool(default), + action=action) + else: + parser.add_argument('--' + name, required=required, help=help, default=default, + action=action, nargs=nargs, type=type) + + +def add_preproc_args(zoo, parser, sample): + aliases = [] + if os.path.isfile(zoo): + fs = cv.FileStorage(zoo, cv.FILE_STORAGE_READ) + root = fs.root() + for name in root.keys(): + model = root.getNode(name) + if model.getNode('sample').string() == sample: + aliases.append(name) + + parser.add_argument('alias', nargs='?', choices=aliases, + help='An alias name of model to extract preprocessing parameters from models.yml file.') + add_argument(zoo, parser, 'model', required=True, + help='Path to a binary file of model contains trained weights. ' + 'It could be a file with extensions .caffemodel (Caffe), ' + '.pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet), .bin (OpenVINO)') + add_argument(zoo, parser, 'config', + help='Path to a text file of model contains network configuration. ' + 'It could be a file with extensions .prototxt (Caffe), .pbtxt or .config (TensorFlow), .cfg (Darknet), .xml (OpenVINO)') + add_argument(zoo, parser, 'mean', nargs='+', type=float, default=[0, 0, 0], + help='Preprocess input image by subtracting mean values. ' + 'Mean values should be in BGR order.') + add_argument(zoo, parser, 'scale', type=float, default=1.0, + help='Preprocess input image by multiplying on a scale factor.') + add_argument(zoo, parser, 'width', type=int, + help='Preprocess input image by resizing to a specific width.') + add_argument(zoo, parser, 'height', type=int, + help='Preprocess input image by resizing to a specific height.') + add_argument(zoo, parser, 'rgb', action='store_true', + help='Indicate that model works with RGB input images instead BGR ones.') + add_argument(zoo, parser, 'classes', + help='Optional path to a text file with names of classes to label detected objects.') + + +def findFile(filename): + if filename: + if os.path.exists(filename): + return filename + + samplesDataDir = os.path.join(os.path.dirname(os.path.abspath(__file__)), + '..', + 'data', + 'dnn') + if os.path.exists(os.path.join(samplesDataDir, filename)): + return os.path.join(samplesDataDir, filename) + + for path in ['OPENCV_DNN_TEST_DATA_PATH', 'OPENCV_TEST_DATA_PATH']: + try: + extraPath = os.environ[path] + absPath = os.path.join(extraPath, 'dnn', filename) + if os.path.exists(absPath): + return absPath + except KeyError: + pass + + print('File ' + filename + ' not found! Please specify a path to ' + '/opencv_extra/testdata in OPENCV_DNN_TEST_DATA_PATH environment ' + 'variable or pass a full path to model.') + exit(0) diff --git a/samples/dnn/models.yml b/samples/dnn/models.yml new file mode 100644 index 0000000000..0e7198a660 --- /dev/null +++ b/samples/dnn/models.yml @@ -0,0 +1,117 @@ +%YAML:1.0 + +################################################################################ +# Object detection models. +################################################################################ + +# OpenCV's face detection network +opencv_fd: + model: "opencv_face_detector.caffemodel" + config: "opencv_face_detector.prototxt" + mean: [104, 177, 123] + scale: 1.0 + width: 300 + height: 300 + rgb: false + sample: "object_detection" + +# YOLO object detection family from Darknet (https://pjreddie.com/darknet/yolo/) +# Might be used for all YOLOv2, TinyYolov2 and YOLOv3 +yolo: + model: "yolov3.weights" + config: "yolov3.cfg" + mean: [0, 0, 0] + scale: 0.00392 + width: 416 + height: 416 + rgb: true + classes: "object_detection_classes_yolov3.txt" + sample: "object_detection" + +tiny-yolo-voc: + model: "tiny-yolo-voc.weights" + config: "tiny-yolo-voc.cfg" + mean: [0, 0, 0] + scale: 0.00392 + width: 416 + height: 416 + rgb: true + classes: "object_detection_classes_pascal_voc.txt" + sample: "object_detection" + +# Caffe implementation of SSD model from https://github.com/chuanqi305/MobileNet-SSD +ssd_caffe: + model: "MobileNetSSD_deploy.caffemodel" + config: "MobileNetSSD_deploy.prototxt" + mean: [127.5, 127.5, 127.5] + scale: 0.007843 + width: 300 + height: 300 + rgb: false + classes: "object_detection_classes_pascal_voc.txt" + sample: "object_detection" + +# TensorFlow implementation of SSD model from https://github.com/tensorflow/models/tree/master/research/object_detection +ssd_tf: + model: "ssd_mobilenet_v1_coco_2017_11_17.pb" + config: "ssd_mobilenet_v1_coco_2017_11_17.pbtxt" + mean: [0, 0, 0] + scale: 1.0 + width: 300 + height: 300 + rgb: true + classes: "object_detection_classes_coco.txt" + sample: "object_detection" + +# TensorFlow implementation of Faster-RCNN model from https://github.com/tensorflow/models/tree/master/research/object_detection +faster_rcnn_tf: + model: "faster_rcnn_inception_v2_coco_2018_01_28.pb" + config: "faster_rcnn_inception_v2_coco_2018_01_28.pbtxt" + mean: [0, 0, 0] + scale: 1.0 + width: 800 + height: 600 + rgb: true + sample: "object_detection" + +################################################################################ +# Image classification models. +################################################################################ + +# SqueezeNet v1.1 from https://github.com/DeepScale/SqueezeNet +squeezenet: + model: "squeezenet_v1.1.caffemodel" + config: "squeezenet_v1.1.prototxt" + mean: [0, 0, 0] + scale: 1.0 + width: 227 + height: 227 + rgb: false + classes: "classification_classes_ILSVRC2012.txt" + sample: "classification" + +################################################################################ +# Semantic segmentation models. +################################################################################ + +# ENet road scene segmentation network from https://github.com/e-lab/ENet-training +# Works fine for different input sizes. +enet: + model: "Enet-model-best.net" + mean: [0, 0, 0] + scale: 0.00392 + width: 512 + height: 256 + rgb: true + classes: "enet-classes.txt" + sample: "segmentation" + +fcn8s: + model: "fcn8s-heavy-pascal.caffemodel" + config: "fcn8s-heavy-pascal.prototxt" + mean: [0, 0, 0] + scale: 1.0 + width: 500 + height: 500 + rgb: false + sample: "segmentation" diff --git a/samples/dnn/object_detection.cpp b/samples/dnn/object_detection.cpp index 161f7434f8..756978a574 100644 --- a/samples/dnn/object_detection.cpp +++ b/samples/dnn/object_detection.cpp @@ -5,22 +5,16 @@ #include #include -const char* keys = +#include "common.hpp" + +std::string keys = "{ help h | | Print help message. }" + "{ @alias | | An alias name of model to extract preprocessing parameters from models.yml file. }" + "{ zoo | models.yml | An optional path to file with preprocessing parameters }" "{ device | 0 | camera device number. }" "{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera. }" - "{ model m | | Path to a binary file of model contains trained weights. " - "It could be a file with extensions .caffemodel (Caffe), " - ".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet).}" - "{ config c | | Path to a text file of model contains network configuration. " - "It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet).}" "{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }" "{ classes | | Optional path to a text file with names of classes to label detected objects. }" - "{ mean | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }" - "{ scale | 1 | Preprocess input image by multiplying on a scale factor. }" - "{ width | -1 | Preprocess input image by resizing to a specific width. }" - "{ height | -1 | Preprocess input image by resizing to a specific height. }" - "{ rgb | | Indicate that model works with RGB input images instead BGR ones. }" "{ thr | .5 | Confidence threshold. }" "{ nms | .4 | Non-maximum suppression threshold. }" "{ backend | 0 | Choose one of computation backends: " @@ -52,6 +46,13 @@ std::vector getOutputsNames(const Net& net); int main(int argc, char** argv) { CommandLineParser parser(argc, argv, keys); + + const std::string modelName = parser.get("@alias"); + const std::string zooFile = parser.get("zoo"); + + keys += genPreprocArguments(modelName, zooFile); + + parser = CommandLineParser(argc, argv, keys); parser.about("Use this script to run object detection deep learning networks using OpenCV."); if (argc == 1 || parser.has("help")) { @@ -66,6 +67,9 @@ int main(int argc, char** argv) bool swapRB = parser.get("rgb"); int inpWidth = parser.get("width"); int inpHeight = parser.get("height"); + CV_Assert(parser.has("model")); + std::string modelPath = findFile(parser.get("model")); + std::string configPath = findFile(parser.get("config")); // Open file with classes names. if (parser.has("classes")) @@ -82,8 +86,7 @@ int main(int argc, char** argv) } // Load a model. - CV_Assert(parser.has("model")); - Net net = readNet(parser.get("model"), parser.get("config"), parser.get("framework")); + Net net = readNet(modelPath, configPath, parser.get("framework")); net.setPreferableBackend(parser.get("backend")); net.setPreferableTarget(parser.get("target")); diff --git a/samples/dnn/object_detection.py b/samples/dnn/object_detection.py index 76c33f8e3b..77855ede2c 100644 --- a/samples/dnn/object_detection.py +++ b/samples/dnn/object_detection.py @@ -1,8 +1,8 @@ import cv2 as cv import argparse -import sys import numpy as np +from common import * from tf_text_graph_common import readTextMessage from tf_text_graph_ssd import createSSDGraph from tf_text_graph_faster_rcnn import createFasterRCNNGraph @@ -10,15 +10,10 @@ from tf_text_graph_faster_rcnn import createFasterRCNNGraph backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) -parser = argparse.ArgumentParser(description='Use this script to run object detection deep learning networks using OpenCV.') +parser = argparse.ArgumentParser(add_help=False) +parser.add_argument('--zoo', default=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models.yml'), + help='An optional path to file with preprocessing parameters.') parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') -parser.add_argument('--model', required=True, - help='Path to a binary file of model contains trained weights. ' - 'It could be a file with extensions .caffemodel (Caffe), ' - '.pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet), .bin (OpenVINO)') -parser.add_argument('--config', - help='Path to a text file of model contains network configuration. ' - 'It could be a file with extensions .prototxt (Caffe), .pbtxt or .config (TensorFlow), .cfg (Darknet), .xml (OpenVINO)') parser.add_argument('--out_tf_graph', default='graph.pbtxt', help='For models from TensorFlow Object Detection API, you may ' 'pass a .config file which was used for training through --config ' @@ -26,18 +21,6 @@ parser.add_argument('--out_tf_graph', default='graph.pbtxt', parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet', 'dldt'], help='Optional name of an origin framework of the model. ' 'Detect it automatically if it does not set.') -parser.add_argument('--classes', help='Optional path to a text file with names of classes to label detected objects.') -parser.add_argument('--mean', nargs='+', type=float, default=[0, 0, 0], - help='Preprocess input image by subtracting mean values. ' - 'Mean values should be in BGR order.') -parser.add_argument('--scale', type=float, default=1.0, - help='Preprocess input image by multiplying on a scale factor.') -parser.add_argument('--width', type=int, - help='Preprocess input image by resizing to a specific width.') -parser.add_argument('--height', type=int, - help='Preprocess input image by resizing to a specific height.') -parser.add_argument('--rgb', action='store_true', - help='Indicate that model works with RGB input images instead BGR ones.') parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold') parser.add_argument('--nms', type=float, default=0.4, help='Non-maximum suppression threshold') parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, @@ -52,8 +35,17 @@ parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, '%d: OpenCL, ' '%d: OpenCL fp16 (half-float precision), ' '%d: VPU' % targets) +args, _ = parser.parse_known_args() +add_preproc_args(args.zoo, parser, 'object_detection') +parser = argparse.ArgumentParser(parents=[parser], + description='Use this script to run object detection deep learning networks using OpenCV.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) args = parser.parse_args() +args.model = findFile(args.model) +args.config = findFile(args.config) +args.classes = findFile(args.classes) + # If config specified, try to load it as TensorFlow Object Detection API's pipeline. config = readTextMessage(args.config) if 'model' in config: diff --git a/samples/dnn/segmentation.cpp b/samples/dnn/segmentation.cpp index 70e8d7b5b4..30b29dc449 100644 --- a/samples/dnn/segmentation.cpp +++ b/samples/dnn/segmentation.cpp @@ -5,24 +5,18 @@ #include #include -const char* keys = +#include "common.hpp" + +std::string keys = "{ help h | | Print help message. }" + "{ @alias | | An alias name of model to extract preprocessing parameters from models.yml file. }" + "{ zoo | models.yml | An optional path to file with preprocessing parameters }" "{ device | 0 | camera device number. }" "{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera. }" - "{ model m | | Path to a binary file of model contains trained weights. " - "It could be a file with extensions .caffemodel (Caffe), " - ".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet). }" - "{ config c | | Path to a text file of model contains network configuration. " - "It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet). }" "{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }" "{ classes | | Optional path to a text file with names of classes. }" "{ colors | | Optional path to a text file with colors for an every class. " "An every color is represented with three values from 0 to 255 in BGR channels order. }" - "{ mean | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }" - "{ scale | 1 | Preprocess input image by multiplying on a scale factor. }" - "{ width | | Preprocess input image by resizing to a specific width. }" - "{ height | | Preprocess input image by resizing to a specific height. }" - "{ rgb | | Indicate that model works with RGB input images instead BGR ones. }" "{ backend | 0 | Choose one of computation backends: " "0: automatically (by default), " "1: Halide language (http://halide-lang.org/), " @@ -47,6 +41,13 @@ void colorizeSegmentation(const Mat &score, Mat &segm); int main(int argc, char** argv) { CommandLineParser parser(argc, argv, keys); + + const std::string modelName = parser.get("@alias"); + const std::string zooFile = parser.get("zoo"); + + keys += genPreprocArguments(modelName, zooFile); + + parser = CommandLineParser(argc, argv, keys); parser.about("Use this script to run semantic segmentation deep learning networks using OpenCV."); if (argc == 1 || parser.has("help")) { @@ -59,8 +60,8 @@ int main(int argc, char** argv) bool swapRB = parser.get("rgb"); int inpWidth = parser.get("width"); int inpHeight = parser.get("height"); - String model = parser.get("model"); - String config = parser.get("config"); + String model = findFile(parser.get("model")); + String config = findFile(parser.get("config")); String framework = parser.get("framework"); int backendId = parser.get("backend"); int targetId = parser.get("target"); diff --git a/samples/dnn/segmentation.py b/samples/dnn/segmentation.py index b615b96028..a926ca27b3 100644 --- a/samples/dnn/segmentation.py +++ b/samples/dnn/segmentation.py @@ -3,35 +3,20 @@ import argparse import numpy as np import sys +from common import * + backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) -parser = argparse.ArgumentParser(description='Use this script to run semantic segmentation deep learning networks using OpenCV.') +parser = argparse.ArgumentParser(add_help=False) +parser.add_argument('--zoo', default=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models.yml'), + help='An optional path to file with preprocessing parameters.') parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') -parser.add_argument('--model', required=True, - help='Path to a binary file of model contains trained weights. ' - 'It could be a file with extensions .caffemodel (Caffe), ' - '.pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet)') -parser.add_argument('--config', - help='Path to a text file of model contains network configuration. ' - 'It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet)') parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet'], help='Optional name of an origin framework of the model. ' 'Detect it automatically if it does not set.') -parser.add_argument('--classes', help='Optional path to a text file with names of classes.') parser.add_argument('--colors', help='Optional path to a text file with colors for an every class. ' 'An every color is represented with three values from 0 to 255 in BGR channels order.') -parser.add_argument('--mean', nargs='+', type=float, default=[0, 0, 0], - help='Preprocess input image by subtracting mean values. ' - 'Mean values should be in BGR order.') -parser.add_argument('--scale', type=float, default=1.0, - help='Preprocess input image by multiplying on a scale factor.') -parser.add_argument('--width', type=int, required=True, - help='Preprocess input image by resizing to a specific width.') -parser.add_argument('--height', type=int, required=True, - help='Preprocess input image by resizing to a specific height.') -parser.add_argument('--rgb', action='store_true', - help='Indicate that model works with RGB input images instead BGR ones.') parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, help="Choose one of computation backends: " "%d: automatically (by default), " @@ -44,8 +29,17 @@ parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, '%d: OpenCL, ' '%d: OpenCL fp16 (half-float precision), ' '%d: VPU' % targets) +args, _ = parser.parse_known_args() +add_preproc_args(args.zoo, parser, 'segmentation') +parser = argparse.ArgumentParser(parents=[parser], + description='Use this script to run semantic segmentation deep learning networks using OpenCV.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) args = parser.parse_args() +args.model = findFile(args.model) +args.config = findFile(args.config) +args.classes = findFile(args.classes) + np.random.seed(324) # Load names of classes @@ -93,8 +87,13 @@ while cv.waitKey(1) < 0: cv.waitKey() break + frameHeight = frame.shape[0] + frameWidth = frame.shape[1] + # Create a 4D blob from a frame. - blob = cv.dnn.blobFromImage(frame, args.scale, (args.width, args.height), args.mean, args.rgb, crop=False) + inpWidth = args.width if args.width else frameWidth + inpHeight = args.height if args.height else frameHeight + blob = cv.dnn.blobFromImage(frame, args.scale, (inpWidth, inpHeight), args.mean, args.rgb, crop=False) # Run a model net.setInput(blob) @@ -115,7 +114,7 @@ while cv.waitKey(1) < 0: segm = np.stack([colors[idx] for idx in classIds.flatten()]) segm = segm.reshape(height, width, 3) - segm = cv.resize(segm, (frame.shape[1], frame.shape[0]), interpolation=cv.INTER_NEAREST) + segm = cv.resize(segm, (frameWidth, frameHeight), interpolation=cv.INTER_NEAREST) frame = (0.1 * frame + 0.9 * segm).astype(np.uint8) # Put efficiency information. diff --git a/samples/dnn/tf_text_graph_common.py b/samples/dnn/tf_text_graph_common.py index 564c572d58..bf04c42174 100644 --- a/samples/dnn/tf_text_graph_common.py +++ b/samples/dnn/tf_text_graph_common.py @@ -72,6 +72,8 @@ def parseMessage(tokens, idx): def readTextMessage(filePath): + if not filePath: + return {} with open(filePath, 'rt') as f: content = f.read() From 7b9b876726eb706345be51d503569ac706cf0376 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 10 Oct 2018 19:16:32 +0300 Subject: [PATCH 2/6] cmake: improve PDB support --- cmake/OpenCVUtils.cmake | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index e0c740caf9..3ae6162ba6 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -999,6 +999,15 @@ function(ocv_convert_to_lib_name var) set(${var} ${tmp} PARENT_SCOPE) endfunction() +if(MSVC AND BUILD_SHARED_LIBS) # no defaults for static libs (modern CMake is required) + if(NOT CMAKE_VERSION VERSION_LESS 3.6.0) + option(INSTALL_PDB_COMPONENT_EXCLUDE_FROM_ALL "Don't install PDB files by default" ON) + option(INSTALL_PDB "Add install PDB rules" ON) + elseif(NOT CMAKE_VERSION VERSION_LESS 3.1.0) + option(INSTALL_PDB_COMPONENT_EXCLUDE_FROM_ALL "Don't install PDB files by default (not supported)" OFF) + option(INSTALL_PDB "Add install PDB rules" OFF) + endif() +endif() # add install command function(ocv_install_target) @@ -1030,9 +1039,10 @@ function(ocv_install_target) endif() if(MSVC) - if(INSTALL_PDB AND (NOT INSTALL_IGNORE_PDB)) - set(__target "${ARGV0}") - + set(__target "${ARGV0}") + if(INSTALL_PDB AND NOT INSTALL_IGNORE_PDB + AND NOT OPENCV_${__target}_PDB_SKIP + ) set(__location_key "ARCHIVE") # static libs get_target_property(__target_type ${__target} TYPE) if("${__target_type}" STREQUAL "SHARED_LIBRARY") @@ -1064,16 +1074,28 @@ function(ocv_install_target) if(DEFINED INSTALL_PDB_COMPONENT AND INSTALL_PDB_COMPONENT) set(__pdb_install_component "${INSTALL_PDB_COMPONENT}") endif() + set(__pdb_exclude_from_all "") + if(INSTALL_PDB_COMPONENT_EXCLUDE_FROM_ALL) + if(NOT CMAKE_VERSION VERSION_LESS 3.6.0) + set(__pdb_exclude_from_all EXCLUDE_FROM_ALL) + else() + message(WARNING "INSTALL_PDB_COMPONENT_EXCLUDE_FROM_ALL requires CMake 3.6+") + endif() + endif() + # message(STATUS "Adding PDB file installation rule: target=${__target} dst=${__dst} component=${__pdb_install_component}") if("${__target_type}" STREQUAL "SHARED_LIBRARY") - install(FILES "$" DESTINATION "${__dst}" COMPONENT ${__pdb_install_component} OPTIONAL) + install(FILES "$" DESTINATION "${__dst}" + COMPONENT ${__pdb_install_component} OPTIONAL ${__pdb_exclude_from_all}) else() # There is no generator expression similar to TARGET_PDB_FILE and TARGET_PDB_FILE can't be used: https://gitlab.kitware.com/cmake/cmake/issues/16932 # However we still want .pdb files like: 'lib/Debug/opencv_core341d.pdb' or '3rdparty/lib/zlibd.pdb' install(FILES "$/$/$>,$,$>.pdb" - DESTINATION "${__dst}" CONFIGURATIONS Debug COMPONENT ${__pdb_install_component} OPTIONAL) + DESTINATION "${__dst}" CONFIGURATIONS Debug + COMPONENT ${__pdb_install_component} OPTIONAL ${__pdb_exclude_from_all}) install(FILES "$/$/$>,$,$>.pdb" - DESTINATION "${__dst}" CONFIGURATIONS Release COMPONENT ${__pdb_install_component} OPTIONAL) + DESTINATION "${__dst}" CONFIGURATIONS Release + COMPONENT ${__pdb_install_component} OPTIONAL ${__pdb_exclude_from_all}) endif() else() message(WARNING "PDB files installation is not supported (need CMake >= 3.1.0)") From 9dc1d388af78069c74637ba6180eb55b135ef778 Mon Sep 17 00:00:00 2001 From: Sayed Adel Date: Thu, 11 Oct 2018 23:03:57 +0000 Subject: [PATCH 3/6] imgproc: Enable VSX on pyrDown & pyrUp --- modules/imgproc/src/pyramids.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/imgproc/src/pyramids.cpp b/modules/imgproc/src/pyramids.cpp index 48b85c0720..0f2dc60302 100644 --- a/modules/imgproc/src/pyramids.cpp +++ b/modules/imgproc/src/pyramids.cpp @@ -152,7 +152,7 @@ struct PyrDownVec_32f } }; -#if CV_SSE4_1 || CV_NEON +#if CV_SSE4_1 || CV_NEON || CV_VSX struct PyrDownVec_32s16u { @@ -312,7 +312,7 @@ struct PyrUpVec_32s16s } }; -#if CV_SSE4_1 || CV_NEON +#if CV_SSE4_1 || CV_NEON || CV_VSX struct PyrUpVec_32s16u { From 24af70c7e029e07562fea8780975bdbfc73c5b2f Mon Sep 17 00:00:00 2001 From: take1014 Date: Fri, 12 Oct 2018 23:08:03 +0900 Subject: [PATCH 4/6] resolves 11283 --- modules/imgproc/src/resize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp index 5436a78ab5..688f4c7ff0 100644 --- a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -3783,7 +3783,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, CV_Assert( !ssize.empty() ); CV_Assert( !dsize.empty() || (inv_scale_x > 0 && inv_scale_y > 0) ); - if( dsize.area() == 0 ) + if( dsize.empty() ) { dsize = Size(saturate_cast(ssize.width*inv_scale_x), saturate_cast(ssize.height*inv_scale_y)); From 8c4f886f5fbd7dfa9492dc83d5922fd90618c960 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 12 Oct 2018 19:10:06 +0000 Subject: [PATCH 5/6] core: re-throw allocation exception if there is no fallback --- modules/core/src/matrix.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 5edf252c87..8433babc64 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -355,15 +355,16 @@ void Mat::create(int d, const int* _sizes, int _type) #endif if(!a) a = a0; - CV_TRY + try { u = a->allocate(dims, size, _type, 0, step.p, 0, USAGE_DEFAULT); CV_Assert(u != 0); } - CV_CATCH_ALL + catch (...) { - if(a != a0) - u = a0->allocate(dims, size, _type, 0, step.p, 0, USAGE_DEFAULT); + if (a == a0) + throw; + u = a0->allocate(dims, size, _type, 0, step.p, 0, USAGE_DEFAULT); CV_Assert(u != 0); } CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) ); From 978ad4981ee1d2e902291e52bdd8daa81f6a1e20 Mon Sep 17 00:00:00 2001 From: drkoller Date: Thu, 11 Oct 2018 16:27:13 -0400 Subject: [PATCH 6/6] Clean up documentation for imread and imwrite --- .../imgcodecs/include/opencv2/imgcodecs.hpp | 43 +++++++++++-------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/modules/imgcodecs/include/opencv2/imgcodecs.hpp b/modules/imgcodecs/include/opencv2/imgcodecs.hpp index c5ffff97b2..a27d4be74e 100644 --- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp +++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp @@ -140,19 +140,18 @@ returns an empty matrix ( Mat::data==NULL ). Currently, the following file formats are supported: - Windows bitmaps - \*.bmp, \*.dib (always supported) -- JPEG files - \*.jpeg, \*.jpg, \*.jpe (see the *Notes* section) -- JPEG 2000 files - \*.jp2 (see the *Notes* section) -- Portable Network Graphics - \*.png (see the *Notes* section) -- WebP - \*.webp (see the *Notes* section) +- JPEG files - \*.jpeg, \*.jpg, \*.jpe (see the *Note* section) +- JPEG 2000 files - \*.jp2 (see the *Note* section) +- Portable Network Graphics - \*.png (see the *Note* section) +- WebP - \*.webp (see the *Note* section) - Portable image format - \*.pbm, \*.pgm, \*.ppm \*.pxm, \*.pnm (always supported) - Sun rasters - \*.sr, \*.ras (always supported) -- TIFF files - \*.tiff, \*.tif (see the *Notes* section) -- OpenEXR Image files - \*.exr (see the *Notes* section) +- TIFF files - \*.tiff, \*.tif (see the *Note* section) +- OpenEXR Image files - \*.exr (see the *Note* section) - Radiance HDR - \*.hdr, \*.pic (always supported) -- Raster and Vector geospatial data supported by Gdal (see the *Notes* section) +- Raster and Vector geospatial data supported by GDAL (see the *Note* section) @note - - The function determines the type of an image by the content, not by the file extension. - In the case of color images, the decoded images will have the channels stored in **B G R** order. - When using IMREAD_GRAYSCALE, the codec's internal grayscale conversion will be used, if available. @@ -167,11 +166,12 @@ Currently, the following file formats are supported: files, for example, "libjpeg-dev", in Debian\* and Ubuntu\*) to get the codec support or turn on the OPENCV_BUILD_3RDPARTY_LIBS flag in CMake. - In the case you set *WITH_GDAL* flag to true in CMake and @ref IMREAD_LOAD_GDAL to load the image, - then [GDAL](http://www.gdal.org) driver will be used in order to decode the image by supporting + then the [GDAL](http://www.gdal.org) driver will be used in order to decode the image, supporting the following formats: [Raster](http://www.gdal.org/formats_list.html), [Vector](http://www.gdal.org/ogr_formats.html). - If EXIF information are embedded in the image file, the EXIF orientation will be taken into account and thus the image will be rotated accordingly except if the flag @ref IMREAD_IGNORE_ORIENTATION is passed. + @param filename Name of file to be loaded. @param flags Flag that can take values of cv::ImreadModes */ @@ -190,18 +190,23 @@ CV_EXPORTS_W bool imreadmulti(const String& filename, CV_OUT std::vector& m /** @brief Saves an image to a specified file. The function imwrite saves the image to the specified file. The image format is chosen based on the -filename extension (see cv::imread for the list of extensions). Only 8-bit (or 16-bit unsigned (CV_16U) -in case of PNG, JPEG 2000, and TIFF) single-channel or 3-channel (with 'BGR' channel order) images -can be saved using this function. If the format, depth or channel order is different, use -Mat::convertTo , and cv::cvtColor to convert it before saving. Or, use the universal FileStorage I/O -functions to save the image to XML or YAML format. - -It is possible to store PNG images with an alpha channel using this function. To do this, create +filename extension (see cv::imread for the list of extensions). In general, only 8-bit +single-channel or 3-channel (with 'BGR' channel order) images +can be saved using this function, with these exceptions: + +- 16-bit unsigned (CV_16U) images can be saved in the case of PNG, JPEG 2000, and TIFF formats +- 32-bit float (CV_32F) images can be saved in TIFF, OpenEXR, and Radiance HDR formats; 3-channel +(CV_32FC3) TIFF images will be saved using the LogLuv high dynamic range encoding (4 bytes per pixel) +- PNG images with an alpha channel can be saved using this function. To do this, create 8-bit (or 16-bit) 4-channel image BGRA, where the alpha channel goes last. Fully transparent pixels -should have alpha set to 0, fully opaque pixels should have alpha set to 255/65535. +should have alpha set to 0, fully opaque pixels should have alpha set to 255/65535 (see the code sample below). + +If the format, depth or channel order is different, use +Mat::convertTo and cv::cvtColor to convert it before saving. Or, use the universal FileStorage I/O +functions to save the image to XML or YAML format. -The sample below shows how to create such a BGRA image and store to PNG file. It also demonstrates how to set custom -compression parameters : +The sample below shows how to create a BGRA image and save it to a PNG file. It also demonstrates how to set custom +compression parameters: @include snippets/imgcodecs_imwrite.cpp @param filename Name of the file. @param img Image to be saved.