parent
667f5b655a
commit
e8d94ea87c
14 changed files with 555 additions and 995 deletions
@ -0,0 +1,20 @@ |
|||||||
|
# OpenCV deep learning module samples |
||||||
|
|
||||||
|
## Model Zoo |
||||||
|
|
||||||
|
### Object detection |
||||||
|
|
||||||
|
| Model | Scale | Size WxH| Mean subtraction | Channels order | |
||||||
|
|---------------|-------|-----------|--------------------|-------| |
||||||
|
| [MobileNet-SSD, Caffe](https://github.com/chuanqi305/MobileNet-SSD/) | `0.00784 (2/255)` | `300x300` | `127.5 127.5 127.5` | BGR | |
||||||
|
| [OpenCV face detector](https://github.com/opencv/opencv/tree/master/samples/dnn/face_detector) | `1.0` | `300x300` | `104 177 123` | BGR | |
||||||
|
| [SSDs from TensorFlow](https://github.com/tensorflow/models/tree/master/research/object_detection/) | `0.00784 (2/255)` | `300x300` | `127.5 127.5 127.5` | RGB | |
||||||
|
| [YOLO](https://pjreddie.com/darknet/yolo/) | `0.00392 (1/255)` | `416x416` | `0 0 0` | RGB | |
||||||
|
| [VGG16-SSD](https://github.com/weiliu89/caffe/tree/ssd) | `1.0` | `300x300` | `104 117 123` | BGR | |
||||||
|
| [Faster-RCNN](https://github.com/rbgirshick/py-faster-rcnn) | `1.0` | `800x600` | `102.9801 115.9465 122.7717` | BGR | |
||||||
|
| [R-FCN](https://github.com/YuwenXiong/py-R-FCN) | `1.0` | `800x600` | `102.9801 115.9465 122.7717` | BGR | |
||||||
|
|
||||||
|
## References |
||||||
|
* [Models downloading script](https://github.com/opencv/opencv_extra/blob/master/testdata/dnn/download_models.py) |
||||||
|
* [Configuration files adapted for OpenCV](https://github.com/opencv/opencv_extra/tree/master/testdata/dnn) |
||||||
|
* [How to import models from TensorFlow Object Detection API](https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API) |
@ -1,93 +0,0 @@ |
|||||||
#include <opencv2/dnn.hpp> |
|
||||||
#include <opencv2/dnn/all_layers.hpp> |
|
||||||
#include <opencv2/imgproc.hpp> |
|
||||||
#include <opencv2/highgui.hpp> |
|
||||||
|
|
||||||
using namespace cv; |
|
||||||
using namespace dnn; |
|
||||||
|
|
||||||
const char* keys = |
|
||||||
"{ help h | | print help message }" |
|
||||||
"{ proto p | | path to .prototxt }" |
|
||||||
"{ model m | | path to .caffemodel }" |
|
||||||
"{ image i | | path to input image }" |
|
||||||
"{ conf c | 0.8 | minimal confidence }"; |
|
||||||
|
|
||||||
const char* classNames[] = { |
|
||||||
"__background__", |
|
||||||
"aeroplane", "bicycle", "bird", "boat", |
|
||||||
"bottle", "bus", "car", "cat", "chair", |
|
||||||
"cow", "diningtable", "dog", "horse", |
|
||||||
"motorbike", "person", "pottedplant", |
|
||||||
"sheep", "sofa", "train", "tvmonitor" |
|
||||||
}; |
|
||||||
|
|
||||||
static const int kInpWidth = 800; |
|
||||||
static const int kInpHeight = 600; |
|
||||||
|
|
||||||
int main(int argc, char** argv) |
|
||||||
{ |
|
||||||
// Parse command line arguments.
|
|
||||||
CommandLineParser parser(argc, argv, keys); |
|
||||||
parser.about("This sample is used to run Faster-RCNN and R-FCN object detection " |
|
||||||
"models with OpenCV. You can get required models from " |
|
||||||
"https://github.com/rbgirshick/py-faster-rcnn (Faster-RCNN) and from " |
|
||||||
"https://github.com/YuwenXiong/py-R-FCN (R-FCN). Corresponding .prototxt " |
|
||||||
"files may be found at https://github.com/opencv/opencv_extra/tree/master/testdata/dnn."); |
|
||||||
if (argc == 1 || parser.has("help")) |
|
||||||
{ |
|
||||||
parser.printMessage(); |
|
||||||
return 0; |
|
||||||
} |
|
||||||
|
|
||||||
String protoPath = parser.get<String>("proto"); |
|
||||||
String modelPath = parser.get<String>("model"); |
|
||||||
String imagePath = parser.get<String>("image"); |
|
||||||
float confThreshold = parser.get<float>("conf"); |
|
||||||
CV_Assert(!protoPath.empty(), !modelPath.empty(), !imagePath.empty()); |
|
||||||
|
|
||||||
// Load a model.
|
|
||||||
Net net = readNetFromCaffe(protoPath, modelPath); |
|
||||||
|
|
||||||
Mat img = imread(imagePath); |
|
||||||
resize(img, img, Size(kInpWidth, kInpHeight)); |
|
||||||
Mat blob = blobFromImage(img, 1.0, Size(), Scalar(102.9801, 115.9465, 122.7717), false, false); |
|
||||||
Mat imInfo = (Mat_<float>(1, 3) << img.rows, img.cols, 1.6f); |
|
||||||
|
|
||||||
net.setInput(blob, "data"); |
|
||||||
net.setInput(imInfo, "im_info"); |
|
||||||
|
|
||||||
// Draw detections.
|
|
||||||
Mat detections = net.forward(); |
|
||||||
const float* data = (float*)detections.data; |
|
||||||
for (size_t i = 0; i < detections.total(); i += 7) |
|
||||||
{ |
|
||||||
// An every detection is a vector [id, classId, confidence, left, top, right, bottom]
|
|
||||||
float confidence = data[i + 2]; |
|
||||||
if (confidence > confThreshold) |
|
||||||
{ |
|
||||||
int classId = (int)data[i + 1]; |
|
||||||
int left = max(0, min((int)data[i + 3], img.cols - 1)); |
|
||||||
int top = max(0, min((int)data[i + 4], img.rows - 1)); |
|
||||||
int right = max(0, min((int)data[i + 5], img.cols - 1)); |
|
||||||
int bottom = max(0, min((int)data[i + 6], img.rows - 1)); |
|
||||||
|
|
||||||
// Draw a bounding box.
|
|
||||||
rectangle(img, Point(left, top), Point(right, bottom), Scalar(0, 255, 0)); |
|
||||||
|
|
||||||
// Put a label with a class name and confidence.
|
|
||||||
String label = cv::format("%s, %.3f", classNames[classId], confidence); |
|
||||||
int baseLine; |
|
||||||
Size labelSize = cv::getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); |
|
||||||
|
|
||||||
top = max(top, labelSize.height); |
|
||||||
rectangle(img, Point(left, top - labelSize.height), |
|
||||||
Point(left + labelSize.width, top + baseLine), |
|
||||||
Scalar(255, 255, 255), FILLED); |
|
||||||
putText(img, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0)); |
|
||||||
} |
|
||||||
} |
|
||||||
imshow("frame", img); |
|
||||||
waitKey(); |
|
||||||
return 0; |
|
||||||
} |
|
@ -1,132 +0,0 @@ |
|||||||
# This script is used to demonstrate MobileNet-SSD network using OpenCV deep learning module. |
|
||||||
# |
|
||||||
# It works with model taken from https://github.com/chuanqi305/MobileNet-SSD/ that |
|
||||||
# was trained in Caffe-SSD framework, https://github.com/weiliu89/caffe/tree/ssd. |
|
||||||
# Model detects objects from 20 classes. |
|
||||||
# |
|
||||||
# Also TensorFlow model from TensorFlow object detection model zoo may be used to |
|
||||||
# detect objects from 90 classes: |
|
||||||
# http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_11_06_2017.tar.gz |
|
||||||
# Text graph definition must be taken from opencv_extra: |
|
||||||
# https://github.com/opencv/opencv_extra/tree/master/testdata/dnn/ssd_mobilenet_v1_coco.pbtxt |
|
||||||
import numpy as np |
|
||||||
import argparse |
|
||||||
|
|
||||||
try: |
|
||||||
import cv2 as cv |
|
||||||
except ImportError: |
|
||||||
raise ImportError('Can\'t find OpenCV Python module. If you\'ve built it from sources without installation, ' |
|
||||||
'configure environment variable PYTHONPATH to "opencv_build_dir/lib" directory (with "python3" subdirectory if required)') |
|
||||||
|
|
||||||
inWidth = 300 |
|
||||||
inHeight = 300 |
|
||||||
WHRatio = inWidth / float(inHeight) |
|
||||||
inScaleFactor = 0.007843 |
|
||||||
meanVal = 127.5 |
|
||||||
|
|
||||||
if __name__ == "__main__": |
|
||||||
parser = argparse.ArgumentParser( |
|
||||||
description='Script to run MobileNet-SSD object detection network ' |
|
||||||
'trained either in Caffe or TensorFlow frameworks.') |
|
||||||
parser.add_argument("--video", help="path to video file. If empty, camera's stream will be used") |
|
||||||
parser.add_argument("--prototxt", default="MobileNetSSD_deploy.prototxt", |
|
||||||
help='Path to text network file: ' |
|
||||||
'MobileNetSSD_deploy.prototxt for Caffe model or ' |
|
||||||
'ssd_mobilenet_v1_coco.pbtxt from opencv_extra for TensorFlow model') |
|
||||||
parser.add_argument("--weights", default="MobileNetSSD_deploy.caffemodel", |
|
||||||
help='Path to weights: ' |
|
||||||
'MobileNetSSD_deploy.caffemodel for Caffe model or ' |
|
||||||
'frozen_inference_graph.pb from TensorFlow.') |
|
||||||
parser.add_argument("--num_classes", default=20, type=int, |
|
||||||
help="Number of classes. It's 20 for Caffe model from " |
|
||||||
"https://github.com/chuanqi305/MobileNet-SSD/ and 90 for " |
|
||||||
"TensorFlow model from http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_11_06_2017.tar.gz") |
|
||||||
parser.add_argument("--thr", default=0.2, type=float, help="confidence threshold to filter out weak detections") |
|
||||||
args = parser.parse_args() |
|
||||||
|
|
||||||
if args.num_classes == 20: |
|
||||||
net = cv.dnn.readNetFromCaffe(args.prototxt, args.weights) |
|
||||||
swapRB = False |
|
||||||
classNames = { 0: 'background', |
|
||||||
1: 'aeroplane', 2: 'bicycle', 3: 'bird', 4: 'boat', |
|
||||||
5: 'bottle', 6: 'bus', 7: 'car', 8: 'cat', 9: 'chair', |
|
||||||
10: 'cow', 11: 'diningtable', 12: 'dog', 13: 'horse', |
|
||||||
14: 'motorbike', 15: 'person', 16: 'pottedplant', |
|
||||||
17: 'sheep', 18: 'sofa', 19: 'train', 20: 'tvmonitor' } |
|
||||||
else: |
|
||||||
assert(args.num_classes == 90) |
|
||||||
net = cv.dnn.readNetFromTensorflow(args.weights, args.prototxt) |
|
||||||
swapRB = True |
|
||||||
classNames = { 0: 'background', |
|
||||||
1: 'person', 2: 'bicycle', 3: 'car', 4: 'motorcycle', 5: 'airplane', 6: 'bus', |
|
||||||
7: 'train', 8: 'truck', 9: 'boat', 10: 'traffic light', 11: 'fire hydrant', |
|
||||||
13: 'stop sign', 14: 'parking meter', 15: 'bench', 16: 'bird', 17: 'cat', |
|
||||||
18: 'dog', 19: 'horse', 20: 'sheep', 21: 'cow', 22: 'elephant', 23: 'bear', |
|
||||||
24: 'zebra', 25: 'giraffe', 27: 'backpack', 28: 'umbrella', 31: 'handbag', |
|
||||||
32: 'tie', 33: 'suitcase', 34: 'frisbee', 35: 'skis', 36: 'snowboard', |
|
||||||
37: 'sports ball', 38: 'kite', 39: 'baseball bat', 40: 'baseball glove', |
|
||||||
41: 'skateboard', 42: 'surfboard', 43: 'tennis racket', 44: 'bottle', |
|
||||||
46: 'wine glass', 47: 'cup', 48: 'fork', 49: 'knife', 50: 'spoon', |
|
||||||
51: 'bowl', 52: 'banana', 53: 'apple', 54: 'sandwich', 55: 'orange', |
|
||||||
56: 'broccoli', 57: 'carrot', 58: 'hot dog', 59: 'pizza', 60: 'donut', |
|
||||||
61: 'cake', 62: 'chair', 63: 'couch', 64: 'potted plant', 65: 'bed', |
|
||||||
67: 'dining table', 70: 'toilet', 72: 'tv', 73: 'laptop', 74: 'mouse', |
|
||||||
75: 'remote', 76: 'keyboard', 77: 'cell phone', 78: 'microwave', 79: 'oven', |
|
||||||
80: 'toaster', 81: 'sink', 82: 'refrigerator', 84: 'book', 85: 'clock', |
|
||||||
86: 'vase', 87: 'scissors', 88: 'teddy bear', 89: 'hair drier', 90: 'toothbrush' } |
|
||||||
|
|
||||||
if args.video: |
|
||||||
cap = cv.VideoCapture(args.video) |
|
||||||
else: |
|
||||||
cap = cv.VideoCapture(0) |
|
||||||
|
|
||||||
while True: |
|
||||||
# Capture frame-by-frame |
|
||||||
ret, frame = cap.read() |
|
||||||
blob = cv.dnn.blobFromImage(frame, inScaleFactor, (inWidth, inHeight), (meanVal, meanVal, meanVal), swapRB) |
|
||||||
net.setInput(blob) |
|
||||||
detections = net.forward() |
|
||||||
|
|
||||||
cols = frame.shape[1] |
|
||||||
rows = frame.shape[0] |
|
||||||
|
|
||||||
if cols / float(rows) > WHRatio: |
|
||||||
cropSize = (int(rows * WHRatio), rows) |
|
||||||
else: |
|
||||||
cropSize = (cols, int(cols / WHRatio)) |
|
||||||
|
|
||||||
y1 = int((rows - cropSize[1]) / 2) |
|
||||||
y2 = y1 + cropSize[1] |
|
||||||
x1 = int((cols - cropSize[0]) / 2) |
|
||||||
x2 = x1 + cropSize[0] |
|
||||||
frame = frame[y1:y2, x1:x2] |
|
||||||
|
|
||||||
cols = frame.shape[1] |
|
||||||
rows = frame.shape[0] |
|
||||||
|
|
||||||
for i in range(detections.shape[2]): |
|
||||||
confidence = detections[0, 0, i, 2] |
|
||||||
if confidence > args.thr: |
|
||||||
class_id = int(detections[0, 0, i, 1]) |
|
||||||
|
|
||||||
xLeftBottom = int(detections[0, 0, i, 3] * cols) |
|
||||||
yLeftBottom = int(detections[0, 0, i, 4] * rows) |
|
||||||
xRightTop = int(detections[0, 0, i, 5] * cols) |
|
||||||
yRightTop = int(detections[0, 0, i, 6] * rows) |
|
||||||
|
|
||||||
cv.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop), |
|
||||||
(0, 255, 0)) |
|
||||||
if class_id in classNames: |
|
||||||
label = classNames[class_id] + ": " + str(confidence) |
|
||||||
labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1) |
|
||||||
|
|
||||||
yLeftBottom = max(yLeftBottom, labelSize[1]) |
|
||||||
cv.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]), |
|
||||||
(xLeftBottom + labelSize[0], yLeftBottom + baseLine), |
|
||||||
(255, 255, 255), cv.FILLED) |
|
||||||
cv.putText(frame, label, (xLeftBottom, yLeftBottom), |
|
||||||
cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0)) |
|
||||||
|
|
||||||
cv.imshow("detections", frame) |
|
||||||
if cv.waitKey(1) >= 0: |
|
||||||
break |
|
@ -0,0 +1,255 @@ |
|||||||
|
#include <opencv2/opencv.hpp> |
||||||
|
#include <fstream> |
||||||
|
#include <iostream> |
||||||
|
#include <sstream> |
||||||
|
|
||||||
|
const char* keys = |
||||||
|
"{ help h | | Print help message. }" |
||||||
|
"{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}" |
||||||
|
"{ model m | | Path to a binary file of model contains trained weights. " |
||||||
|
"It could be a file with extensions .caffemodel (Caffe), " |
||||||
|
".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet) }" |
||||||
|
"{ config c | | Path to a text file of model contains network configuration. " |
||||||
|
"It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet) }" |
||||||
|
"{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }" |
||||||
|
"{ classes | | Optional path to a text file with names of classes to label detected objects. }" |
||||||
|
"{ mean | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }" |
||||||
|
"{ scale | 1 | Preprocess input image by multiplying on a scale factor. }" |
||||||
|
"{ width | -1 | Preprocess input image by resizing to a specific width. }" |
||||||
|
"{ height | -1 | Preprocess input image by resizing to a specific height. }" |
||||||
|
"{ rgb | | Indicate that model works with RGB input images instead BGR ones. }" |
||||||
|
"{ thr | .5 | Confidence threshold. }" |
||||||
|
"{ opencl | | Enable OpenCL }"; |
||||||
|
|
||||||
|
using namespace cv; |
||||||
|
using namespace dnn; |
||||||
|
|
||||||
|
float confThreshold; |
||||||
|
std::vector<std::string> classes; |
||||||
|
|
||||||
|
void loadClasses(const std::string& file); |
||||||
|
|
||||||
|
Net readNet(const std::string& model, const std::string& config = "", const std::string& framework = ""); |
||||||
|
|
||||||
|
void postprocess(Mat& frame, const Mat& out, Net& net); |
||||||
|
|
||||||
|
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame); |
||||||
|
|
||||||
|
void callback(int pos, void* userdata); |
||||||
|
|
||||||
|
int main(int argc, char** argv) |
||||||
|
{ |
||||||
|
CommandLineParser parser(argc, argv, keys); |
||||||
|
parser.about("Use this script to run object detection deep learning networks using OpenCV."); |
||||||
|
if (argc == 1 || parser.has("help")) |
||||||
|
{ |
||||||
|
parser.printMessage(); |
||||||
|
return 0; |
||||||
|
} |
||||||
|
|
||||||
|
confThreshold = parser.get<float>("thr"); |
||||||
|
float scale = parser.get<float>("scale"); |
||||||
|
bool swapRB = parser.get<bool>("rgb"); |
||||||
|
int inpWidth = parser.get<int>("width"); |
||||||
|
int inpHeight = parser.get<int>("height"); |
||||||
|
|
||||||
|
// Parse mean values.
|
||||||
|
Scalar mean; |
||||||
|
if (parser.has("mean")) |
||||||
|
{ |
||||||
|
std::istringstream meanStr(parser.get<String>("mean")); |
||||||
|
std::vector<float> meanValues; |
||||||
|
float val; |
||||||
|
while (meanStr >> val) |
||||||
|
meanValues.push_back(val); |
||||||
|
CV_Assert(meanValues.size() == 3); |
||||||
|
mean = Scalar(meanValues[0], meanValues[1], meanValues[2]); |
||||||
|
} |
||||||
|
|
||||||
|
// Open file with classes names.
|
||||||
|
if (parser.has("classes")) |
||||||
|
{ |
||||||
|
std::string file = parser.get<String>("classes"); |
||||||
|
std::ifstream ifs(file.c_str()); |
||||||
|
if (!ifs.is_open()) |
||||||
|
CV_Error(Error::StsError, "File " + file + " not found"); |
||||||
|
std::string line; |
||||||
|
while (ifs >> line) |
||||||
|
{ |
||||||
|
classes.push_back(line); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Load a model.
|
||||||
|
CV_Assert(parser.has("model")); |
||||||
|
Net net = readNet(parser.get<String>("model"), parser.get<String>("config"), parser.get<String>("framework")); |
||||||
|
|
||||||
|
if (parser.get<bool>("opencl")) |
||||||
|
{ |
||||||
|
net.setPreferableTarget(DNN_TARGET_OPENCL); |
||||||
|
} |
||||||
|
|
||||||
|
// Create a window
|
||||||
|
static const std::string kWinName = "Deep learning object detection in OpenCV"; |
||||||
|
namedWindow(kWinName, WINDOW_NORMAL); |
||||||
|
int initialConf = confThreshold * 100; |
||||||
|
createTrackbar("Confidence threshold", kWinName, &initialConf, 99, callback); |
||||||
|
|
||||||
|
// Open a video file or an image file or a camera stream.
|
||||||
|
VideoCapture cap; |
||||||
|
if (parser.has("input")) |
||||||
|
cap.open(parser.get<String>("input")); |
||||||
|
else |
||||||
|
cap.open(0); |
||||||
|
|
||||||
|
// Process frames.
|
||||||
|
Mat frame, blob; |
||||||
|
while (waitKey(1) < 0) |
||||||
|
{ |
||||||
|
cap >> frame; |
||||||
|
if (frame.empty()) |
||||||
|
{ |
||||||
|
waitKey(); |
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
// Create a 4D blob from a frame.
|
||||||
|
Size inpSize(inpWidth > 0 ? inpWidth : frame.cols, |
||||||
|
inpHeight > 0 ? inpHeight : frame.rows); |
||||||
|
blobFromImage(frame, blob, scale, inpSize, mean, swapRB, false); |
||||||
|
|
||||||
|
// Run a model.
|
||||||
|
net.setInput(blob); |
||||||
|
if (net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN
|
||||||
|
{ |
||||||
|
resize(frame, frame, inpSize); |
||||||
|
Mat imInfo = (Mat_<float>(1, 3) << inpSize.height, inpSize.width, 1.6f); |
||||||
|
net.setInput(imInfo, "im_info"); |
||||||
|
} |
||||||
|
Mat out = net.forward(); |
||||||
|
|
||||||
|
postprocess(frame, out, net); |
||||||
|
|
||||||
|
// Put efficiency information.
|
||||||
|
std::vector<double> layersTimes; |
||||||
|
double t = net.getPerfProfile(layersTimes); |
||||||
|
std::string label = format("Inference time: %.2f", t * 1000 / getTickFrequency()); |
||||||
|
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar()); |
||||||
|
|
||||||
|
imshow(kWinName, frame); |
||||||
|
} |
||||||
|
return 0; |
||||||
|
} |
||||||
|
|
||||||
|
// Decodes the network output `out` and draws, via drawPred(), every detection
// whose confidence is greater than the global confThreshold onto `frame`.
// The decoding scheme is chosen from the network itself: an "im_info" input
// selects the Faster-RCNN / R-FCN layout, otherwise the type of the first
// unconnected output layer decides. An unsupported layer type raises
// Error::StsNotImplemented.
void postprocess(Mat& frame, const Mat& out, Net& net)
{
    // Function-local statics: queried from `net` on the first call only.
    static std::vector<int> outLayers = net.getUnconnectedOutLayers();
    static std::string outLayerType = net.getLayer(outLayers[0])->type;

    float* data = (float*)out.data;
    // Faster-RCNN or R-FCN: boxes are used as given, without scaling.
    const bool hasImInfo = net.getLayer(0)->outputNameToIndex("im_info") != -1;

    if (hasImInfo || outLayerType == "DetectionOutput")
    {
        // The output is walked in groups of 7 floats, one group per detection:
        // [batchId, classId, confidence, left, top, right, bottom].
        // For DetectionOutput the box values are multiplied by the frame size;
        // with an "im_info" input they are taken unchanged (factor 1).
        const float scaleX = hasImInfo ? 1.0f : (float)frame.cols;
        const float scaleY = hasImInfo ? 1.0f : (float)frame.rows;
        const size_t total = out.total();
        for (size_t offset = 0; offset < total; offset += 7)
        {
            const float* det = data + offset;
            const float score = det[2];
            if (!(score > confThreshold))
                continue;

            const int boxLeft = (int)(det[3] * scaleX);
            const int boxTop = (int)(det[4] * scaleY);
            const int boxRight = (int)(det[5] * scaleX);
            const int boxBottom = (int)(det[6] * scaleY);
            const int label = (int)(det[1]) - 1;  // Skip 0th background class id.
            drawPred(label, score, boxLeft, boxTop, boxRight, boxBottom, frame);
        }
    }
    else if (outLayerType == "Region")
    {
        // One row per candidate object. Columns 0..3 hold
        // [center_x, center_y, width, height], multiplied by the frame size
        // below; per-class scores start at column 5. Column 4 is not read here.
        for (int r = 0; r < out.rows; ++r)
        {
            const float* box = data + r * out.cols;
            Mat scores = out.row(r).colRange(5, out.cols);
            Point best;
            double bestScore;
            minMaxLoc(scores, 0, &bestScore, 0, &best);
            if (!(bestScore > confThreshold))
                continue;

            const int cx = (int)(box[0] * frame.cols);
            const int cy = (int)(box[1] * frame.rows);
            const int boxW = (int)(box[2] * frame.cols);
            const int boxH = (int)(box[3] * frame.rows);
            const int boxLeft = cx - boxW / 2;
            const int boxTop = cy - boxH / 2;
            drawPred(best.x, (float)bestScore, boxLeft, boxTop, boxLeft + boxW, boxTop + boxH, frame);
        }
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
}
||||||
|
|
||||||
|
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame) |
||||||
|
{ |
||||||
|
rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 255, 0)); |
||||||
|
|
||||||
|
std::string label = format("%.2f", conf); |
||||||
|
if (!classes.empty()) |
||||||
|
{ |
||||||
|
CV_Assert(classId < (int)classes.size()); |
||||||
|
label = classes[classId] + ": " + label; |
||||||
|
} |
||||||
|
|
||||||
|
int baseLine; |
||||||
|
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); |
||||||
|
|
||||||
|
top = max(top, labelSize.height); |
||||||
|
rectangle(frame, Point(left, top - labelSize.height), |
||||||
|
Point(left + labelSize.width, top + baseLine), Scalar::all(255), FILLED); |
||||||
|
putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.5, Scalar()); |
||||||
|
} |
||||||
|
|
||||||
|
void callback(int pos, void*) |
||||||
|
{ |
||||||
|
confThreshold = pos * 0.01; |
||||||
|
} |
||||||
|
|
||||||
|
// Loads a network with the importer that matches the model.
//
// model     - path to the binary file with trained weights.
// config    - path to the text file with the network configuration; not used
//             for Torch models.
// framework - optional framework name ("caffe", "tensorflow", "torch",
//             "darknet"); the extension of `model` is checked as well.
//
// Raises Error::StsError when neither the framework name nor the file
// extension identifies an importer.
Net readNet(const std::string& model, const std::string& config, const std::string& framework)
{
    // Take the extension from the LAST dot, so that paths such as
    // "./model.pb" or "ssd_v1.2.caffemodel" are handled, and treat a path
    // without any dot as having no extension instead of letting substr() throw.
    const std::string::size_type dotPos = model.rfind('.');
    const std::string modelExt = dotPos == std::string::npos ? std::string() : model.substr(dotPos);

    if (framework == "caffe" || modelExt == ".caffemodel")
        return readNetFromCaffe(config, model);
    else if (framework == "tensorflow" || modelExt == ".pb")
        return readNetFromTensorflow(model, config);
    else if (framework == "torch" || modelExt == ".t7" || modelExt == ".net")
        return readNetFromTorch(model);
    else if (framework == "darknet" || modelExt == ".weights")
        return readNetFromDarknet(config, model);
    else
        CV_Error(Error::StsError, "Cannot determine an origin framework of model from file " + model);
    // Not reached when CV_Error throws; keeps every control path returning a value.
    return Net();
}
@ -0,0 +1,161 @@ |
|||||||
|
import cv2 as cv |
||||||
|
import argparse |
||||||
|
import sys |
||||||
|
import numpy as np |
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(description='Use this script to run object detection deep learning networks using OpenCV.') |
||||||
|
parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') |
||||||
|
parser.add_argument('--model', required=True, |
||||||
|
help='Path to a binary file of model contains trained weights. ' |
||||||
|
'It could be a file with extensions .caffemodel (Caffe), ' |
||||||
|
'.pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet)') |
||||||
|
parser.add_argument('--config', |
||||||
|
help='Path to a text file of model contains network configuration. ' |
||||||
|
'It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet)') |
||||||
|
parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet'], |
||||||
|
help='Optional name of an origin framework of the model. ' |
||||||
|
'Detect it automatically if it does not set.') |
||||||
|
parser.add_argument('--classes', help='Optional path to a text file with names of classes to label detected objects.') |
||||||
|
parser.add_argument('--mean', nargs='+', type=float, default=[0, 0, 0], |
||||||
|
help='Preprocess input image by subtracting mean values. ' |
||||||
|
'Mean values should be in BGR order.') |
||||||
|
parser.add_argument('--scale', type=float, default=1.0, |
||||||
|
help='Preprocess input image by multiplying on a scale factor.') |
||||||
|
parser.add_argument('--width', type=int, |
||||||
|
help='Preprocess input image by resizing to a specific width.') |
||||||
|
parser.add_argument('--height', type=int, |
||||||
|
help='Preprocess input image by resizing to a specific height.') |
||||||
|
parser.add_argument('--rgb', action='store_true', |
||||||
|
help='Indicate that model works with RGB input images instead BGR ones.') |
||||||
|
parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold') |
||||||
|
args = parser.parse_args() |
||||||
|
|
||||||
|
# Load names of classes |
||||||
|
classes = None |
||||||
|
if args.classes: |
||||||
|
with open(args.classes, 'rt') as f: |
||||||
|
classes = f.read().rstrip('\n').split('\n') |
||||||
|
|
||||||
|
# Load a network |
||||||
|
# Pick an importer from the explicit --framework value or from the extension of
# the model file. The extension starts at the LAST dot so that paths such as
# './model.pb' or 'ssd_v1.2.caffemodel' work; a path without a dot has none.
dotPos = args.model.rfind('.')
modelExt = args.model[dotPos:] if dotPos != -1 else ''
if args.framework == 'caffe' or modelExt == '.caffemodel':
    net = cv.dnn.readNetFromCaffe(args.config, args.model)
elif args.framework == 'tensorflow' or modelExt == '.pb':
    net = cv.dnn.readNetFromTensorflow(args.model, args.config)
elif args.framework == 'torch' or modelExt in ['.t7', '.net']:
    net = cv.dnn.readNetFromTorch(args.model)
elif args.framework == 'darknet' or modelExt == '.weights':
    net = cv.dnn.readNetFromDarknet(args.config, args.model)
else:
    # A string argument makes sys.exit print it to stderr and exit with status 1,
    # so the failure is visible to calling scripts.
    sys.exit('Cannot determine an origin framework of model from file %s' % args.model)
||||||
|
|
||||||
|
confThreshold = args.thr |
||||||
|
|
||||||
|
def postprocess(frame, out):
    """Decode the network output and draw the detections on `frame` in place.

    Every detection whose confidence is greater than the global `confThreshold`
    is drawn as a green box with a text label. How `out` is decoded depends on
    the network: an 'im_info' input selects the Faster-RCNN / R-FCN layout,
    otherwise the type of the last layer ('DetectionOutput' or 'Region')
    decides. Other layer types draw nothing.
    """
    frameHeight = frame.shape[0]
    frameWidth = frame.shape[1]

    def drawPred(classId, conf, left, top, right, bottom):
        # Draw a bounding box.
        cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0))

        # Use the value passed in, not a variable of the enclosing scope.
        label = '%.2f' % conf

        # Print a label of class.
        if classes:
            # A negative id (background after the -1 shift) would otherwise
            # silently pick a name from the end of the list.
            assert(0 <= classId < len(classes))
            label = '%s: %s' % (classes[classId], label)

        labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        # Keep the label inside the image when the box touches the top edge.
        top = max(top, labelSize[1])
        cv.rectangle(frame, (left, top - labelSize[1]), (left + labelSize[0], top + baseLine), (255, 255, 255), cv.FILLED)
        cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

    layerNames = net.getLayerNames()
    lastLayerId = net.getLayerId(layerNames[-1])
    lastLayer = net.getLayer(lastLayerId)

    if net.getLayer(0).outputNameToIndex('im_info') != -1:  # Faster-RCNN or R-FCN
        # Each detection is a vector of 7 values
        # [batchId, classId, confidence, left, top, right, bottom];
        # the box values are used as pixel coordinates without scaling.
        for detection in out[0, 0]:
            confidence = detection[2]
            if confidence > confThreshold:
                left = int(detection[3])
                top = int(detection[4])
                right = int(detection[5])
                bottom = int(detection[6])
                classId = int(detection[1]) - 1  # Skip background label
                drawPred(classId, confidence, left, top, right, bottom)
    elif lastLayer.type == 'DetectionOutput':
        # Each detection is a vector of 7 values
        # [batchId, classId, confidence, left, top, right, bottom];
        # the box values are multiplied by the frame size here.
        for detection in out[0, 0]:
            confidence = detection[2]
            if confidence > confThreshold:
                left = int(detection[3] * frameWidth)
                top = int(detection[4] * frameHeight)
                right = int(detection[5] * frameWidth)
                bottom = int(detection[6] * frameHeight)
                classId = int(detection[1]) - 1  # Skip background label
                drawPred(classId, confidence, left, top, right, bottom)
    elif lastLayer.type == 'Region':
        # One row per candidate object. Elements 0..3 are
        # [center_x, center_y, width, height], multiplied by the frame size
        # below; per-class scores start at element 5. Element 4 is not read here.
        for detection in out:
            confidences = detection[5:]
            classId = int(np.argmax(confidences))
            confidence = confidences[classId]
            if confidence > confThreshold:
                center_x = int(detection[0] * frameWidth)
                center_y = int(detection[1] * frameHeight)
                width = int(detection[2] * frameWidth)
                height = int(detection[3] * frameHeight)
                # Floor division keeps the corners integral on Python 3 as well;
                # the drawing calls take integer pixel coordinates.
                left = center_x - width // 2
                top = center_y - height // 2
                drawPred(classId, confidence, left, top, left + width, top + height)
||||||
|
|
||||||
|
# Process inputs |
||||||
|
winName = 'Deep learning object detection in OpenCV' |
||||||
|
cv.namedWindow(winName, cv.WINDOW_NORMAL) |
||||||
|
|
||||||
|
def callback(pos):
    """Trackbar handler: store the slider position (percent) as a fraction."""
    global confThreshold
    percent = float(pos)
    confThreshold = percent / 100.0
||||||
|
|
||||||
|
cv.createTrackbar('Confidence threshold, %', winName, int(confThreshold * 100), 99, callback) |
||||||
|
|
||||||
|
cap = cv.VideoCapture(args.input if args.input else 0) |
||||||
|
while cv.waitKey(1) < 0: |
||||||
|
hasFrame, frame = cap.read() |
||||||
|
if not hasFrame: |
||||||
|
cv.waitKey() |
||||||
|
break |
||||||
|
|
||||||
|
frameHeight = frame.shape[0] |
||||||
|
frameWidth = frame.shape[1] |
||||||
|
|
||||||
|
# Create a 4D blob from a frame. |
||||||
|
inpWidth = args.width if args.width else frameWidth |
||||||
|
inpHeight = args.height if args.height else frameHeight |
||||||
|
blob = cv.dnn.blobFromImage(frame, args.scale, (inpWidth, inpHeight), args.mean, args.rgb, crop=False) |
||||||
|
|
||||||
|
# Run a model |
||||||
|
net.setInput(blob) |
||||||
|
if net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN |
||||||
|
frame = cv.resize(frame, (inpWidth, inpHeight)) |
||||||
|
net.setInput(np.array([inpHeight, inpWidth, 1.6], dtype=np.float32), 'im_info'); |
||||||
|
out = net.forward() |
||||||
|
|
||||||
|
postprocess(frame, out) |
||||||
|
|
||||||
|
# Put efficiency information. |
||||||
|
t, _ = net.getPerfProfile() |
||||||
|
label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency()) |
||||||
|
cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0)) |
||||||
|
|
||||||
|
cv.imshow(winName, frame) |
@ -0,0 +1,90 @@ |
|||||||
|
person |
||||||
|
bicycle |
||||||
|
car |
||||||
|
motorcycle |
||||||
|
airplane |
||||||
|
bus |
||||||
|
train |
||||||
|
truck |
||||||
|
boat |
||||||
|
traffic light |
||||||
|
fire hydrant |
||||||
|
|
||||||
|
stop sign |
||||||
|
parking meter |
||||||
|
bench |
||||||
|
bird |
||||||
|
cat |
||||||
|
dog |
||||||
|
horse |
||||||
|
sheep |
||||||
|
cow |
||||||
|
elephant |
||||||
|
bear |
||||||
|
zebra |
||||||
|
giraffe |
||||||
|
|
||||||
|
backpack |
||||||
|
umbrella |
||||||
|
|
||||||
|
|
||||||
|
handbag |
||||||
|
tie |
||||||
|
suitcase |
||||||
|
frisbee |
||||||
|
skis |
||||||
|
snowboard |
||||||
|
sports ball |
||||||
|
kite |
||||||
|
baseball bat |
||||||
|
baseball glove |
||||||
|
skateboard |
||||||
|
surfboard |
||||||
|
tennis racket |
||||||
|
bottle |
||||||
|
|
||||||
|
wine glass |
||||||
|
cup |
||||||
|
fork |
||||||
|
knife |
||||||
|
spoon |
||||||
|
bowl |
||||||
|
banana |
||||||
|
apple |
||||||
|
sandwich |
||||||
|
orange |
||||||
|
broccoli |
||||||
|
carrot |
||||||
|
hot dog |
||||||
|
pizza |
||||||
|
donut |
||||||
|
cake |
||||||
|
chair |
||||||
|
couch |
||||||
|
potted plant |
||||||
|
bed |
||||||
|
|
||||||
|
dining table |
||||||
|
|
||||||
|
|
||||||
|
toilet |
||||||
|
|
||||||
|
tv |
||||||
|
laptop |
||||||
|
mouse |
||||||
|
remote |
||||||
|
keyboard |
||||||
|
cell phone |
||||||
|
microwave |
||||||
|
oven |
||||||
|
toaster |
||||||
|
sink |
||||||
|
refrigerator |
||||||
|
|
||||||
|
book |
||||||
|
clock |
||||||
|
vase |
||||||
|
scissors |
||||||
|
teddy bear |
||||||
|
hair drier |
||||||
|
toothbrush |
@ -0,0 +1,20 @@ |
|||||||
|
aeroplane |
||||||
|
bicycle |
||||||
|
bird |
||||||
|
boat |
||||||
|
bottle |
||||||
|
bus |
||||||
|
car |
||||||
|
cat |
||||||
|
chair |
||||||
|
cow |
||||||
|
diningtable |
||||||
|
dog |
||||||
|
horse |
||||||
|
motorbike |
||||||
|
person |
||||||
|
pottedplant |
||||||
|
sheep |
||||||
|
sofa |
||||||
|
train |
||||||
|
tvmonitor |
@ -1,164 +0,0 @@ |
|||||||
#include <opencv2/dnn.hpp> |
|
||||||
#include <opencv2/imgproc.hpp> |
|
||||||
#include <opencv2/highgui.hpp> |
|
||||||
#include <iostream> |
|
||||||
|
|
||||||
using namespace cv; |
|
||||||
using namespace std; |
|
||||||
using namespace cv::dnn; |
|
||||||
|
|
||||||
const size_t inWidth = 300; |
|
||||||
const size_t inHeight = 300; |
|
||||||
const double inScaleFactor = 1.0; |
|
||||||
const Scalar meanVal(104.0, 177.0, 123.0); |
|
||||||
|
|
||||||
const char* about = "This sample uses Single-Shot Detector " |
|
||||||
"(https://arxiv.org/abs/1512.02325) " |
|
||||||
"with ResNet-10 architecture to detect faces on camera/video/image.\n" |
|
||||||
"More information about the training is available here: " |
|
||||||
"<OPENCV_SRC_DIR>/samples/dnn/face_detector/how_to_train_face_detector.txt\n" |
|
||||||
".caffemodel model's file is available here: " |
|
||||||
"<OPENCV_SRC_DIR>/samples/dnn/face_detector/res10_300x300_ssd_iter_140000.caffemodel\n" |
|
||||||
".prototxt file is available here: " |
|
||||||
"<OPENCV_SRC_DIR>/samples/dnn/face_detector/deploy.prototxt\n"; |
|
||||||
|
|
||||||
const char* params |
|
||||||
= "{ help | false | print usage }" |
|
||||||
"{ proto | | model configuration (deploy.prototxt) }" |
|
||||||
"{ model | | model weights (res10_300x300_ssd_iter_140000.caffemodel) }" |
|
||||||
"{ camera_device | 0 | camera device number }" |
|
||||||
"{ video | | video or image for detection }" |
|
||||||
"{ opencl | false | enable OpenCL }" |
|
||||||
"{ min_confidence | 0.5 | min confidence }"; |
|
||||||
|
|
||||||
int main(int argc, char** argv) |
|
||||||
{ |
|
||||||
CommandLineParser parser(argc, argv, params); |
|
||||||
|
|
||||||
if (parser.get<bool>("help")) |
|
||||||
{ |
|
||||||
cout << about << endl; |
|
||||||
parser.printMessage(); |
|
||||||
return 0; |
|
||||||
} |
|
||||||
|
|
||||||
String modelConfiguration = parser.get<string>("proto"); |
|
||||||
String modelBinary = parser.get<string>("model"); |
|
||||||
|
|
||||||
//! [Initialize network]
|
|
||||||
dnn::Net net = readNetFromCaffe(modelConfiguration, modelBinary); |
|
||||||
//! [Initialize network]
|
|
||||||
|
|
||||||
if (net.empty()) |
|
||||||
{ |
|
||||||
cerr << "Can't load network by using the following files: " << endl; |
|
||||||
cerr << "prototxt: " << modelConfiguration << endl; |
|
||||||
cerr << "caffemodel: " << modelBinary << endl; |
|
||||||
cerr << "Models are available here:" << endl; |
|
||||||
cerr << "<OPENCV_SRC_DIR>/samples/dnn/face_detector" << endl; |
|
||||||
cerr << "or here:" << endl; |
|
||||||
cerr << "https://github.com/opencv/opencv/tree/master/samples/dnn/face_detector" << endl; |
|
||||||
exit(-1); |
|
||||||
} |
|
||||||
|
|
||||||
if (parser.get<bool>("opencl")) |
|
||||||
{ |
|
||||||
net.setPreferableTarget(DNN_TARGET_OPENCL); |
|
||||||
} |
|
||||||
|
|
||||||
VideoCapture cap; |
|
||||||
if (parser.get<String>("video").empty()) |
|
||||||
{ |
|
||||||
int cameraDevice = parser.get<int>("camera_device"); |
|
||||||
cap = VideoCapture(cameraDevice); |
|
||||||
if(!cap.isOpened()) |
|
||||||
{ |
|
||||||
cout << "Couldn't find camera: " << cameraDevice << endl; |
|
||||||
return -1; |
|
||||||
} |
|
||||||
} |
|
||||||
else |
|
||||||
{ |
|
||||||
cap.open(parser.get<String>("video")); |
|
||||||
if(!cap.isOpened()) |
|
||||||
{ |
|
||||||
cout << "Couldn't open image or video: " << parser.get<String>("video") << endl; |
|
||||||
return -1; |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
for(;;) |
|
||||||
{ |
|
||||||
Mat frame; |
|
||||||
cap >> frame; // get a new frame from camera/video or read image
|
|
||||||
|
|
||||||
if (frame.empty()) |
|
||||||
{ |
|
||||||
waitKey(); |
|
||||||
break; |
|
||||||
} |
|
||||||
|
|
||||||
if (frame.channels() == 4) |
|
||||||
cvtColor(frame, frame, COLOR_BGRA2BGR); |
|
||||||
|
|
||||||
//! [Prepare blob]
|
|
||||||
Mat inputBlob = blobFromImage(frame, inScaleFactor, |
|
||||||
Size(inWidth, inHeight), meanVal, false, false); //Convert Mat to batch of images
|
|
||||||
//! [Prepare blob]
|
|
||||||
|
|
||||||
//! [Set input blob]
|
|
||||||
net.setInput(inputBlob, "data"); //set the network input
|
|
||||||
//! [Set input blob]
|
|
||||||
|
|
||||||
//! [Make forward pass]
|
|
||||||
Mat detection = net.forward("detection_out"); //compute output
|
|
||||||
//! [Make forward pass]
|
|
||||||
|
|
||||||
vector<double> layersTimings; |
|
||||||
double freq = getTickFrequency() / 1000; |
|
||||||
double time = net.getPerfProfile(layersTimings) / freq; |
|
||||||
|
|
||||||
Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>()); |
|
||||||
|
|
||||||
ostringstream ss; |
|
||||||
ss << "FPS: " << 1000/time << " ; time: " << time << " ms"; |
|
||||||
putText(frame, ss.str(), Point(20,20), 0, 0.5, Scalar(0,0,255)); |
|
||||||
|
|
||||||
float confidenceThreshold = parser.get<float>("min_confidence"); |
|
||||||
for(int i = 0; i < detectionMat.rows; i++) |
|
||||||
{ |
|
||||||
float confidence = detectionMat.at<float>(i, 2); |
|
||||||
|
|
||||||
if(confidence > confidenceThreshold) |
|
||||||
{ |
|
||||||
int xLeftBottom = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols); |
|
||||||
int yLeftBottom = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows); |
|
||||||
int xRightTop = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols); |
|
||||||
int yRightTop = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows); |
|
||||||
|
|
||||||
Rect object((int)xLeftBottom, (int)yLeftBottom, |
|
||||||
(int)(xRightTop - xLeftBottom), |
|
||||||
(int)(yRightTop - yLeftBottom)); |
|
||||||
|
|
||||||
rectangle(frame, object, Scalar(0, 255, 0)); |
|
||||||
|
|
||||||
ss.str(""); |
|
||||||
ss << confidence; |
|
||||||
String conf(ss.str()); |
|
||||||
String label = "Face: " + conf; |
|
||||||
int baseLine = 0; |
|
||||||
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); |
|
||||||
rectangle(frame, Rect(Point(xLeftBottom, yLeftBottom - labelSize.height), |
|
||||||
Size(labelSize.width, labelSize.height + baseLine)), |
|
||||||
Scalar(255, 255, 255), FILLED); |
|
||||||
putText(frame, label, Point(xLeftBottom, yLeftBottom), |
|
||||||
FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0)); |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
imshow("detections", frame); |
|
||||||
if (waitKey(1) >= 0) break; |
|
||||||
} |
|
||||||
|
|
||||||
return 0; |
|
||||||
} // main
|
|
@ -1,55 +0,0 @@ |
|||||||
import numpy as np |
|
||||||
import argparse |
|
||||||
import cv2 as cv |
|
||||||
try: |
|
||||||
import cv2 as cv |
|
||||||
except ImportError: |
|
||||||
raise ImportError('Can\'t find OpenCV Python module. If you\'ve built it from sources without installation, ' |
|
||||||
'configure environment variable PYTHONPATH to "opencv_build_dir/lib" directory (with "python3" subdirectory if required)') |
|
||||||
|
|
||||||
from cv2 import dnn |
|
||||||
|
|
||||||
inWidth = 300 |
|
||||||
inHeight = 300 |
|
||||||
confThreshold = 0.5 |
|
||||||
|
|
||||||
prototxt = 'face_detector/deploy.prototxt' |
|
||||||
caffemodel = 'face_detector/res10_300x300_ssd_iter_140000.caffemodel' |
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Load the face detector and read frames from the default camera (device 0).
    net = dnn.readNetFromCaffe(prototxt, caffemodel)
    cap = cv.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        # Stop cleanly when no frame could be read instead of failing on frame.shape below.
        if not ret or frame is None:
            break
        cols = frame.shape[1]
        rows = frame.shape[0]

        net.setInput(dnn.blobFromImage(frame, 1.0, (inWidth, inHeight), (104.0, 177.0, 123.0), False, False))
        detections = net.forward()

        perf_stats = net.getPerfProfile()

        print('Inference time, ms: %.2f' % (perf_stats[0] / cv.getTickFrequency() * 1000))

        # One detection per index along axis 2; element 2 is the confidence and
        # elements 3..6 are scaled by the frame size to get the box corners in pixels.
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence > confThreshold:
                xLeftBottom = int(detections[0, 0, i, 3] * cols)
                yLeftBottom = int(detections[0, 0, i, 4] * rows)
                xRightTop = int(detections[0, 0, i, 5] * cols)
                yRightTop = int(detections[0, 0, i, 6] * rows)

                cv.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),
                             (0, 255, 0))
                label = "face: %.4f" % confidence
                labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)

                # White background behind the label, then the label text itself.
                cv.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
                             (xLeftBottom + labelSize[0], yLeftBottom + baseLine),
                             (255, 255, 255), cv.FILLED)
                cv.putText(frame, label, (xLeftBottom, yLeftBottom),
                           cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

        cv.imshow("detections", frame)
        if cv.waitKey(1) != -1:
            break

    cap.release()
|
@ -1,187 +0,0 @@ |
|||||||
#include <opencv2/dnn.hpp> |
|
||||||
#include <opencv2/dnn/shape_utils.hpp> |
|
||||||
#include <opencv2/imgproc.hpp> |
|
||||||
#include <opencv2/highgui.hpp> |
|
||||||
#include <iostream> |
|
||||||
|
|
||||||
using namespace cv; |
|
||||||
using namespace std; |
|
||||||
using namespace cv::dnn; |
|
||||||
|
|
||||||
const size_t inWidth = 300; |
|
||||||
const size_t inHeight = 300; |
|
||||||
const float inScaleFactor = 0.007843f; |
|
||||||
const float meanVal = 127.5; |
|
||||||
const char* classNames[] = {"background", |
|
||||||
"aeroplane", "bicycle", "bird", "boat", |
|
||||||
"bottle", "bus", "car", "cat", "chair", |
|
||||||
"cow", "diningtable", "dog", "horse", |
|
||||||
"motorbike", "person", "pottedplant", |
|
||||||
"sheep", "sofa", "train", "tvmonitor"}; |
|
||||||
|
|
||||||
const String keys |
|
||||||
= "{ help | false | print usage }" |
|
||||||
"{ proto | MobileNetSSD_deploy.prototxt | model configuration }" |
|
||||||
"{ model | MobileNetSSD_deploy.caffemodel | model weights }" |
|
||||||
"{ camera_device | 0 | camera device number }" |
|
||||||
"{ camera_width | 640 | camera device width }" |
|
||||||
"{ camera_height | 480 | camera device height }" |
|
||||||
"{ video | | video or image for detection}" |
|
||||||
"{ out | | path to output video file}" |
|
||||||
"{ min_confidence | 0.2 | min confidence }" |
|
||||||
"{ opencl | false | enable OpenCL }" |
|
||||||
; |
|
||||||
|
|
||||||
int main(int argc, char** argv) |
|
||||||
{ |
|
||||||
CommandLineParser parser(argc, argv, keys); |
|
||||||
parser.about("This sample uses MobileNet Single-Shot Detector " |
|
||||||
"(https://arxiv.org/abs/1704.04861) " |
|
||||||
"to detect objects on camera/video/image.\n" |
|
||||||
".caffemodel model's file is available here: " |
|
||||||
"https://github.com/chuanqi305/MobileNet-SSD\n" |
|
||||||
"Default network is 300x300 and 20-classes VOC.\n"); |
|
||||||
|
|
||||||
if (parser.get<bool>("help")) |
|
||||||
{ |
|
||||||
parser.printMessage(); |
|
||||||
return 0; |
|
||||||
} |
|
||||||
|
|
||||||
String modelConfiguration = parser.get<String>("proto"); |
|
||||||
String modelBinary = parser.get<String>("model"); |
|
||||||
CV_Assert(!modelConfiguration.empty() && !modelBinary.empty()); |
|
||||||
|
|
||||||
//! [Initialize network]
|
|
||||||
dnn::Net net = readNetFromCaffe(modelConfiguration, modelBinary); |
|
||||||
//! [Initialize network]
|
|
||||||
|
|
||||||
if (parser.get<bool>("opencl")) |
|
||||||
{ |
|
||||||
net.setPreferableTarget(DNN_TARGET_OPENCL); |
|
||||||
} |
|
||||||
|
|
||||||
if (net.empty()) |
|
||||||
{ |
|
||||||
cerr << "Can't load network by using the following files: " << endl; |
|
||||||
cerr << "prototxt: " << modelConfiguration << endl; |
|
||||||
cerr << "caffemodel: " << modelBinary << endl; |
|
||||||
cerr << "Models can be downloaded here:" << endl; |
|
||||||
cerr << "https://github.com/chuanqi305/MobileNet-SSD" << endl; |
|
||||||
exit(-1); |
|
||||||
} |
|
||||||
|
|
||||||
VideoCapture cap; |
|
||||||
if (!parser.has("video")) |
|
||||||
{ |
|
||||||
int cameraDevice = parser.get<int>("camera_device"); |
|
||||||
cap = VideoCapture(cameraDevice); |
|
||||||
if(!cap.isOpened()) |
|
||||||
{ |
|
||||||
cout << "Couldn't find camera: " << cameraDevice << endl; |
|
||||||
return -1; |
|
||||||
} |
|
||||||
|
|
||||||
cap.set(CAP_PROP_FRAME_WIDTH, parser.get<int>("camera_width")); |
|
||||||
cap.set(CAP_PROP_FRAME_HEIGHT, parser.get<int>("camera_height")); |
|
||||||
} |
|
||||||
else |
|
||||||
{ |
|
||||||
cap.open(parser.get<String>("video")); |
|
||||||
if(!cap.isOpened()) |
|
||||||
{ |
|
||||||
cout << "Couldn't open image or video: " << parser.get<String>("video") << endl; |
|
||||||
return -1; |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
//Acquire input size
|
|
||||||
Size inVideoSize((int) cap.get(CAP_PROP_FRAME_WIDTH), |
|
||||||
(int) cap.get(CAP_PROP_FRAME_HEIGHT)); |
|
||||||
|
|
||||||
double fps = cap.get(CAP_PROP_FPS); |
|
||||||
int fourcc = static_cast<int>(cap.get(CAP_PROP_FOURCC)); |
|
||||||
VideoWriter outputVideo; |
|
||||||
outputVideo.open(parser.get<String>("out") , |
|
||||||
(fourcc != 0 ? fourcc : VideoWriter::fourcc('M','J','P','G')), |
|
||||||
(fps != 0 ? fps : 10.0), inVideoSize, true); |
|
||||||
|
|
||||||
for(;;) |
|
||||||
{ |
|
||||||
Mat frame; |
|
||||||
cap >> frame; // get a new frame from camera/video or read image
|
|
||||||
|
|
||||||
if (frame.empty()) |
|
||||||
{ |
|
||||||
waitKey(); |
|
||||||
break; |
|
||||||
} |
|
||||||
|
|
||||||
if (frame.channels() == 4) |
|
||||||
cvtColor(frame, frame, COLOR_BGRA2BGR); |
|
||||||
|
|
||||||
//! [Prepare blob]
|
|
||||||
Mat inputBlob = blobFromImage(frame, inScaleFactor, |
|
||||||
Size(inWidth, inHeight), |
|
||||||
Scalar(meanVal, meanVal, meanVal), |
|
||||||
false, false); //Convert Mat to batch of images
|
|
||||||
//! [Prepare blob]
|
|
||||||
|
|
||||||
//! [Set input blob]
|
|
||||||
net.setInput(inputBlob); //set the network input
|
|
||||||
//! [Set input blob]
|
|
||||||
|
|
||||||
//! [Make forward pass]
|
|
||||||
Mat detection = net.forward(); //compute output
|
|
||||||
//! [Make forward pass]
|
|
||||||
|
|
||||||
vector<double> layersTimings; |
|
||||||
double freq = getTickFrequency() / 1000; |
|
||||||
double time = net.getPerfProfile(layersTimings) / freq; |
|
||||||
|
|
||||||
Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>()); |
|
||||||
|
|
||||||
if (!outputVideo.isOpened()) |
|
||||||
{ |
|
||||||
putText(frame, format("FPS: %.2f ; time: %.2f ms", 1000.f/time, time), |
|
||||||
Point(20,20), 0, 0.5, Scalar(0,0,255)); |
|
||||||
} |
|
||||||
else |
|
||||||
cout << "Inference time, ms: " << time << endl; |
|
||||||
|
|
||||||
float confidenceThreshold = parser.get<float>("min_confidence"); |
|
||||||
for(int i = 0; i < detectionMat.rows; i++) |
|
||||||
{ |
|
||||||
float confidence = detectionMat.at<float>(i, 2); |
|
||||||
|
|
||||||
if(confidence > confidenceThreshold) |
|
||||||
{ |
|
||||||
size_t objectClass = (size_t)(detectionMat.at<float>(i, 1)); |
|
||||||
|
|
||||||
int left = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols); |
|
||||||
int top = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows); |
|
||||||
int right = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols); |
|
||||||
int bottom = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows); |
|
||||||
|
|
||||||
rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 255, 0)); |
|
||||||
String label = format("%s: %.2f", classNames[objectClass], confidence); |
|
||||||
int baseLine = 0; |
|
||||||
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); |
|
||||||
top = max(top, labelSize.height); |
|
||||||
rectangle(frame, Point(left, top - labelSize.height), |
|
||||||
Point(left + labelSize.width, top + baseLine), |
|
||||||
Scalar(255, 255, 255), FILLED); |
|
||||||
putText(frame, label, Point(left, top), |
|
||||||
FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0)); |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
if (outputVideo.isOpened()) |
|
||||||
outputVideo << frame; |
|
||||||
|
|
||||||
imshow("detections", frame); |
|
||||||
if (waitKey(1) >= 0) break; |
|
||||||
} |
|
||||||
|
|
||||||
return 0; |
|
||||||
} // main
|
|
@ -1,156 +0,0 @@ |
|||||||
#include <opencv2/dnn.hpp> |
|
||||||
#include <opencv2/dnn/shape_utils.hpp> |
|
||||||
#include <opencv2/imgproc.hpp> |
|
||||||
#include <opencv2/highgui.hpp> |
|
||||||
#include <iostream> |
|
||||||
|
|
||||||
using namespace cv; |
|
||||||
using namespace std; |
|
||||||
using namespace cv::dnn; |
|
||||||
|
|
||||||
const char* classNames[] = {"background", |
|
||||||
"aeroplane", "bicycle", "bird", "boat", |
|
||||||
"bottle", "bus", "car", "cat", "chair", |
|
||||||
"cow", "diningtable", "dog", "horse", |
|
||||||
"motorbike", "person", "pottedplant", |
|
||||||
"sheep", "sofa", "train", "tvmonitor"}; |
|
||||||
|
|
||||||
const char* about = "This sample uses Single-Shot Detector " |
|
||||||
"(https://arxiv.org/abs/1512.02325) " |
|
||||||
"to detect objects on camera/video/image.\n" |
|
||||||
".caffemodel model's file is available here: " |
|
||||||
"https://github.com/weiliu89/caffe/tree/ssd#models\n" |
|
||||||
"Default network is 300x300 and 20-classes VOC.\n"; |
|
||||||
|
|
||||||
const char* params |
|
||||||
= "{ help | false | print usage }" |
|
||||||
"{ proto | | model configuration }" |
|
||||||
"{ model | | model weights }" |
|
||||||
"{ camera_device | 0 | camera device number}" |
|
||||||
"{ video | | video or image for detection}" |
|
||||||
"{ min_confidence | 0.5 | min confidence }"; |
|
||||||
|
|
||||||
int main(int argc, char** argv) |
|
||||||
{ |
|
||||||
cv::CommandLineParser parser(argc, argv, params); |
|
||||||
|
|
||||||
if (parser.get<bool>("help")) |
|
||||||
{ |
|
||||||
cout << about << endl; |
|
||||||
parser.printMessage(); |
|
||||||
return 0; |
|
||||||
} |
|
||||||
|
|
||||||
String modelConfiguration = parser.get<string>("proto"); |
|
||||||
String modelBinary = parser.get<string>("model"); |
|
||||||
|
|
||||||
//! [Initialize network]
|
|
||||||
dnn::Net net = readNetFromCaffe(modelConfiguration, modelBinary); |
|
||||||
//! [Initialize network]
|
|
||||||
|
|
||||||
if (net.empty()) |
|
||||||
{ |
|
||||||
cerr << "Can't load network by using the following files: " << endl; |
|
||||||
cerr << "prototxt: " << modelConfiguration << endl; |
|
||||||
cerr << "caffemodel: " << modelBinary << endl; |
|
||||||
cerr << "Models can be downloaded here:" << endl; |
|
||||||
cerr << "https://github.com/weiliu89/caffe/tree/ssd#models" << endl; |
|
||||||
exit(-1); |
|
||||||
} |
|
||||||
|
|
||||||
VideoCapture cap; |
|
||||||
if (parser.get<String>("video").empty()) |
|
||||||
{ |
|
||||||
int cameraDevice = parser.get<int>("camera_device"); |
|
||||||
cap = VideoCapture(cameraDevice); |
|
||||||
if(!cap.isOpened()) |
|
||||||
{ |
|
||||||
cout << "Couldn't find camera: " << cameraDevice << endl; |
|
||||||
return -1; |
|
||||||
} |
|
||||||
} |
|
||||||
else |
|
||||||
{ |
|
||||||
cap.open(parser.get<String>("video")); |
|
||||||
if(!cap.isOpened()) |
|
||||||
{ |
|
||||||
cout << "Couldn't open image or video: " << parser.get<String>("video") << endl; |
|
||||||
return -1; |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
for (;;) |
|
||||||
{ |
|
||||||
cv::Mat frame; |
|
||||||
cap >> frame; // get a new frame from camera/video or read image
|
|
||||||
|
|
||||||
if (frame.empty()) |
|
||||||
{ |
|
||||||
waitKey(); |
|
||||||
break; |
|
||||||
} |
|
||||||
|
|
||||||
if (frame.channels() == 4) |
|
||||||
cvtColor(frame, frame, COLOR_BGRA2BGR); |
|
||||||
|
|
||||||
//! [Prepare blob]
|
|
||||||
Mat inputBlob = blobFromImage(frame, 1.0f, Size(300, 300), Scalar(104, 117, 123), false, false); //Convert Mat to batch of images
|
|
||||||
//! [Prepare blob]
|
|
||||||
|
|
||||||
//! [Set input blob]
|
|
||||||
net.setInput(inputBlob, "data"); //set the network input
|
|
||||||
//! [Set input blob]
|
|
||||||
|
|
||||||
//! [Make forward pass]
|
|
||||||
Mat detection = net.forward("detection_out"); //compute output
|
|
||||||
//! [Make forward pass]
|
|
||||||
|
|
||||||
vector<double> layersTimings; |
|
||||||
double freq = getTickFrequency() / 1000; |
|
||||||
double time = net.getPerfProfile(layersTimings) / freq; |
|
||||||
ostringstream ss; |
|
||||||
ss << "FPS: " << 1000/time << " ; time: " << time << " ms"; |
|
||||||
putText(frame, ss.str(), Point(20,20), 0, 0.5, Scalar(0,0,255)); |
|
||||||
|
|
||||||
Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>()); |
|
||||||
|
|
||||||
float confidenceThreshold = parser.get<float>("min_confidence"); |
|
||||||
for(int i = 0; i < detectionMat.rows; i++) |
|
||||||
{ |
|
||||||
float confidence = detectionMat.at<float>(i, 2); |
|
||||||
|
|
||||||
if(confidence > confidenceThreshold) |
|
||||||
{ |
|
||||||
size_t objectClass = (size_t)(detectionMat.at<float>(i, 1)); |
|
||||||
|
|
||||||
int xLeftBottom = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols); |
|
||||||
int yLeftBottom = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows); |
|
||||||
int xRightTop = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols); |
|
||||||
int yRightTop = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows); |
|
||||||
|
|
||||||
ss.str(""); |
|
||||||
ss << confidence; |
|
||||||
String conf(ss.str()); |
|
||||||
|
|
||||||
Rect object(xLeftBottom, yLeftBottom, |
|
||||||
xRightTop - xLeftBottom, |
|
||||||
yRightTop - yLeftBottom); |
|
||||||
|
|
||||||
rectangle(frame, object, Scalar(0, 255, 0)); |
|
||||||
String label = String(classNames[objectClass]) + ": " + conf; |
|
||||||
int baseLine = 0; |
|
||||||
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); |
|
||||||
rectangle(frame, Rect(Point(xLeftBottom, yLeftBottom - labelSize.height), |
|
||||||
Size(labelSize.width, labelSize.height + baseLine)), |
|
||||||
Scalar(255, 255, 255), FILLED); |
|
||||||
putText(frame, label, Point(xLeftBottom, yLeftBottom), |
|
||||||
FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0)); |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
imshow("detections", frame); |
|
||||||
if (waitKey(1) >= 0) break; |
|
||||||
} |
|
||||||
|
|
||||||
return 0; |
|
||||||
} // main
|
|
@ -1,185 +0,0 @@ |
|||||||
// Brief Sample of using OpenCV dnn module in real time with device capture, video and image.
|
|
||||||
// VIDEO DEMO: https://www.youtube.com/watch?v=NHtRlndE2cg
|
|
||||||
|
|
||||||
#include <opencv2/dnn.hpp> |
|
||||||
#include <opencv2/dnn/shape_utils.hpp> |
|
||||||
#include <opencv2/imgproc.hpp> |
|
||||||
#include <opencv2/highgui.hpp> |
|
||||||
#include <fstream> |
|
||||||
#include <iostream> |
|
||||||
|
|
||||||
using namespace std; |
|
||||||
using namespace cv; |
|
||||||
using namespace cv::dnn; |
|
||||||
|
|
||||||
static const char* about = |
|
||||||
"This sample uses You only look once (YOLO)-Detector (https://arxiv.org/abs/1612.08242) to detect objects on camera/video/image.\n" |
|
||||||
"Models can be downloaded here: https://pjreddie.com/darknet/yolo/\n" |
|
||||||
"Default network is 416x416.\n" |
|
||||||
"Class names can be downloaded here: https://github.com/pjreddie/darknet/tree/master/data\n"; |
|
||||||
|
|
||||||
static const char* params = |
|
||||||
"{ help | false | print usage }" |
|
||||||
"{ cfg | | model configuration }" |
|
||||||
"{ model | | model weights }" |
|
||||||
"{ camera_device | 0 | camera device number}" |
|
||||||
"{ source | | video or image for detection}" |
|
||||||
"{ out | | path to output video file}" |
|
||||||
"{ fps | 3 | frame per second }" |
|
||||||
"{ style | box | box or line style draw }" |
|
||||||
"{ min_confidence | 0.24 | min confidence }" |
|
||||||
"{ class_names | | File with class names, [PATH-TO-DARKNET]/data/coco.names }"; |
|
||||||
|
|
||||||
int main(int argc, char** argv) |
|
||||||
{ |
|
||||||
CommandLineParser parser(argc, argv, params); |
|
||||||
|
|
||||||
if (parser.get<bool>("help")) |
|
||||||
{ |
|
||||||
cout << about << endl; |
|
||||||
parser.printMessage(); |
|
||||||
return 0; |
|
||||||
} |
|
||||||
|
|
||||||
String modelConfiguration = parser.get<String>("cfg"); |
|
||||||
String modelBinary = parser.get<String>("model"); |
|
||||||
|
|
||||||
//! [Initialize network]
|
|
||||||
dnn::Net net = readNetFromDarknet(modelConfiguration, modelBinary); |
|
||||||
//! [Initialize network]
|
|
||||||
|
|
||||||
if (net.empty()) |
|
||||||
{ |
|
||||||
cerr << "Can't load network by using the following files: " << endl; |
|
||||||
cerr << "cfg-file: " << modelConfiguration << endl; |
|
||||||
cerr << "weights-file: " << modelBinary << endl; |
|
||||||
cerr << "Models can be downloaded here:" << endl; |
|
||||||
cerr << "https://pjreddie.com/darknet/yolo/" << endl; |
|
||||||
exit(-1); |
|
||||||
} |
|
||||||
|
|
||||||
VideoCapture cap; |
|
||||||
VideoWriter writer; |
|
||||||
int codec = CV_FOURCC('M', 'J', 'P', 'G'); |
|
||||||
double fps = parser.get<float>("fps"); |
|
||||||
if (parser.get<String>("source").empty()) |
|
||||||
{ |
|
||||||
int cameraDevice = parser.get<int>("camera_device"); |
|
||||||
cap = VideoCapture(cameraDevice); |
|
||||||
if(!cap.isOpened()) |
|
||||||
{ |
|
||||||
cout << "Couldn't find camera: " << cameraDevice << endl; |
|
||||||
return -1; |
|
||||||
} |
|
||||||
} |
|
||||||
else |
|
||||||
{ |
|
||||||
cap.open(parser.get<String>("source")); |
|
||||||
if(!cap.isOpened()) |
|
||||||
{ |
|
||||||
cout << "Couldn't open image or video: " << parser.get<String>("video") << endl; |
|
||||||
return -1; |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
if(!parser.get<String>("out").empty()) |
|
||||||
{ |
|
||||||
writer.open(parser.get<String>("out"), codec, fps, Size((int)cap.get(CAP_PROP_FRAME_WIDTH),(int)cap.get(CAP_PROP_FRAME_HEIGHT)), 1); |
|
||||||
} |
|
||||||
|
|
||||||
vector<String> classNamesVec; |
|
||||||
ifstream classNamesFile(parser.get<String>("class_names").c_str()); |
|
||||||
if (classNamesFile.is_open()) |
|
||||||
{ |
|
||||||
string className = ""; |
|
||||||
while (std::getline(classNamesFile, className)) |
|
||||||
classNamesVec.push_back(className); |
|
||||||
} |
|
||||||
|
|
||||||
String object_roi_style = parser.get<String>("style"); |
|
||||||
|
|
||||||
for(;;) |
|
||||||
{ |
|
||||||
Mat frame; |
|
||||||
cap >> frame; // get a new frame from camera/video or read image
|
|
||||||
|
|
||||||
if (frame.empty()) |
|
||||||
{ |
|
||||||
waitKey(); |
|
||||||
break; |
|
||||||
} |
|
||||||
|
|
||||||
if (frame.channels() == 4) |
|
||||||
cvtColor(frame, frame, COLOR_BGRA2BGR); |
|
||||||
|
|
||||||
//! [Prepare blob]
|
|
||||||
Mat inputBlob = blobFromImage(frame, 1 / 255.F, Size(416, 416), Scalar(), true, false); //Convert Mat to batch of images
|
|
||||||
//! [Prepare blob]
|
|
||||||
|
|
||||||
//! [Set input blob]
|
|
||||||
net.setInput(inputBlob, "data"); //set the network input
|
|
||||||
//! [Set input blob]
|
|
||||||
|
|
||||||
//! [Make forward pass]
|
|
||||||
Mat detectionMat = net.forward("detection_out"); //compute output
|
|
||||||
//! [Make forward pass]
|
|
||||||
|
|
||||||
vector<double> layersTimings; |
|
||||||
double tick_freq = getTickFrequency(); |
|
||||||
double time_ms = net.getPerfProfile(layersTimings) / tick_freq * 1000; |
|
||||||
putText(frame, format("FPS: %.2f ; time: %.2f ms", 1000.f / time_ms, time_ms), |
|
||||||
Point(20, 20), 0, 0.5, Scalar(0, 0, 255)); |
|
||||||
|
|
||||||
float confidenceThreshold = parser.get<float>("min_confidence"); |
|
||||||
for (int i = 0; i < detectionMat.rows; i++) |
|
||||||
{ |
|
||||||
const int probability_index = 5; |
|
||||||
const int probability_size = detectionMat.cols - probability_index; |
|
||||||
float *prob_array_ptr = &detectionMat.at<float>(i, probability_index); |
|
||||||
|
|
||||||
size_t objectClass = max_element(prob_array_ptr, prob_array_ptr + probability_size) - prob_array_ptr; |
|
||||||
float confidence = detectionMat.at<float>(i, (int)objectClass + probability_index); |
|
||||||
|
|
||||||
if (confidence > confidenceThreshold) |
|
||||||
{ |
|
||||||
float x_center = detectionMat.at<float>(i, 0) * frame.cols; |
|
||||||
float y_center = detectionMat.at<float>(i, 1) * frame.rows; |
|
||||||
float width = detectionMat.at<float>(i, 2) * frame.cols; |
|
||||||
float height = detectionMat.at<float>(i, 3) * frame.rows; |
|
||||||
Point p1(cvRound(x_center - width / 2), cvRound(y_center - height / 2)); |
|
||||||
Point p2(cvRound(x_center + width / 2), cvRound(y_center + height / 2)); |
|
||||||
Rect object(p1, p2); |
|
||||||
|
|
||||||
Scalar object_roi_color(0, 255, 0); |
|
||||||
|
|
||||||
if (object_roi_style == "box") |
|
||||||
{ |
|
||||||
rectangle(frame, object, object_roi_color); |
|
||||||
} |
|
||||||
else |
|
||||||
{ |
|
||||||
Point p_center(cvRound(x_center), cvRound(y_center)); |
|
||||||
line(frame, object.tl(), p_center, object_roi_color, 1); |
|
||||||
} |
|
||||||
|
|
||||||
String className = objectClass < classNamesVec.size() ? classNamesVec[objectClass] : cv::format("unknown(%d)", objectClass); |
|
||||||
String label = format("%s: %.2f", className.c_str(), confidence); |
|
||||||
int baseLine = 0; |
|
||||||
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); |
|
||||||
rectangle(frame, Rect(p1, Size(labelSize.width, labelSize.height + baseLine)), |
|
||||||
object_roi_color, FILLED); |
|
||||||
putText(frame, label, p1 + Point(0, labelSize.height), |
|
||||||
FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0)); |
|
||||||
} |
|
||||||
} |
|
||||||
if(writer.isOpened()) |
|
||||||
{ |
|
||||||
writer.write(frame); |
|
||||||
} |
|
||||||
|
|
||||||
imshow("YOLO: Detections", frame); |
|
||||||
if (waitKey(1) >= 0) break; |
|
||||||
} |
|
||||||
|
|
||||||
return 0; |
|
||||||
} // main
|
|
Loading…
Reference in new issue