opencv/samples/dnn/yolo_object_detection.cpp

// Brief sample showing how to use the OpenCV dnn module in real time with a capture device, a video file, or an image.
// VIDEO DEMO: https://www.youtube.com/watch?v=NHtRlndE2cg

#include <opencv2/dnn.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <fstream>
#include <iostream>
#include <algorithm>
#include <cstdlib>

using namespace std;
using namespace cv;
using namespace cv::dnn;

// Width and height, in pixels, that every frame is resized to before it is
// converted into the network input blob (see the resize() call in main()).
const size_t network_width = 416;
const size_t network_height = 416;

// Description text printed by main() ahead of the option list when the
// `help` flag is set.
static const char* about =
"This sample uses You only look once (YOLO)-Detector (https://arxiv.org/abs/1612.08242) to detect objects on camera/video/image.\n"
"Models can be downloaded here: https://pjreddie.com/darknet/yolo/\n"
"Default network is 416x416.\n"
"Class names can be downloaded here: https://github.com/pjreddie/darknet/tree/master/data\n";

// Keys string handed to cv::CommandLineParser in main(). Each entry has the
// form `{ name | default value | help text }`; an empty default column means
// the option has no default value.
static const char* params =
"{ help           | false | print usage         }"
"{ cfg            |       | model configuration }"
"{ model          |       | model weights       }"
"{ camera_device  | 0     | camera device number}"
"{ source         |       | video or image for detection}"
"{ min_confidence | 0.24  | min confidence      }"
"{ class_names    |       | File with class names, [PATH-TO-DARKNET]/data/coco.names }";

int main(int argc, char** argv)
{
    CommandLineParser parser(argc, argv, params);

    if (parser.get<bool>("help"))
    {
        cout << about << endl;
        parser.printMessage();
        return 0;
    }

    String modelConfiguration = parser.get<String>("cfg");
    String modelBinary = parser.get<String>("model");

    //! [Initialize network]
    dnn::Net net = readNetFromDarknet(modelConfiguration, modelBinary);
    //! [Initialize network]

    if (net.empty())
    {
        cerr << "Can't load network by using the following files: " << endl;
        cerr << "cfg-file:     " << modelConfiguration << endl;
        cerr << "weights-file: " << modelBinary << endl;
        cerr << "Models can be downloaded here:" << endl;
        cerr << "https://pjreddie.com/darknet/yolo/" << endl;
        exit(-1);
    }

    VideoCapture cap;
    if (parser.get<String>("source").empty())
    {
        int cameraDevice = parser.get<int>("camera_device");
        cap = VideoCapture(cameraDevice);
        if(!cap.isOpened())
        {
            cout << "Couldn't find camera: " << cameraDevice << endl;
            return -1;
        }
    }
    else
    {
        cap.open(parser.get<String>("source"));
        if(!cap.isOpened())
        {
            cout << "Couldn't open image or video: " << parser.get<String>("video") << endl;
            return -1;
        }
    }

    vector<string> classNamesVec;
    ifstream classNamesFile(parser.get<String>("class_names").c_str());
    if (classNamesFile.is_open())
    {
        string className = "";
        while (std::getline(classNamesFile, className))
            classNamesVec.push_back(className);
    }

    for(;;)
    {
        Mat frame;
        cap >> frame; // get a new frame from camera/video or read image

        if (frame.empty())
        {
            waitKey();
            break;
        }

        if (frame.channels() == 4)
            cvtColor(frame, frame, COLOR_BGRA2BGR);

        //! [Resizing without keeping aspect ratio]
        Mat resized;
        resize(frame, resized, Size(network_width, network_height));
        //! [Resizing without keeping aspect ratio]

        //! [Prepare blob]
        Mat inputBlob = blobFromImage(resized, 1 / 255.F); //Convert Mat to batch of images
        //! [Prepare blob]

        //! [Set input blob]
        net.setInput(inputBlob, "data");                   //set the network input
        //! [Set input blob]

        //! [Make forward pass]
        Mat detectionMat = net.forward("detection_out");   //compute output
        //! [Make forward pass]

        vector<double> layersTimings;
        double freq = getTickFrequency() / 1000;
        double time = net.getPerfProfile(layersTimings) / freq;
        ostringstream ss;
        ss << "FPS: " << 1000/time << " ; time: " << time << " ms";
        putText(frame, ss.str(), Point(20,20), 0, 0.5, Scalar(0,0,255));

        float confidenceThreshold = parser.get<float>("min_confidence");
        for (int i = 0; i < detectionMat.rows; i++)
        {
            const int probability_index = 5;
            const int probability_size = detectionMat.cols - probability_index;
            float *prob_array_ptr = &detectionMat.at<float>(i, probability_index);

            size_t objectClass = max_element(prob_array_ptr, prob_array_ptr + probability_size) - prob_array_ptr;
            float confidence = detectionMat.at<float>(i, (int)objectClass + probability_index);

            if (confidence > confidenceThreshold)
            {
                float x = detectionMat.at<float>(i, 0);
                float y = detectionMat.at<float>(i, 1);
                float width = detectionMat.at<float>(i, 2);
                float height = detectionMat.at<float>(i, 3);
                int xLeftBottom = static_cast<int>((x - width / 2) * frame.cols);
                int yLeftBottom = static_cast<int>((y - height / 2) * frame.rows);
                int xRightTop = static_cast<int>((x + width / 2) * frame.cols);
                int yRightTop = static_cast<int>((y + height / 2) * frame.rows);

                Rect object(xLeftBottom, yLeftBottom,
                            xRightTop - xLeftBottom,
                            yRightTop - yLeftBottom);

                rectangle(frame, object, Scalar(0, 255, 0));

                if (objectClass < classNamesVec.size())
                {
                    ss.str("");
                    ss << confidence;
                    String conf(ss.str());
                    String label = String(classNamesVec[objectClass]) + ": " + conf;
                    int baseLine = 0;
                    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
                    rectangle(frame, Rect(Point(xLeftBottom, yLeftBottom ),
                                          Size(labelSize.width, labelSize.height + baseLine)),
                              Scalar(255, 255, 255), CV_FILLED);
                    putText(frame, label, Point(xLeftBottom, yLeftBottom+labelSize.height),
                            FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0));
                }
                else
                {
                    cout << "Class: " << objectClass << endl;
                    cout << "Confidence: " << confidence << endl;
                    cout << " " << xLeftBottom
                         << " " << yLeftBottom
                         << " " << xRightTop
                         << " " << yRightTop << endl;
                }
            }
        }

        imshow("YOLO: Detections", frame);
        if (waitKey(1) >= 0) break;
    }

    return 0;
} // main
Repair: incorrect display of class name 7 years ago			`// Brief Sample of using OpenCV dnn module in real time with device capture, video and image.`
			`// VIDEO DEMO: https://www.youtube.com/watch?v=NHtRlndE2cg`

Added DNN Darknet Yolo v2 for object detection 7 years ago			`#include <opencv2/dnn.hpp>`
			`#include <opencv2/dnn/shape_utils.hpp>`
			`#include <opencv2/imgproc.hpp>`
			`#include <opencv2/highgui.hpp>`
			`#include <fstream>`
			`#include <iostream>`
			`#include <algorithm>`
			`#include <cstdlib>`
Repair: incorrect display of class name 7 years ago
Added DNN Darknet Yolo v2 for object detection 7 years ago			`using namespace std;`
Repair: incorrect display of class name 7 years ago			`using namespace cv;`
			`using namespace cv::dnn;`
Added DNN Darknet Yolo v2 for object detection 7 years ago
			`const size_t network_width = 416;`
			`const size_t network_height = 416;`

Repair: incorrect display of class name 7 years ago			`static const char* about =`
			`"This sample uses You only look once (YOLO)-Detector (https://arxiv.org/abs/1612.08242) to detect objects on camera/video/image.\n"`
			`"Models can be downloaded here: https://pjreddie.com/darknet/yolo/\n"`
			`"Default network is 416x416.\n"`
			`"Class names can be downloaded here: https://github.com/pjreddie/darknet/tree/master/data\n";`

			`static const char* params =`
			`"{ help \| false \| print usage }"`
			`"{ cfg \| \| model configuration }"`
			`"{ model \| \| model weights }"`
			`"{ camera_device \| 0 \| camera device number}"`
			`"{ source \| \| video or image for detection}"`
			`"{ min_confidence \| 0.24 \| min confidence }"`
			`"{ class_names \| \| File with class names, [PATH-TO-DARKNET]/data/coco.names }";`
Added DNN Darknet Yolo v2 for object detection 7 years ago
			`int main(int argc, char** argv)`
			`{`
Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`CommandLineParser parser(argc, argv, params);`
Added DNN Darknet Yolo v2 for object detection 7 years ago
			`if (parser.get<bool>("help"))`
			`{`
Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`cout << about << endl;`
Added DNN Darknet Yolo v2 for object detection 7 years ago			`parser.printMessage();`
			`return 0;`
			`}`

Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`String modelConfiguration = parser.get<String>("cfg");`
			`String modelBinary = parser.get<String>("model");`
Added DNN Darknet Yolo v2 for object detection 7 years ago
			`//! [Initialize network]`
			`dnn::Net net = readNetFromDarknet(modelConfiguration, modelBinary);`
			`//! [Initialize network]`

			`if (net.empty())`
			`{`
			`cerr << "Can't load network by using the following files: " << endl;`
			`cerr << "cfg-file: " << modelConfiguration << endl;`
			`cerr << "weights-file: " << modelBinary << endl;`
			`cerr << "Models can be downloaded here:" << endl;`
			`cerr << "https://pjreddie.com/darknet/yolo/" << endl;`
			`exit(-1);`
			`}`

Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`VideoCapture cap;`
Repair: incorrect display of class name 7 years ago			`if (parser.get<String>("source").empty())`
Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`{`
			`int cameraDevice = parser.get<int>("camera_device");`
			`cap = VideoCapture(cameraDevice);`
			`if(!cap.isOpened())`
			`{`
			`cout << "Couldn't find camera: " << cameraDevice << endl;`
			`return -1;`
			`}`
			`}`
			`else`
			`{`
Repair: incorrect display of class name 7 years ago			`cap.open(parser.get<String>("source"));`
Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`if(!cap.isOpened())`
			`{`
			`cout << "Couldn't open image or video: " << parser.get<String>("video") << endl;`
			`return -1;`
			`}`
			`}`

			`vector<string> classNamesVec;`
			`ifstream classNamesFile(parser.get<String>("class_names").c_str());`
			`if (classNamesFile.is_open())`
			`{`
			`string className = "";`
Repair: incorrect display of class name 7 years ago			`while (std::getline(classNamesFile, className))`
Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`classNamesVec.push_back(className);`
			`}`
Added DNN Darknet Yolo v2 for object detection 7 years ago
Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`for(;;)`
			`{`
			`Mat frame;`
			`cap >> frame; // get a new frame from camera/video or read image`
Added DNN Darknet Yolo v2 for object detection 7 years ago
Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`if (frame.empty())`
			`{`
			`waitKey();`
			`break;`
			`}`
Added DNN Darknet Yolo v2 for object detection 7 years ago
Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`if (frame.channels() == 4)`
			`cvtColor(frame, frame, COLOR_BGRA2BGR);`
Added DNN Darknet Yolo v2 for object detection 7 years ago
Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`//! [Resizing without keeping aspect ratio]`
			`Mat resized;`
			`resize(frame, resized, Size(network_width, network_height));`
			`//! [Resizing without keeping aspect ratio]`
Added DNN Darknet Yolo v2 for object detection 7 years ago
Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`//! [Prepare blob]`
			`Mat inputBlob = blobFromImage(resized, 1 / 255.F); //Convert Mat to batch of images`
			`//! [Prepare blob]`
Added DNN Darknet Yolo v2 for object detection 7 years ago
Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`//! [Set input blob]`
			`net.setInput(inputBlob, "data"); //set the network input`
			`//! [Set input blob]`

			`//! [Make forward pass]`
			`Mat detectionMat = net.forward("detection_out"); //compute output`
Repair: incorrect display of class name 7 years ago			`//! [Make forward pass]`
Added DNN Darknet Yolo v2 for object detection 7 years ago
Repair: incorrect display of class name 7 years ago			`vector<double> layersTimings;`
			`double freq = getTickFrequency() / 1000;`
			`double time = net.getPerfProfile(layersTimings) / freq;`
			`ostringstream ss;`
			`ss << "FPS: " << 1000/time << " ; time: " << time << " ms";`
			`putText(frame, ss.str(), Point(20,20), 0, 0.5, Scalar(0,0,255));`
Added DNN Darknet Yolo v2 for object detection 7 years ago
Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`float confidenceThreshold = parser.get<float>("min_confidence");`
			`for (int i = 0; i < detectionMat.rows; i++)`
Added DNN Darknet Yolo v2 for object detection 7 years ago			`{`
Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`const int probability_index = 5;`
			`const int probability_size = detectionMat.cols - probability_index;`
			`float *prob_array_ptr = &detectionMat.at<float>(i, probability_index);`

			`size_t objectClass = max_element(prob_array_ptr, prob_array_ptr + probability_size) - prob_array_ptr;`
			`float confidence = detectionMat.at<float>(i, (int)objectClass + probability_index);`

			`if (confidence > confidenceThreshold)`
			`{`
			`float x = detectionMat.at<float>(i, 0);`
			`float y = detectionMat.at<float>(i, 1);`
			`float width = detectionMat.at<float>(i, 2);`
			`float height = detectionMat.at<float>(i, 3);`
			`int xLeftBottom = static_cast<int>((x - width / 2) * frame.cols);`
			`int yLeftBottom = static_cast<int>((y - height / 2) * frame.rows);`
			`int xRightTop = static_cast<int>((x + width / 2) * frame.cols);`
			`int yRightTop = static_cast<int>((y + height / 2) * frame.rows);`

			`Rect object(xLeftBottom, yLeftBottom,`
			`xRightTop - xLeftBottom,`
			`yRightTop - yLeftBottom);`

			`rectangle(frame, object, Scalar(0, 255, 0));`

			`if (objectClass < classNamesVec.size())`
			`{`
			`ss.str("");`
			`ss << confidence;`
			`String conf(ss.str());`
			`String label = String(classNamesVec[objectClass]) + ": " + conf;`
			`int baseLine = 0;`
			`Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);`
Repair: incorrect display of class name 7 years ago			`rectangle(frame, Rect(Point(xLeftBottom, yLeftBottom ),`
Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`Size(labelSize.width, labelSize.height + baseLine)),`
			`Scalar(255, 255, 255), CV_FILLED);`
Repair: incorrect display of class name 7 years ago			`putText(frame, label, Point(xLeftBottom, yLeftBottom+labelSize.height),`
Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0));`
			`}`
			`else`
			`{`
			`cout << "Class: " << objectClass << endl;`
			`cout << "Confidence: " << confidence << endl;`
			`cout << " " << xLeftBottom`
			`<< " " << yLeftBottom`
			`<< " " << xRightTop`
			`<< " " << yRightTop << endl;`
			`}`
			`}`
Added DNN Darknet Yolo v2 for object detection 7 years ago			`}`

Repair: incorrect display of class name 7 years ago			`imshow("YOLO: Detections", frame);`
Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`if (waitKey(1) >= 0) break;`
			`}`
Added DNN Darknet Yolo v2 for object detection 7 years ago
			`return 0;`
Add camera/video/image input for C++ DNN object detection samples. Add nice display and computation time. 7 years ago			`} // main`