yolov8_test/examples/YOLOv8-CPP-Inference/inference.cpp

#include "inference.h"

// Builds an inference session: records the model path, the network input
// size, the class-list path and the CUDA preference, then loads the ONNX
// network. The class file is not read here; that call stays commented out
// because the class names are hard-coded for this example.
Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda)
{
    // loadOnnxNetwork() reads modelPath and cudaEnabled, so every setting is
    // stored before it is called.
    this->cudaEnabled = runWithCuda;
    this->classesPath = classesTxtFile;
    this->modelShape = modelInputShape;
    this->modelPath = onnxModelPath;

    this->loadOnnxNetwork();
    // loadClassesFromFile(); The classes are hard-coded for this example
}

// Runs the network on `input` and returns the detections that survive
// non-maximum suppression.
//
// Steps:
//   1. Optionally pad the image to a square (formatToSquare) when
//      letterBoxForSquare is set and the model input is square.
//   2. Convert it to a blob scaled by 1/255 at modelShape (R and B channels
//      swapped, no cropping) and run a forward pass.
//   3. Decode every candidate row of the first output into a box, a class id
//      and a confidence, keeping rows that pass the thresholds.
//   4. Apply cv::dnn::NMSBoxes and build one Detection per surviving box,
//      each with a random display colour.
//
// Returned boxes are expressed in the coordinates of the (possibly padded)
// image that was fed to the network. An empty vector is returned when the
// network yields no output with at least three dimensions, or when no class
// score can be read from a row.
std::vector<Detection> Inference::runInference(const cv::Mat &input)
{
    cv::Mat modelInput = input;
    if (letterBoxForSquare && modelShape.width == modelShape.height)
        modelInput = formatToSquare(modelInput);

    cv::Mat blob;
    cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
    net.setInput(blob);

    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    // size[1] and size[2] are read below; bail out instead of indexing past
    // the shape of a missing or lower-dimensional output.
    if (outputs.empty() || outputs[0].dims < 3)
        return {};

    int rows = outputs[0].size[1];
    int dimensions = outputs[0].size[2];

    bool yolov8 = false;
    // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
    // yolov8 has an output of shape (batchSize, 84,  8400) (Num classes + box[x,y,w,h])
    if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8)
    {
        yolov8 = true;
        rows = outputs[0].size[2];
        dimensions = outputs[0].size[1];

        // Bring the yolov8 output into the same row-per-candidate layout as yolov5.
        outputs[0] = outputs[0].reshape(1, dimensions);
        cv::transpose(outputs[0], outputs[0]);
    }
    float *data = reinterpret_cast<float *>(outputs[0].data);

    // Scale from network-input pixels back to image pixels. The division must
    // be done in floating point: integer division truncates the ratio
    // (e.g. 1000 / 640 == 1) and misplaces every box.
    const float x_factor = static_cast<float>(modelInput.cols) / static_cast<float>(modelShape.width);
    const float y_factor = static_cast<float>(modelInput.rows) / static_cast<float>(modelShape.height);

    // Class scores start after the 4 box values (yolov8) or after the 4 box
    // values plus the objectness confidence (yolov5).
    const int scoreOffset = yolov8 ? 4 : 5;
    const int scoresInRow = dimensions - scoreOffset;
    const int namedClasses = static_cast<int>(classes.size());
    // Never read more scores than the row holds, nor more than there are class names.
    const int numScores = namedClasses < scoresInRow ? namedClasses : scoresInRow;
    if (numScores <= 0)
        return {};

    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    // Converts a row's centre-based (x, y, w, h) into a top-left based
    // rectangle in image coordinates.
    const auto decodeBox = [x_factor, y_factor](const float *row)
    {
        const float x = row[0];
        const float y = row[1];
        const float w = row[2];
        const float h = row[3];

        const int left = int((x - 0.5 * w) * x_factor);
        const int top = int((y - 0.5 * h) * y_factor);
        const int width = int(w * x_factor);
        const int height = int(h * y_factor);

        return cv::Rect(left, top, width, height);
    };

    for (int i = 0; i < rows; ++i, data += dimensions)
    {
        float confidence = 0.0f;
        if (!yolov8)
        {
            // yolov5 rows carry an objectness confidence that gates the row.
            confidence = data[4];
            if (!(confidence >= modelConfidenseThreshold))
                continue;
        }

        cv::Mat scores(1, numScores, CV_32FC1, data + scoreOffset);
        cv::Point class_id;
        double maxClassScore = 0.0;
        cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &class_id);

        if (!(maxClassScore > modelScoreThreshold))
            continue;

        // yolov8 reports the best class score as the confidence; yolov5
        // reports the objectness confidence.
        confidences.push_back(yolov8 ? static_cast<float>(maxClassScore) : confidence);
        class_ids.push_back(class_id.x);
        boxes.push_back(decodeBox(data));
    }

    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);

    // One generator for the whole call instead of re-seeding per detection.
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<int> dis(100, 255);

    std::vector<Detection> detections{};
    detections.reserve(nms_result.size());
    for (const int idx : nms_result)
    {
        Detection result;
        result.class_id = class_ids[idx];
        result.confidence = confidences[idx];
        result.color = cv::Scalar(dis(gen),
                                  dis(gen),
                                  dis(gen));

        result.className = classes[result.class_id];
        result.box = boxes[idx];

        detections.push_back(result);
    }

    return detections;
}

// Replaces `classes` with the names read from `classesPath`, one per line.
//
// - A trailing '\r' is removed from each line, so a file saved with CRLF
//   line endings does not yield names ending in a carriage return.
// - Names are first collected in a temporary list and only then assigned to
//   `classes`. Replacing instead of appending keeps a name's index equal to
//   its line number even if `classes` already holds entries (the constructor
//   notes that the list is hard-coded for this example) or if this function
//   is called more than once.
// - If the file cannot be opened or contains no lines, `classes` is left
//   untouched and a warning is written to std::cerr.
void Inference::loadClassesFromFile()
{
    std::ifstream inputFile(classesPath);
    if (!inputFile.is_open())
    {
        std::cerr << "Warning: could not open classes file: " << classesPath << std::endl;
        return;
    }

    std::vector<std::string> loadedClasses;
    std::string classLine;
    while (std::getline(inputFile, classLine))
    {
        if (!classLine.empty() && classLine.back() == '\r')
            classLine.pop_back();
        loadedClasses.push_back(classLine);
    }
    // inputFile is closed by its destructor.

    if (loadedClasses.empty())
    {
        std::cerr << "Warning: classes file is empty: " << classesPath << std::endl;
        return;
    }

    classes.assign(loadedClasses.begin(), loadedClasses.end());
}

// Reads the ONNX model at `modelPath` into `net`, reports the chosen device
// on stdout, and selects the matching DNN backend/target pair: CUDA for both
// when `cudaEnabled` is set, otherwise the OpenCV backend on the CPU target.
void Inference::loadOnnxNetwork()
{
    net = cv::dnn::readNetFromONNX(modelPath);

    const bool useCuda = cudaEnabled;

    std::cout << (useCuda ? "\nRunning on CUDA" : "\nRunning on CPU") << std::endl;

    net.setPreferableBackend(useCuda ? cv::dnn::DNN_BACKEND_CUDA
                                     : cv::dnn::DNN_BACKEND_OPENCV);
    net.setPreferableTarget(useCuda ? cv::dnn::DNN_TARGET_CUDA
                                    : cv::dnn::DNN_TARGET_CPU);
}

// Returns a zero-filled CV_8UC3 square image whose side equals the longer
// edge of `source`, with `source` copied into its top-left corner. The
// remaining area (right of or below the copy) stays zero.
cv::Mat Inference::formatToSquare(const cv::Mat &source)
{
    const int width = source.cols;
    const int height = source.rows;
    const int side = (width < height) ? height : width;

    cv::Mat square = cv::Mat::zeros(side, side, CV_8UC3);

    // Region of the square canvas that receives the source pixels.
    const cv::Rect topLeftRegion(0, 0, width, height);
    source.copyTo(square(topLeftRegion));

    return square;
}
Add YOLOv8 ONNX C++ inference Example (#856) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com> 2 years ago			`#include "inference.h"`

			`Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda)`
			`{`
			`modelPath = onnxModelPath;`
			`modelShape = modelInputShape;`
			`classesPath = classesTxtFile;`
			`cudaEnabled = runWithCuda;`

			`loadOnnxNetwork();`
			`// loadClassesFromFile(); The classes are hard-coded for this example`
			`}`

			`std::vector<Detection> Inference::runInference(const cv::Mat &input)`
			`{`
			`cv::Mat modelInput = input;`
			`if (letterBoxForSquare && modelShape.width == modelShape.height)`
			`modelInput = formatToSquare(modelInput);`

			`cv::Mat blob;`
			`cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);`
			`net.setInput(blob);`

			`std::vector<cv::Mat> outputs;`
			`net.forward(outputs, net.getUnconnectedOutLayersNames());`

			`int rows = outputs[0].size[1];`
			`int dimensions = outputs[0].size[2];`

			`bool yolov8 = false;`
			`// yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])`
			`// yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h])`
			`if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8)`
			`{`
			`yolov8 = true;`
			`rows = outputs[0].size[2];`
			`dimensions = outputs[0].size[1];`

			`outputs[0] = outputs[0].reshape(1, dimensions);`
			`cv::transpose(outputs[0], outputs[0]);`
			`}`
			`float *data = (float *)outputs[0].data;`

			`float x_factor = modelInput.cols / modelShape.width;`
			`float y_factor = modelInput.rows / modelShape.height;`

			`std::vector<int> class_ids;`
			`std::vector<float> confidences;`
			`std::vector<cv::Rect> boxes;`

			`for (int i = 0; i < rows; ++i)`
			`{`
			`if (yolov8)`
			`{`
			`float *classes_scores = data+4;`

			`cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);`
			`cv::Point class_id;`
			`double maxClassScore;`

			`minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);`

			`if (maxClassScore > modelScoreThreshold)`
			`{`
			`confidences.push_back(maxClassScore);`
			`class_ids.push_back(class_id.x);`

			`float x = data[0];`
			`float y = data[1];`
			`float w = data[2];`
			`float h = data[3];`

			`int left = int((x - 0.5 * w) * x_factor);`
			`int top = int((y - 0.5 * h) * y_factor);`

			`int width = int(w * x_factor);`
			`int height = int(h * y_factor);`

			`boxes.push_back(cv::Rect(left, top, width, height));`
			`}`
			`}`
			`else // yolov5`
			`{`
			`float confidence = data[4];`

			`if (confidence >= modelConfidenseThreshold)`
			`{`
			`float *classes_scores = data+5;`

			`cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);`
			`cv::Point class_id;`
			`double max_class_score;`

			`minMaxLoc(scores, 0, &max_class_score, 0, &class_id);`

			`if (max_class_score > modelScoreThreshold)`
			`{`
			`confidences.push_back(confidence);`
			`class_ids.push_back(class_id.x);`

			`float x = data[0];`
			`float y = data[1];`
			`float w = data[2];`
			`float h = data[3];`

			`int left = int((x - 0.5 * w) * x_factor);`
			`int top = int((y - 0.5 * h) * y_factor);`

			`int width = int(w * x_factor);`
			`int height = int(h * y_factor);`

			`boxes.push_back(cv::Rect(left, top, width, height));`
			`}`
			`}`
			`}`

			`data += dimensions;`
			`}`

			`std::vector<int> nms_result;`
			`cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);`

			`std::vector<Detection> detections{};`
			`for (unsigned long i = 0; i < nms_result.size(); ++i)`
			`{`
			`int idx = nms_result[i];`

			`Detection result;`
			`result.class_id = class_ids[idx];`
			`result.confidence = confidences[idx];`

			`std::random_device rd;`
			`std::mt19937 gen(rd());`
			`std::uniform_int_distribution<int> dis(100, 255);`
			`result.color = cv::Scalar(dis(gen),`
			`dis(gen),`
			`dis(gen));`

			`result.className = classes[result.class_id];`
			`result.box = boxes[idx];`

			`detections.push_back(result);`
			`}`

			`return detections;`
			`}`

			`void Inference::loadClassesFromFile()`
			`{`
			`std::ifstream inputFile(classesPath);`
			`if (inputFile.is_open())`
			`{`
			`std::string classLine;`
			`while (std::getline(inputFile, classLine))`
			`classes.push_back(classLine);`
			`inputFile.close();`
			`}`
			`}`

			`void Inference::loadOnnxNetwork()`
			`{`
			`net = cv::dnn::readNetFromONNX(modelPath);`
			`if (cudaEnabled)`
			`{`
			`std::cout << "\nRunning on CUDA" << std::endl;`
			`net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);`
			`net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);`
			`}`
			`else`
			`{`
			`std::cout << "\nRunning on CPU" << std::endl;`
			`net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);`
			`net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);`
			`}`
			`}`

			`cv::Mat Inference::formatToSquare(const cv::Mat &source)`
			`{`
			`int col = source.cols;`
			`int row = source.rows;`
			`int _max = MAX(col, row);`
			`cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);`
			`source.copyTo(result(cv::Rect(0, 0, col, row)));`
			`return result;`
			`}`