diff --git a/examples/README.md b/examples/README.md index 1f185da206..2c3367d085 100644 --- a/examples/README.md +++ b/examples/README.md @@ -2,9 +2,9 @@ This is a list of real-world applications and walkthroughs. These can be folders ## Ultralytics YOLO example applications -| Title | Format | Contributor | -| ------------------------------------------------------------------ | --------------- | ----------------------------------------- | -| \[Dummy\] [Inventory managenet using YOLO](./inventory-management) | python/notebook | [AyushExel](https://github.com/AyushExel) | +| Title | Format | Contributor | +| --------------------------------------------------------------- | -------- | ------------------------------------------------- | +| [Yolov8/yolov5 ONNX Inference with C++](./Yolov8_CPP_Inference) | C++/ONNX | [Justas Bartnykas](https://github.com/JustasBart) | ## How can you contribute ? diff --git a/examples/Yolov8_CPP_Inference/CMakeLists.txt b/examples/Yolov8_CPP_Inference/CMakeLists.txt new file mode 100644 index 0000000000..bc2f33fffd --- /dev/null +++ b/examples/Yolov8_CPP_Inference/CMakeLists.txt @@ -0,0 +1,28 @@ +cmake_minimum_required(VERSION 3.5) + +project(Yolov8CPPInference VERSION 0.1) + +set(CMAKE_INCLUDE_CURRENT_DIR ON) + +# CUDA +set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda") +find_package(CUDA 11 REQUIRED) + +set(CMAKE_CUDA_STANDARD 11) +set(CMAKE_CUDA_STANDARD_REQUIRED ON) +# !CUDA + +# OpenCV +find_package(OpenCV REQUIRED) +include_directories(${OpenCV_INCLUDE_DIRS}) +# !OpenCV + +set(PROJECT_SOURCES + main.cpp + + inference.h + inference.cpp +) + +add_executable(Yolov8CPPInference ${PROJECT_SOURCES}) +target_link_libraries(Yolov8CPPInference ${OpenCV_LIBS}) diff --git a/examples/Yolov8_CPP_Inference/README.md b/examples/Yolov8_CPP_Inference/README.md new file mode 100644 index 0000000000..b1381467d3 --- /dev/null +++ b/examples/Yolov8_CPP_Inference/README.md @@ -0,0 +1,53 @@ +# yolov8/yolov5 Inference C++ + +Usage: + +``` +# git clone ultralytics +pip install . +cd examples/cpp_ + +Add a **yolov8\_.onnx** and/or **yolov5\_.onnx** model(s) to the ultralytics folder. +Edit the **main.cpp** to change the **projectBasePath** to match your user. + +Note that by default the CMake file will try and import the CUDA library to be used with the OpenCVs dnn (cuDNN) GPU Inference. +If your OpenCV build does not use CUDA/cuDNN you can remove that import call and run the example on CPU. + +mkdir build +cd build +cmake .. +make +./Yolov8CPPInference +``` + +To export yolov8 models: + +``` +yolo export \ +model=yolov8s.pt \ +imgsz=[480,640] \ +format=onnx \ +opset=12 +``` + +To export yolov5 models: + +``` +python3 export.py \ +--weights yolov5s.pt \ +--img 480 640 \ +--include onnx \ +--opset 12 +``` + +yolov8s.onnx: + +![image](https://user-images.githubusercontent.com/40023722/217356132-a4cecf2e-2729-4acb-b80a-6559022d7707.png) + +yolov5s.onnx: + +![image](https://user-images.githubusercontent.com/40023722/217357005-07464492-d1da-42e3-98a7-fc753f87d5e6.png) + +This repository is based on OpenCVs dnn API to run an ONNX exported model of either yolov5/yolov8 (In theory should work for yolov6 and yolov7 but not tested). Note that for this example the networks are exported as rectangular (640x480) resolutions, but it would work for any resolution that you export as although you might want to use the letterBox approach for square images depending on your use-case. + +The **main** branch version is based on using Qt as a GUI wrapper the main interest here is the **Inference** class file which shows how to transpose yolov8 models to work as yolov5 models. diff --git a/examples/Yolov8_CPP_Inference/inference.cpp b/examples/Yolov8_CPP_Inference/inference.cpp new file mode 100644 index 0000000000..b45830e752 --- /dev/null +++ b/examples/Yolov8_CPP_Inference/inference.cpp @@ -0,0 +1,185 @@ +#include "inference.h" + +Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda) +{ + modelPath = onnxModelPath; + modelShape = modelInputShape; + classesPath = classesTxtFile; + cudaEnabled = runWithCuda; + + loadOnnxNetwork(); + // loadClassesFromFile(); The classes are hard-coded for this example +} + +std::vector Inference::runInference(const cv::Mat &input) +{ + cv::Mat modelInput = input; + if (letterBoxForSquare && modelShape.width == modelShape.height) + modelInput = formatToSquare(modelInput); + + cv::Mat blob; + cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false); + net.setInput(blob); + + std::vector outputs; + net.forward(outputs, net.getUnconnectedOutLayersNames()); + + int rows = outputs[0].size[1]; + int dimensions = outputs[0].size[2]; + + bool yolov8 = false; + // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c]) + // yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h]) + if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8) + { + yolov8 = true; + rows = outputs[0].size[2]; + dimensions = outputs[0].size[1]; + + outputs[0] = outputs[0].reshape(1, dimensions); + cv::transpose(outputs[0], outputs[0]); + } + float *data = (float *)outputs[0].data; + + float x_factor = modelInput.cols / modelShape.width; + float y_factor = modelInput.rows / modelShape.height; + + std::vector class_ids; + std::vector confidences; + std::vector boxes; + + for (int i = 0; i < rows; ++i) + { + if (yolov8) + { + float *classes_scores = data+4; + + cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores); + cv::Point class_id; + double maxClassScore; + + minMaxLoc(scores, 0, &maxClassScore, 0, &class_id); + + if (maxClassScore > modelScoreThreshold) + { + confidences.push_back(maxClassScore); + class_ids.push_back(class_id.x); + + float x = data[0]; + float y = data[1]; + float w = data[2]; + float h = data[3]; + + int left = int((x - 0.5 * w) * x_factor); + int top = int((y - 0.5 * h) * y_factor); + + int width = int(w * x_factor); + int height = int(h * y_factor); + + boxes.push_back(cv::Rect(left, top, width, height)); + } + } + else // yolov5 + { + float confidence = data[4]; + + if (confidence >= modelConfidenseThreshold) + { + float *classes_scores = data+5; + + cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores); + cv::Point class_id; + double max_class_score; + + minMaxLoc(scores, 0, &max_class_score, 0, &class_id); + + if (max_class_score > modelScoreThreshold) + { + confidences.push_back(confidence); + class_ids.push_back(class_id.x); + + float x = data[0]; + float y = data[1]; + float w = data[2]; + float h = data[3]; + + int left = int((x - 0.5 * w) * x_factor); + int top = int((y - 0.5 * h) * y_factor); + + int width = int(w * x_factor); + int height = int(h * y_factor); + + boxes.push_back(cv::Rect(left, top, width, height)); + } + } + } + + data += dimensions; + } + + std::vector nms_result; + cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result); + + std::vector detections{}; + for (unsigned long i = 0; i < nms_result.size(); ++i) + { + int idx = nms_result[i]; + + Detection result; + result.class_id = class_ids[idx]; + result.confidence = confidences[idx]; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(100, 255); + result.color = cv::Scalar(dis(gen), + dis(gen), + dis(gen)); + + result.className = classes[result.class_id]; + result.box = boxes[idx]; + + detections.push_back(result); + } + + return detections; +} + +void Inference::loadClassesFromFile() +{ + std::ifstream inputFile(classesPath); + if (inputFile.is_open()) + { + std::string classLine; + while (std::getline(inputFile, classLine)) + classes.push_back(classLine); + inputFile.close(); + } +} + +void Inference::loadOnnxNetwork() +{ + net = cv::dnn::readNetFromONNX(modelPath); + if (cudaEnabled) + { + std::cout << "\nRunning on CUDA" << std::endl; + net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA); + net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA); + } + else + { + std::cout << "\nRunning on CPU" << std::endl; + net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV); + net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); + } +} + +cv::Mat Inference::formatToSquare(const cv::Mat &source) +{ + int col = source.cols; + int row = source.rows; + int _max = MAX(col, row); + cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3); + source.copyTo(result(cv::Rect(0, 0, col, row))); + return result; +} diff --git a/examples/Yolov8_CPP_Inference/inference.h b/examples/Yolov8_CPP_Inference/inference.h new file mode 100644 index 0000000000..5763e10417 --- /dev/null +++ b/examples/Yolov8_CPP_Inference/inference.h @@ -0,0 +1,52 @@ +#ifndef INFERENCE_H +#define INFERENCE_H + +// Cpp native +#include +#include +#include +#include + +// OpenCV / DNN / Inference +#include +#include +#include + +struct Detection +{ + int class_id{0}; + std::string className{}; + float confidence{0.0}; + cv::Scalar color{}; + cv::Rect box{}; +}; + +class Inference +{ +public: + Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape = {640, 640}, const std::string &classesTxtFile = "", const bool &runWithCuda = true); + std::vector runInference(const cv::Mat &input); + +private: + void loadClassesFromFile(); + void loadOnnxNetwork(); + cv::Mat formatToSquare(const cv::Mat &source); + + std::string modelPath{}; + std::string classesPath{}; + bool cudaEnabled{}; + + std::vector classes{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"}; + + cv::Size2f modelShape{}; + + float modelConfidenseThreshold {0.25}; + float modelScoreThreshold {0.45}; + float modelNMSThreshold {0.50}; + + bool letterBoxForSquare = true; + + cv::dnn::Net net; +}; + +#endif // INFERENCE_H diff --git a/examples/Yolov8_CPP_Inference/main.cpp b/examples/Yolov8_CPP_Inference/main.cpp new file mode 100644 index 0000000000..6d1ba988f5 --- /dev/null +++ b/examples/Yolov8_CPP_Inference/main.cpp @@ -0,0 +1,70 @@ +#include +#include +#include + +#include + +#include "inference.h" + +using namespace std; +using namespace cv; + +int main(int argc, char **argv) +{ + std::string projectBasePath = "/home/user/ultralytics"; // Set your ultralytics base path + + bool runOnGPU = true; + + // + // Pass in either: + // + // "yolov8s.onnx" or "yolov5s.onnx" + // + // To run Inference with yolov8/yolov5 (ONNX) + // + + // Note that in this example the classes are hard-coded and 'classes.txt' is a place holder. + Inference inf(projectBasePath + "/yolov8s.onnx", cv::Size(640, 480), "classes.txt", runOnGPU); + + std::vector imageNames; + imageNames.push_back(projectBasePath + "/ultralytics/assets/bus.jpg"); + imageNames.push_back(projectBasePath + "/ultralytics/assets/zidane.jpg"); + + for (int i = 0; i < imageNames.size(); ++i) + { + cv::Mat frame = cv::imread(imageNames[i]); + + // Inference starts here... + std::vector output = inf.runInference(frame); + + int detections = output.size(); + std::cout << "Number of detections:" << detections << std::endl; + + for (int i = 0; i < detections; ++i) + { + Detection detection = output[i]; + + cv::Rect box = detection.box; + cv::Scalar color = detection.color; + + // Detection box + cv::rectangle(frame, box, color, 2); + + // Detection box text + std::string classString = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4); + cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0); + cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20); + + cv::rectangle(frame, textBox, color, cv::FILLED); + cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0); + } + // Inference ends here... + + // This is only for preview purposes + float scale = 0.8; + cv::resize(frame, frame, cv::Size(frame.cols*scale, frame.rows*scale)); + cv::imshow("Inference", frame); + + cv::waitKey(-1); + } +} diff --git a/examples/inventory-management/README.md b/examples/inventory-management/README.md deleted file mode 100644 index 8f562573df..0000000000 --- a/examples/inventory-management/README.md +++ /dev/null @@ -1 +0,0 @@ -This is a dummy project