#include "inference.h"

Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda)
{
    modelPath = onnxModelPath;
    modelShape = modelInputShape;
    classesPath = classesTxtFile;
    cudaEnabled = runWithCuda;

    loadOnnxNetwork();
    // loadClassesFromFile(); The classes are hard-coded for this example
}

std::vector<Detection> Inference::runInference(const cv::Mat &input)
{
    cv::Mat modelInput = input;
    if (letterBoxForSquare && modelShape.width == modelShape.height)
        modelInput = formatToSquare(modelInput);

    // Scale pixel values to [0, 1], resize to the model input size and swap BGR -> RGB.
    cv::Mat blob;
    cv::dnn::blobFromImage(modelInput, blob, 1.0 / 255.0, modelShape, cv::Scalar(), true, false);
    net.setInput(blob);

    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    int rows = outputs[0].size[1];
    int dimensions = outputs[0].size[2];

    bool yolov8 = false;
    // yolov5 has an output of shape (batchSize, 25200, 85) (box[x,y,w,h] + confidence[c] + num classes)
    // yolov8 has an output of shape (batchSize, 84, 8400) (box[x,y,w,h] + num classes)
    if (dimensions > rows) // Check if shape[2] is larger than shape[1] (yolov8)
    {
        yolov8 = true;
        rows = outputs[0].size[2];
        dimensions = outputs[0].size[1];

        outputs[0] = outputs[0].reshape(1, dimensions);
        cv::transpose(outputs[0], outputs[0]);
    }
    float *data = (float *)outputs[0].data;

    // Use floating-point division; integer division would truncate the scale factors.
    float x_factor = modelInput.cols / static_cast<float>(modelShape.width);
    float y_factor = modelInput.rows / static_cast<float>(modelShape.height);

    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    for (int i = 0; i < rows; ++i)
    {
        if (yolov8)
        {
            float *classes_scores = data + 4;

            cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
            cv::Point class_id;
            double maxClassScore;

            minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);

            if (maxClassScore > modelScoreThreshold)
            {
                confidences.push_back(maxClassScore);
                class_ids.push_back(class_id.x);

                float x = data[0];
                float y = data[1];
                float w = data[2];
                float h = data[3];

                int left = int((x - 0.5 * w) * x_factor);
                int top = int((y - 0.5 * h) * y_factor);

                int width = int(w * x_factor);
                int height = int(h * y_factor);

                boxes.push_back(cv::Rect(left, top, width, height));
            }
        }
        else // yolov5
        {
            float confidence = data[4];

            if (confidence >= modelConfidenceThreshold)
            {
                float *classes_scores = data + 5;

                cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
                cv::Point class_id;
                double max_class_score;

                minMaxLoc(scores, 0, &max_class_score, 0, &class_id);

                if (max_class_score > modelScoreThreshold)
                {
                    confidences.push_back(confidence);
                    class_ids.push_back(class_id.x);

                    float x = data[0];
                    float y = data[1];
                    float w = data[2];
                    float h = data[3];

                    int left = int((x - 0.5 * w) * x_factor);
                    int top = int((y - 0.5 * h) * y_factor);

                    int width = int(w * x_factor);
                    int height = int(h * y_factor);

                    boxes.push_back(cv::Rect(left, top, width, height));
                }
            }
        }

        data += dimensions;
    }

    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);

    // One generator for all detections; each gets a random display color.
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<int> dis(100, 255);

    std::vector<Detection> detections{};
    for (unsigned long i = 0; i < nms_result.size(); ++i)
    {
        int idx = nms_result[i];

        Detection result;
        result.class_id = class_ids[idx];
        result.confidence = confidences[idx];
        result.color = cv::Scalar(dis(gen), dis(gen), dis(gen));
        result.className = classes[result.class_id];
        result.box = boxes[idx];

        detections.push_back(result);
    }

    return detections;
}

void Inference::loadClassesFromFile()
{
    std::ifstream inputFile(classesPath);
    if (inputFile.is_open())
    {
        std::string classLine;
        while (std::getline(inputFile, classLine))
            classes.push_back(classLine);
        inputFile.close();
    }
}

void Inference::loadOnnxNetwork()
{
    net = cv::dnn::readNetFromONNX(modelPath);
    if (cudaEnabled)
    {
        std::cout << "\nRunning on CUDA" << std::endl;
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
    }
    else
    {
        std::cout << "\nRunning on CPU" << std::endl;
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    }
}

// Pad the image to a square by copying it into the top-left corner of a
// zero-filled canvas; the x/y scale factors in runInference assume this layout.
cv::Mat Inference::formatToSquare(const cv::Mat &source)
{
    int col = source.cols;
    int row = source.rows;
    int _max = MAX(col, row);
    cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
    source.copyTo(result(cv::Rect(0, 0, col, row)));
    return result;
}
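
// ---------------------------------------------------------------------------
// Usage sketch (not part of the library). A minimal, hypothetical driver
// showing how this class is typically wired up; it assumes the declarations
// in inference.h (Inference, Detection) match the definitions above, and the
// model/image paths below are placeholders.
//
//     #include "inference.h"
//     #include <opencv2/opencv.hpp>
//
//     int main()
//     {
//         Inference inf("yolov8s.onnx", cv::Size(640, 640), "classes.txt", false);
//
//         cv::Mat frame = cv::imread("image.jpg");
//         if (frame.empty())
//             return 1;
//
//         std::vector<Detection> detections = inf.runInference(frame);
//         for (const Detection &d : detections)
//         {
//             cv::rectangle(frame, d.box, d.color, 2);
//             cv::putText(frame, d.className, d.box.tl() + cv::Point(0, -5),
//                         cv::FONT_HERSHEY_SIMPLEX, 0.6, d.color, 2);
//         }
//
//         cv::imwrite("result.jpg", frame);
//         return 0;
//     }
// ---------------------------------------------------------------------------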
|