commit
b396ee85d2
59 changed files with 1855 additions and 315 deletions
@ -0,0 +1,54 @@ |
||||
# YOLOv8 OnnxRuntime C++ |
||||
|
||||
This example demonstrates how to perform inference using YOLOv8 in C++ with ONNX Runtime and OpenCV's API. |
||||
|
||||
We recommend using Visual Studio to build the project. |
||||
|
||||
## Benefits |
||||
|
||||
- Friendly for deployment in the industrial sector. |
||||
- Faster than OpenCV's DNN inference on both CPU and GPU. |
||||
- Supports CUDA acceleration. |
||||
- Easy to add FP16 inference (using template functions). |
||||
|
||||
## Exporting YOLOv8 Models |
||||
|
||||
To export YOLOv8 models, use the following Python script: |
||||
|
||||
```python |
||||
from ultralytics import YOLO |
||||
|
||||
# Load a YOLOv8 model |
||||
model = YOLO("yolov8n.pt") |
||||
|
||||
# Export the model |
||||
model.export(format="onnx", opset=12, simplify=True, dynamic=False, imgsz=640) |
||||
``` |
||||
|
||||
## Dependencies |
||||
|
||||
| Dependency | Version | |
||||
| ----------------------- | -------- | |
||||
| Onnxruntime-win-x64-gpu | >=1.14.1 | |
||||
| OpenCV | >=4.0.0 | |
||||
| C++ | >=17 | |
||||
|
||||
Note: The dependency on C++17 is due to the usage of the C++17 filesystem feature. |
||||
|
||||
## Usage |
||||
|
||||
```c++ |
||||
// CPU inference |
||||
DCSP_INIT_PARAM params{ model_path, YOLO_ORIGIN_V8, {imgsz_w, imgsz_h}, class_num, 0.1, 0.5, false}; |
||||
// GPU inference |
||||
DCSP_INIT_PARAM params{ model_path, YOLO_ORIGIN_V8, {imgsz_w, imgsz_h}, class_num, 0.1, 0.5, true}; |
||||
|
||||
// Load your image |
||||
cv::Mat img = cv::imread(img_path); |
||||
|
||||
char* ret = p1->CreateSession(params); |
||||
|
||||
ret = p1->RunSession(img, res);
||||
``` |
||||
|
||||
This repository should also work for YOLOv5; doing so requires adding a permute operator for the YOLOv5 model's output, which has not been implemented yet.
@ -0,0 +1,271 @@ |
||||
#include "inference.h" |
||||
#include <regex> |
||||
|
||||
#define benchmark |
||||
#define ELOG |
||||
|
||||
// Construct an inference core with safe defaults so that destroying an
// instance before CreateSession() has run (or after it failed) is
// well-defined.
DCSP_CORE::DCSP_CORE()
{
    // BUGFIX: `session` is deleted unconditionally in the destructor and
    // `cudaEnable` is read in the benchmark branch; both were previously
    // left uninitialized (undefined behavior / garbage reads).
    session = nullptr;
    cudaEnable = false;
}
||||
|
||||
|
||||
// Release the ONNX Runtime session and the node-name buffers.
DCSP_CORE::~DCSP_CORE()
{
    delete session;
    // BUGFIX: the node names are heap-allocated char arrays copied in
    // CreateSession(); they were previously leaked.
    for (const char* name : inputNodeNames)
        delete[] name;
    for (const char* name : outputNodeNames)
        delete[] name;
}
||||
|
||||
|
||||
template<typename T> |
||||
char* BlobFromImage(cv::Mat& iImg, T& iBlob) |
||||
{ |
||||
int channels = iImg.channels(); |
||||
int imgHeight = iImg.rows; |
||||
int imgWidth = iImg.cols; |
||||
|
||||
for (int c = 0; c < channels; c++) |
||||
{ |
||||
for (int h = 0; h < imgHeight; h++) |
||||
{ |
||||
for (int w = 0; w < imgWidth; w++) |
||||
{ |
||||
iBlob[c * imgWidth * imgHeight + h * imgWidth + w] = (std::remove_pointer<T>::type)((iImg.at<cv::Vec3b>(h, w)[c]) / 255.0f); |
||||
} |
||||
} |
||||
} |
||||
return RET_OK; |
||||
} |
||||
|
||||
|
||||
// Pre-processing step (the name is historical): resize iImg to the network
// input size {width, height} and convert it to 3-channel RGB in oImg.
// Returns RET_OK (nullptr).
char* PostProcess(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oImg)
{
    // PERF: record the channel count up front instead of cloning the entire
    // image just to query it afterwards (the clone was otherwise unused).
    int srcChannels = iImg.channels();
    cv::resize(iImg, oImg, cv::Size(iImgSize.at(0), iImgSize.at(1)));
    if (srcChannels == 1)
    {
        cv::cvtColor(oImg, oImg, cv::COLOR_GRAY2BGR);
    }
    cv::cvtColor(oImg, oImg, cv::COLOR_BGR2RGB);
    return RET_OK;
}
||||
|
||||
|
||||
// Create the ONNX Runtime session described by iParams and cache the
// per-model settings used later by RunSession()/TensorProcess().
// Returns RET_OK (nullptr) on success, or a static error message.
char* DCSP_CORE::CreateSession(DCSP_INIT_PARAM &iParams)
{
    char* Ret = RET_OK;
    // The wide-char path conversion below is not validated for CJK input;
    // reject such paths early with a clear message.
    std::regex pattern("[\u4e00-\u9fa5]");
    bool result = std::regex_search(iParams.ModelPath, pattern);
    if (result)
    {
        Ret = "[DCSP_ONNX]:model path error.change your model path without chinese characters.";
        std::cout << Ret << std::endl;
        return Ret;
    }
    try
    {
        rectConfidenceThreshold = iParams.RectConfidenceThreshold;
        iouThreshold = iParams.iouThreshold;
        imgSize = iParams.imgSize;
        modelType = iParams.ModelType;
        // BUGFIX: classesNum was never copied from iParams, so TensorProcess()
        // later read an uninitialized member.
        classesNum = iParams.classesNum;
        env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Yolo");
        Ort::SessionOptions sessionOption;
        if (iParams.CudaEnable)
        {
            cudaEnable = iParams.CudaEnable;
            OrtCUDAProviderOptions cudaOption;
            cudaOption.device_id = 0;
            sessionOption.AppendExecutionProvider_CUDA(cudaOption);
        }
        sessionOption.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
        sessionOption.SetIntraOpNumThreads(iParams.IntraOpNumThreads);
        sessionOption.SetLogSeverityLevel(iParams.LogSeverityLevel);
        // ORT on Windows takes a wide-char model path; convert from UTF-8.
        // BUGFIX: use std::wstring (RAII) — the raw new[] buffer was leaked.
        int ModelPathSize = MultiByteToWideChar(CP_UTF8, 0, iParams.ModelPath.c_str(), static_cast<int>(iParams.ModelPath.length()), nullptr, 0);
        std::wstring modelPath(ModelPathSize, L'\0');
        MultiByteToWideChar(CP_UTF8, 0, iParams.ModelPath.c_str(), static_cast<int>(iParams.ModelPath.length()), &modelPath[0], ModelPathSize);
        session = new Ort::Session(env, modelPath.c_str(), sessionOption);
        Ort::AllocatorWithDefaultOptions allocator;
        // Copy node names into stable heap buffers: AllocatedStringPtr frees
        // its storage at scope exit, but inputNodeNames/outputNodeNames must
        // outlive this function.
        size_t inputNodesNum = session->GetInputCount();
        for (size_t i = 0; i < inputNodesNum; i++)
        {
            Ort::AllocatedStringPtr input_node_name = session->GetInputNameAllocated(i, allocator);
            // BUGFIX: size the buffer from the actual name length — strcpy
            // into a fixed 50-byte buffer overflows on long node names.
            size_t len = strlen(input_node_name.get()) + 1;
            char* temp_buf = new char[len];
            strcpy(temp_buf, input_node_name.get());
            inputNodeNames.push_back(temp_buf);
        }

        size_t OutputNodesNum = session->GetOutputCount();
        for (size_t i = 0; i < OutputNodesNum; i++)
        {
            Ort::AllocatedStringPtr output_node_name = session->GetOutputNameAllocated(i, allocator);
            // BUGFIX: ditto — the old fixed 10-byte buffer was even tighter.
            size_t len = strlen(output_node_name.get()) + 1;
            char* temp_buf = new char[len];
            strcpy(temp_buf, output_node_name.get());
            outputNodeNames.push_back(temp_buf);
        }
        options = Ort::RunOptions{ nullptr };
        WarmUpSession();
        Ret = RET_OK;
        return Ret;
    }
    catch (const std::exception& e)
    {
        // Log the detailed error, then return a static message: returning
        // heap-built text would leak or dangle at the call site.
        std::string message = std::string("[DCSP_ONNX]:") + e.what();
        std::cout << message << std::endl;
        return "[DCSP_ONNX]:Create session failed.";
    }

}
||||
|
||||
|
||||
// Run detection on iImg, appending decoded results to oResult.
// Returns RET_OK (nullptr); errors inside ORT propagate as exceptions.
char* DCSP_CORE::RunSession(cv::Mat &iImg, std::vector<DCSP_RESULT>& oResult)
{
    // BUGFIX: starttime_1 was declared only under `#ifdef benchmark` yet
    // passed to TensorProcess() unconditionally — the non-benchmark build
    // did not compile. Declare it always; it is only printed when
    // benchmarking is enabled.
    clock_t starttime_1 = clock();

    char* Ret = RET_OK;
    cv::Mat processedImg;
    // Despite its name, PostProcess() performs pre-processing
    // (resize + RGB conversion).
    PostProcess(iImg, imgSize, processedImg);
    if (modelType < 4)  // model types 0-3 are the FP32 models (see MODEL_TYPE)
    {
        float* blob = new float[processedImg.total() * 3];
        BlobFromImage(processedImg, blob);
        std::vector<int64_t> inputNodeDims = { 1,3,imgSize.at(0),imgSize.at(1) };
        // TensorProcess takes ownership of `blob` and releases it.
        TensorProcess(starttime_1, iImg, blob, inputNodeDims, oResult);
    }

    return Ret;
}
||||
|
||||
|
||||
template<typename N> |
||||
char* DCSP_CORE::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::vector<int64_t>& inputNodeDims, std::vector<DCSP_RESULT>& oResult) |
||||
{ |
||||
Ort::Value inputTensor = Ort::Value::CreateTensor<std::remove_pointer<N>::type>(Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), inputNodeDims.data(), inputNodeDims.size()); |
||||
#ifdef benchmark |
||||
clock_t starttime_2 = clock(); |
||||
#endif // benchmark
|
||||
auto outputTensor = session->Run(options, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), outputNodeNames.size()); |
||||
#ifdef benchmark |
||||
clock_t starttime_3 = clock(); |
||||
#endif // benchmark
|
||||
Ort::TypeInfo typeInfo = outputTensor.front().GetTypeInfo(); |
||||
auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo(); |
||||
std::vector<int64_t>outputNodeDims = tensor_info.GetShape(); |
||||
std::remove_pointer<N>::type* output = outputTensor.front().GetTensorMutableData<std::remove_pointer<N>::type>(); |
||||
delete blob; |
||||
switch (modelType) |
||||
{ |
||||
case 1: |
||||
{ |
||||
int strideNum = outputNodeDims[2]; |
||||
int signalResultNum = outputNodeDims[1]; |
||||
std::vector<int> class_ids; |
||||
std::vector<float> confidences; |
||||
std::vector<cv::Rect> boxes; |
||||
cv::Mat rowData(signalResultNum, strideNum, CV_32F, output); |
||||
rowData = rowData.t(); |
||||
|
||||
float* data = (float*)rowData.data; |
||||
|
||||
float x_factor = iImg.cols / 640.; |
||||
float y_factor = iImg.rows / 640.; |
||||
for (int i = 0; i < strideNum; ++i) |
||||
{ |
||||
float* classesScores = data + 4; |
||||
cv::Mat scores(1, classesNum, CV_32FC1, classesScores); |
||||
cv::Point class_id; |
||||
double maxClassScore; |
||||
cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id); |
||||
if (maxClassScore > rectConfidenceThreshold) |
||||
{ |
||||
confidences.push_back(maxClassScore); |
||||
class_ids.push_back(class_id.x); |
||||
|
||||
float x = data[0]; |
||||
float y = data[1]; |
||||
float w = data[2]; |
||||
float h = data[3]; |
||||
|
||||
int left = int((x - 0.5 * w) * x_factor); |
||||
int top = int((y - 0.5 * h) * y_factor); |
||||
|
||||
int width = int(w * x_factor); |
||||
int height = int(h * y_factor); |
||||
|
||||
boxes.push_back(cv::Rect(left, top, width, height)); |
||||
} |
||||
data += signalResultNum; |
||||
} |
||||
|
||||
std::vector<int> nmsResult; |
||||
cv::dnn::NMSBoxes(boxes, confidences, rectConfidenceThreshold, iouThreshold, nmsResult); |
||||
for (int i = 0; i < nmsResult.size(); ++i) |
||||
{ |
||||
int idx = nmsResult[i]; |
||||
DCSP_RESULT result; |
||||
result.classId = class_ids[idx]; |
||||
result.confidence = confidences[idx]; |
||||
result.box = boxes[idx]; |
||||
oResult.push_back(result); |
||||
} |
||||
|
||||
|
||||
#ifdef benchmark |
||||
clock_t starttime_4 = clock(); |
||||
double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000; |
||||
double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; |
||||
double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; |
||||
if (cudaEnable) |
||||
{ |
||||
std::cout << "[DCSP_ONNX(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; |
||||
} |
||||
else |
||||
{ |
||||
std::cout << "[DCSP_ONNX(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; |
||||
} |
||||
#endif // benchmark
|
||||
|
||||
break; |
||||
} |
||||
} |
||||
char* Ret = RET_OK; |
||||
return Ret; |
||||
} |
||||
|
||||
|
||||
char* DCSP_CORE::WarmUpSession() |
||||
{ |
||||
clock_t starttime_1 = clock(); |
||||
char* Ret = RET_OK; |
||||
cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3); |
||||
cv::Mat processedImg; |
||||
PostProcess(iImg, imgSize, processedImg); |
||||
if (modelType < 4) |
||||
{ |
||||
float* blob = new float[iImg.total() * 3]; |
||||
BlobFromImage(processedImg, blob); |
||||
std::vector<int64_t> YOLO_input_node_dims = { 1,3,imgSize.at(0),imgSize.at(1) }; |
||||
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), YOLO_input_node_dims.data(), YOLO_input_node_dims.size()); |
||||
auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(), outputNodeNames.size()); |
||||
delete[] blob; |
||||
clock_t starttime_4 = clock(); |
||||
double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; |
||||
if (cudaEnable) |
||||
{ |
||||
std::cout << "[DCSP_ONNX(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl; |
||||
} |
||||
} |
||||
|
||||
return Ret; |
||||
} |
@ -0,0 +1,83 @@ |
||||
#pragma once |
||||
|
||||
#define _CRT_SECURE_NO_WARNINGS |
||||
#define RET_OK nullptr |
||||
|
||||
#include <string> |
||||
#include <vector> |
||||
#include <stdio.h> |
||||
#include "io.h" |
||||
#include "direct.h" |
||||
#include "opencv.hpp" |
||||
#include <Windows.h> |
||||
#include "onnxruntime_cxx_api.h" |
||||
|
||||
|
||||
// Supported model flavors. Values < 4 are FP32 models; the inference code
// currently decodes only YOLO_ORIGIN_V8 detector output.
enum MODEL_TYPE
{
    //FLOAT32 MODEL
    YOLO_ORIGIN_V5 = 0,
    YOLO_ORIGIN_V8 = 1,//only support v8 detector currently
    YOLO_POSE_V8 = 2,
    YOLO_CLS_V8 = 3
};
||||
|
||||
|
||||
// Configuration passed to DCSP_CORE::CreateSession().
typedef struct _DCSP_INIT_PARAM
{
    std::string ModelPath;                 // path to the .onnx file (non-CJK characters only)
    MODEL_TYPE ModelType = YOLO_ORIGIN_V8;
    std::vector<int> imgSize={640, 640};   // network input {width, height}

    int classesNum=80;                     // number of classes the model predicts
    float RectConfidenceThreshold = 0.6;   // minimum class score to keep a box
    float iouThreshold = 0.5;              // NMS IoU threshold
    bool CudaEnable = false;               // true -> append the CUDA execution provider
    int LogSeverityLevel = 3;              // ORT log severity passed to SetLogSeverityLevel
    int IntraOpNumThreads = 1;             // ORT intra-op thread count
}DCSP_INIT_PARAM;
||||
|
||||
|
||||
// One detection: class index, confidence score and box in original-image
// pixel coordinates.
typedef struct _DCSP_RESULT
{
    int classId;
    float confidence;
    cv::Rect box;
}DCSP_RESULT;
||||
|
||||
|
||||
class DCSP_CORE |
||||
{ |
||||
public: |
||||
DCSP_CORE(); |
||||
~DCSP_CORE(); |
||||
|
||||
public: |
||||
char* CreateSession(DCSP_INIT_PARAM &iParams); |
||||
|
||||
|
||||
char* RunSession(cv::Mat &iImg, std::vector<DCSP_RESULT>& oResult); |
||||
|
||||
|
||||
char* WarmUpSession(); |
||||
|
||||
|
||||
template<typename N> |
||||
char* TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::vector<int64_t>& inputNodeDims, std::vector<DCSP_RESULT>& oResult); |
||||
|
||||
|
||||
private: |
||||
Ort::Env env; |
||||
Ort::Session* session; |
||||
bool cudaEnable; |
||||
Ort::RunOptions options; |
||||
std::vector<const char*> inputNodeNames; |
||||
std::vector<const char*> outputNodeNames; |
||||
|
||||
|
||||
int classesNum; |
||||
MODEL_TYPE modelType; |
||||
std::vector<int> imgSize; |
||||
float rectConfidenceThreshold; |
||||
float iouThreshold; |
||||
}; |
@ -0,0 +1,44 @@ |
||||
#include <iostream> |
||||
#include <stdio.h> |
||||
#include "inference.h" |
||||
#include <filesystem> |
||||
|
||||
|
||||
|
||||
// Iterate over every .jpg in a hard-coded test directory, run detection on
// each image and display the resulting boxes. `p` must already hold a
// successfully created session.
void file_iterator(DCSP_CORE*& p)
{
    std::filesystem::path img_dir = R"(E:\project\Project_C++\DCPS_ONNX\TEST_ORIGIN)";
    int k = 0;
    // BUGFIX: the original reused `i` for both the directory entry and the
    // inner loop index, and shadowed `img_path`; distinct names avoid the
    // shadowing bugs.
    for (auto& entry : std::filesystem::directory_iterator(img_dir))
    {
        if (entry.path().extension() == ".jpg")
        {
            std::string img_path = entry.path().string();
            cv::Mat img = cv::imread(img_path);
            std::vector<DCSP_RESULT> res;
            char* ret = p->RunSession(img, res);
            // NOTE(review): `ret` is currently unchecked (RET_OK is nullptr).
            for (size_t i = 0; i < res.size(); i++)  // size_t: no signed/unsigned mix
            {
                cv::rectangle(img, res.at(i).box, cv::Scalar(125, 123, 0), 3);
            }

            k++;
            cv::imshow("TEST_ORIGIN", img);
            cv::waitKey(0);
            cv::destroyAllWindows();
            //cv::imwrite("E:\\output\\" + std::to_string(k) + ".png", img);
        }
    }
}
||||
|
||||
|
||||
|
||||
int main() |
||||
{ |
||||
DCSP_CORE* p1 = new DCSP_CORE; |
||||
std::string model_path = "yolov8n.onnx"; |
||||
DCSP_INIT_PARAM params{ model_path, YOLO_ORIGIN_V8, {640, 640}, 80, 0.1, 0.5, false }; |
||||
char* ret = p1->CreateSession(params); |
||||
file_iterator(p1); |
||||
} |
@ -1,13 +1,14 @@ |
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license

# Package version. (Diff flattening left duplicate conflicting __version__
# and __all__ assignments; this resolves to the post-commit state.)
__version__ = '8.0.131'

from ultralytics.hub import start
from ultralytics.vit.rtdetr import RTDETR
from ultralytics.vit.sam import SAM
from ultralytics.yolo.engine.model import YOLO
from ultralytics.yolo.fastsam import FastSAM
from ultralytics.yolo.nas import NAS
from ultralytics.yolo.utils.checks import check_yolo as checks
from ultralytics.yolo.utils.downloads import download

__all__ = '__version__', 'YOLO', 'NAS', 'SAM', 'FastSAM', 'RTDETR', 'checks', 'download', 'start'  # allow simpler import
@ -0,0 +1,8 @@ |
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license

# Public FastSAM API: model wrapper, predictor, prompt helper and validator.
from .model import FastSAM
from .predict import FastSAMPredictor
from .prompt import FastSAMPrompt
from .val import FastSAMValidator

__all__ = 'FastSAMPredictor', 'FastSAM', 'FastSAMPrompt', 'FastSAMValidator'
@ -0,0 +1,53 @@ |
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license |
||||
|
||||
import torch |
||||
|
||||
from ultralytics.yolo.engine.results import Results |
||||
from ultralytics.yolo.fastsam.utils import bbox_iou |
||||
from ultralytics.yolo.utils import DEFAULT_CFG, ops |
||||
from ultralytics.yolo.v8.detect.predict import DetectionPredictor |
||||
|
||||
|
||||
class FastSAMPredictor(DetectionPredictor):
    """FastSAM predictor: YOLOv8-style segmentation postprocessing that also
    injects a whole-image candidate box into the NMS results."""

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """Initialize like a DetectionPredictor but force the segment task."""
        super().__init__(cfg, overrides, _callbacks)
        self.args.task = 'segment'

    def postprocess(self, preds, img, orig_imgs):
        """TODO: filter by classes."""
        p = ops.non_max_suppression(preds[0],
                                    self.args.conf,
                                    self.args.iou,
                                    agnostic=self.args.agnostic_nms,
                                    max_det=self.args.max_det,
                                    nc=len(self.model.names),
                                    classes=self.args.classes)
        # Build one candidate row covering the entire input image:
        # layout is (x, y, w, h, conf, cls, masks...) -> set w, h, conf and
        # all mask coefficients to full-image values.
        full_box = torch.zeros_like(p[0][0])
        full_box[2], full_box[3], full_box[4], full_box[6:] = img.shape[3], img.shape[2], 1.0, 1.0
        full_box = full_box.view(1, -1)
        # If an existing detection nearly matches the full image (IoU > 0.9),
        # replace that detection with the full-image box.
        critical_iou_index = bbox_iou(full_box[0][:4], p[0][:, :4], iou_thres=0.9, image_shape=img.shape[2:])
        if critical_iou_index.numel() != 0:
            full_box[0][4] = p[0][critical_iou_index][:, 4]
            full_box[0][6:] = p[0][critical_iou_index][:, 6:]
            p[0][critical_iou_index] = full_box
        results = []
        proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
        for i, pred in enumerate(p):
            orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs
            path = self.batch[0]
            img_path = path[i] if isinstance(path, list) else path
            if not len(pred):  # save empty boxes
                results.append(Results(orig_img=orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6]))
                continue
            if self.args.retina_masks:
                if not isinstance(orig_imgs, torch.Tensor):
                    pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
                masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2])  # HWC
            else:
                masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True)  # HWC
                if not isinstance(orig_imgs, torch.Tensor):
                    pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
            results.append(
                Results(orig_img=orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks))
        return results
@ -0,0 +1,406 @@ |
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license |
||||
|
||||
import os |
||||
|
||||
import cv2 |
||||
import matplotlib.pyplot as plt |
||||
import numpy as np |
||||
import torch |
||||
from PIL import Image |
||||
|
||||
|
||||
class FastSAMPrompt: |
||||
|
||||
    def __init__(self, img_path, results, device='cuda') -> None:
        """Store the source image, model results and device; lazily install
        and import OpenAI CLIP (needed for text-prompt retrieval)."""
        # self.img_path = img_path
        self.device = device
        self.results = results
        self.img_path = img_path
        self.ori_img = cv2.imread(img_path)

        # Import and assign clip
        try:
            import clip  # for linear_assignment
        except ImportError:
            from ultralytics.yolo.utils.checks import check_requirements
            check_requirements('git+https://github.com/openai/CLIP.git')  # required before installing lap from source
            import clip
        self.clip = clip
||||
|
||||
    @staticmethod
    def _segment_image(image, bbox):
        """Return a copy of `image` with everything outside `bbox`
        (x1, y1, x2, y2) blanked out.

        NOTE(review): despite the name, `black_image` is filled with white
        (255, 255, 255) — confirm which background is intended.
        """
        image_array = np.array(image)
        segmented_image_array = np.zeros_like(image_array)
        x1, y1, x2, y2 = bbox
        segmented_image_array[y1:y2, x1:x2] = image_array[y1:y2, x1:x2]
        segmented_image = Image.fromarray(segmented_image_array)
        black_image = Image.new('RGB', image.size, (255, 255, 255))
        # transparency_mask = np.zeros_like((), dtype=np.uint8)
        transparency_mask = np.zeros((image_array.shape[0], image_array.shape[1]), dtype=np.uint8)
        transparency_mask[y1:y2, x1:x2] = 255
        transparency_mask_image = Image.fromarray(transparency_mask, mode='L')
        # Paste the cropped region onto the background using the box as a mask
        black_image.paste(segmented_image, mask=transparency_mask_image)
        return black_image
||||
|
||||
    @staticmethod
    def _format_results(result, filter=0):
        """Convert a Results object into a list of annotation dicts with keys
        id / segmentation / bbox / score / area, dropping masks with fewer
        than `filter` pixels."""
        annotations = []
        n = len(result.masks.data)
        for i in range(n):
            mask = result.masks.data[i] == 1.0

            if torch.sum(mask) < filter:
                continue
            annotation = {
                'id': i,
                'segmentation': mask.cpu().numpy(),
                'bbox': result.boxes.data[i],
                'score': result.boxes.conf[i]}
            annotation['area'] = annotation['segmentation'].sum()
            annotations.append(annotation)
        return annotations
||||
|
||||
    @staticmethod
    def filter_masks(annotations):  # filter the overlap mask
        """Drop every mask that is >80% covered by a larger mask.

        Sorts `annotations` in place by area (largest first) and returns the
        kept annotations plus the set of removed indices.
        """
        annotations.sort(key=lambda x: x['area'], reverse=True)
        to_remove = set()
        for i in range(len(annotations)):
            a = annotations[i]
            for j in range(i + 1, len(annotations)):
                b = annotations[j]
                # b is removed when most of it lies inside the larger mask a
                if i != j and j not in to_remove and b['area'] < a['area'] and \
                        (a['segmentation'] & b['segmentation']).sum() / b['segmentation'].sum() > 0.8:
                    to_remove.add(j)

        return [a for i, a in enumerate(annotations) if i not in to_remove], to_remove
||||
|
||||
    @staticmethod
    def _get_bbox_from_mask(mask):
        """Bounding box [x1, y1, x2, y2] covering every contour of a binary
        mask.

        NOTE(review): assumes the mask has at least one contour — an empty
        mask raises IndexError on contours[0].
        """
        mask = mask.astype(np.uint8)
        contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        x1, y1, w, h = cv2.boundingRect(contours[0])
        x2, y2 = x1 + w, y1 + h
        if len(contours) > 1:
            for b in contours:
                x_t, y_t, w_t, h_t = cv2.boundingRect(b)
                # Merge the per-contour boxes into one enclosing box
                x1 = min(x1, x_t)
                y1 = min(y1, y_t)
                x2 = max(x2, x_t + w_t)
                y2 = max(y2, y_t + h_t)
            h = y2 - y1
            w = x2 - x1
        return [x1, y1, x2, y2]
||||
|
||||
    def plot(self,
             annotations,
             output,
             bbox=None,
             points=None,
             point_label=None,
             mask_random_color=True,
             better_quality=True,
             retina=False,
             withContours=True):
        """Render `annotations` (masks) over the source image and save the
        figure into the `output` directory under the source image's basename.

        `bbox`/`points`/`point_label` optionally overlay the prompts used;
        `better_quality` smooths mask edges morphologically; `retina` keeps
        masks at model resolution instead of resizing to the original image.
        """
        if isinstance(annotations[0], dict):
            annotations = [annotation['segmentation'] for annotation in annotations]
        result_name = os.path.basename(self.img_path)
        image = self.ori_img
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        original_h = image.shape[0]
        original_w = image.shape[1]
        # for macOS only
        # plt.switch_backend('TkAgg')
        plt.figure(figsize=(original_w / 100, original_h / 100))
        # Add subplot with no margin.
        plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
        plt.margins(0, 0)
        plt.gca().xaxis.set_major_locator(plt.NullLocator())
        plt.gca().yaxis.set_major_locator(plt.NullLocator())

        plt.imshow(image)
        if better_quality:
            if isinstance(annotations[0], torch.Tensor):
                annotations = np.array(annotations.cpu())
            # Morphological close + open to smooth ragged mask edges
            for i, mask in enumerate(annotations):
                mask = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8))
                annotations[i] = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_OPEN, np.ones((8, 8), np.uint8))
        if self.device == 'cpu':
            annotations = np.array(annotations)
            self.fast_show_mask(
                annotations,
                plt.gca(),
                random_color=mask_random_color,
                bbox=bbox,
                points=points,
                pointlabel=point_label,
                retinamask=retina,
                target_height=original_h,
                target_width=original_w,
            )
        else:
            if isinstance(annotations[0], np.ndarray):
                annotations = torch.from_numpy(annotations)
            self.fast_show_mask_gpu(
                annotations,
                plt.gca(),
                random_color=mask_random_color,
                bbox=bbox,
                points=points,
                pointlabel=point_label,
                retinamask=retina,
                target_height=original_h,
                target_width=original_w,
            )
        if isinstance(annotations, torch.Tensor):
            annotations = annotations.cpu().numpy()
        if withContours:
            # Draw all mask contours in a single semi-transparent overlay
            contour_all = []
            temp = np.zeros((original_h, original_w, 1))
            for i, mask in enumerate(annotations):
                if type(mask) == dict:
                    mask = mask['segmentation']
                annotation = mask.astype(np.uint8)
                if not retina:
                    annotation = cv2.resize(
                        annotation,
                        (original_w, original_h),
                        interpolation=cv2.INTER_NEAREST,
                    )
                contours, hierarchy = cv2.findContours(annotation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
                contour_all.extend(iter(contours))
            cv2.drawContours(temp, contour_all, -1, (255, 255, 255), 2)
            color = np.array([0 / 255, 0 / 255, 1.0, 0.8])
            contour_mask = temp / 255 * color.reshape(1, 1, -1)
            plt.imshow(contour_mask)

        save_path = output
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        plt.axis('off')
        fig = plt.gcf()
        plt.draw()

        # Rasterize the figure and write it with OpenCV (RGB -> BGR).
        # Some backends need an explicit draw() before tostring_rgb().
        try:
            buf = fig.canvas.tostring_rgb()
        except AttributeError:
            fig.canvas.draw()
            buf = fig.canvas.tostring_rgb()
        cols, rows = fig.canvas.get_width_height()
        img_array = np.frombuffer(buf, dtype=np.uint8).reshape(rows, cols, 3)
        cv2.imwrite(os.path.join(save_path, result_name), cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR))
||||
|
||||
# CPU post process |
||||
    # CPU post process
    def fast_show_mask(
        self,
        annotation,
        ax,
        random_color=False,
        bbox=None,
        points=None,
        pointlabel=None,
        retinamask=True,
        target_height=960,
        target_width=960,
    ):
        """Draw a stack of masks (N, H, W) onto a matplotlib axis as a single
        RGBA overlay, optionally with a prompt bbox and points."""
        msak_sum = annotation.shape[0]
        height = annotation.shape[1]
        weight = annotation.shape[2]
        # Sort annotations by mask area (ascending) so that, at each pixel,
        # smaller masks win in the argmax-based compositing below.
        areas = np.sum(annotation, axis=(1, 2))
        sorted_indices = np.argsort(areas)
        annotation = annotation[sorted_indices]

        # Per-pixel index of the first non-zero mask
        index = (annotation != 0).argmax(axis=0)
        if random_color:
            color = np.random.random((msak_sum, 1, 1, 3))
        else:
            color = np.ones((msak_sum, 1, 1, 3)) * np.array([30 / 255, 144 / 255, 1.0])
        transparency = np.ones((msak_sum, 1, 1, 1)) * 0.6
        visual = np.concatenate([color, transparency], axis=-1)
        mask_image = np.expand_dims(annotation, -1) * visual

        show = np.zeros((height, weight, 4))
        h_indices, w_indices = np.meshgrid(np.arange(height), np.arange(weight), indexing='ij')
        indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None))
        # Vectorized indexed update of `show`
        show[h_indices, w_indices, :] = mask_image[indices]
        if bbox is not None:
            x1, y1, x2, y2 = bbox
            ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor='b', linewidth=1))
        # draw point
        if points is not None:
            # Foreground prompt points in yellow, background in magenta
            plt.scatter(
                [point[0] for i, point in enumerate(points) if pointlabel[i] == 1],
                [point[1] for i, point in enumerate(points) if pointlabel[i] == 1],
                s=20,
                c='y',
            )
            plt.scatter(
                [point[0] for i, point in enumerate(points) if pointlabel[i] == 0],
                [point[1] for i, point in enumerate(points) if pointlabel[i] == 0],
                s=20,
                c='m',
            )

        if not retinamask:
            show = cv2.resize(show, (target_width, target_height), interpolation=cv2.INTER_NEAREST)
        ax.imshow(show)
||||
|
||||
    def fast_show_mask_gpu(
        self,
        annotation,
        ax,
        random_color=False,
        bbox=None,
        points=None,
        pointlabel=None,
        retinamask=True,
        target_height=960,
        target_width=960,
    ):
        """GPU variant of fast_show_mask: composite a (N, H, W) mask tensor
        into one RGBA overlay with torch ops, then draw it on `ax`."""
        msak_sum = annotation.shape[0]
        height = annotation.shape[1]
        weight = annotation.shape[2]
        areas = torch.sum(annotation, dim=(1, 2))
        sorted_indices = torch.argsort(areas, descending=False)
        annotation = annotation[sorted_indices]
        # Per-pixel index of the first non-zero mask
        index = (annotation != 0).to(torch.long).argmax(dim=0)
        if random_color:
            color = torch.rand((msak_sum, 1, 1, 3)).to(annotation.device)
        else:
            color = torch.ones((msak_sum, 1, 1, 3)).to(annotation.device) * torch.tensor([30 / 255, 144 / 255, 1.0]).to(
                annotation.device)
        transparency = torch.ones((msak_sum, 1, 1, 1)).to(annotation.device) * 0.6
        visual = torch.cat([color, transparency], dim=-1)
        mask_image = torch.unsqueeze(annotation, -1) * visual
        # Gather by `index`: for every pixel, pick the chosen mask's color so
        # mask_image collapses into a single-image overlay.
        show = torch.zeros((height, weight, 4)).to(annotation.device)
        h_indices, w_indices = torch.meshgrid(torch.arange(height), torch.arange(weight), indexing='ij')
        indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None))
        # Vectorized indexed update of `show`
        show[h_indices, w_indices, :] = mask_image[indices]
        show_cpu = show.cpu().numpy()
        if bbox is not None:
            x1, y1, x2, y2 = bbox
            ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor='b', linewidth=1))
        # draw point
        if points is not None:
            # Foreground prompt points in yellow, background in magenta
            plt.scatter(
                [point[0] for i, point in enumerate(points) if pointlabel[i] == 1],
                [point[1] for i, point in enumerate(points) if pointlabel[i] == 1],
                s=20,
                c='y',
            )
            plt.scatter(
                [point[0] for i, point in enumerate(points) if pointlabel[i] == 0],
                [point[1] for i, point in enumerate(points) if pointlabel[i] == 0],
                s=20,
                c='m',
            )
        if not retinamask:
            show_cpu = cv2.resize(show_cpu, (target_width, target_height), interpolation=cv2.INTER_NEAREST)
        ax.imshow(show_cpu)
||||
|
||||
# clip |
||||
    @torch.no_grad()
    def retrieve(self, model, preprocess, elements, search_text: str, device) -> int:
        """Score each image in `elements` against `search_text` with CLIP.

        Returns a softmax distribution over the images (higher = better
        match to the text prompt).
        """
        preprocessed_images = [preprocess(image).to(device) for image in elements]
        tokenized_text = self.clip.tokenize([search_text]).to(device)
        stacked_images = torch.stack(preprocessed_images)
        image_features = model.encode_image(stacked_images)
        text_features = model.encode_text(tokenized_text)
        # Cosine similarity via L2-normalized dot product
        image_features /= image_features.norm(dim=-1, keepdim=True)
        text_features /= text_features.norm(dim=-1, keepdim=True)
        probs = 100.0 * image_features @ text_features.T
        return probs[:, 0].softmax(dim=0)
||||
|
||||
    def _crop_image(self, format_results):
        """Crop the source image to each annotation's bounding box.

        Returns (cropped_boxes, cropped_images, not_crop, filter_id,
        annotations); tiny masks (<= 100 px) are skipped and their indices
        collected in filter_id.
        """
        image = Image.fromarray(cv2.cvtColor(self.ori_img, cv2.COLOR_BGR2RGB))
        ori_w, ori_h = image.size
        annotations = format_results
        mask_h, mask_w = annotations[0]['segmentation'].shape
        if ori_w != mask_w or ori_h != mask_h:
            image = image.resize((mask_w, mask_h))
        cropped_boxes = []
        cropped_images = []
        not_crop = []
        filter_id = []
        # annotations, _ = filter_masks(annotations)
        # filter_id = list(_)
        for _, mask in enumerate(annotations):
            if np.sum(mask['segmentation']) <= 100:
                filter_id.append(_)
                continue
            bbox = self._get_bbox_from_mask(mask['segmentation'])  # bbox of the mask
            cropped_boxes.append(self._segment_image(image, bbox))  # save the cropped image
            # cropped_boxes.append(segment_image(image,mask["segmentation"]))
            cropped_images.append(bbox)  # save the cropped image's bbox

        return cropped_boxes, cropped_images, not_crop, filter_id, annotations
||||
|
||||
def box_prompt(self, bbox):
    """Pick the predicted mask that best matches a user-supplied box.

    The box is given in original-image pixel coordinates (x1, y1, x2, y2);
    the mask with the highest IoU against it is returned.

    Returns:
        np.ndarray: array of shape (1, h, w) holding the best mask.
    """
    assert (bbox[2] != 0 and bbox[3] != 0)
    masks = self.results[0].masks.data
    target_height = self.ori_img.shape[0]
    target_width = self.ori_img.shape[1]
    h, w = masks.shape[1], masks.shape[2]
    # Rescale the prompt box into mask coordinates when resolutions differ.
    if h != target_height or w != target_width:
        scale_x = w / target_width
        scale_y = h / target_height
        bbox = [
            int(bbox[0] * scale_x),
            int(bbox[1] * scale_y),
            int(bbox[2] * scale_x),
            int(bbox[3] * scale_y), ]
    bbox[0] = max(round(bbox[0]), 0)
    bbox[1] = max(round(bbox[1]), 0)
    bbox[2] = min(round(bbox[2]), w)
    bbox[3] = min(round(bbox[3]), h)

    # IoU of the prompt box against every candidate mask: the mask pixels
    # inside the box are the intersection, the union is box + mask areas
    # minus that overlap.
    box_area = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0])
    overlap = torch.sum(masks[:, bbox[1]:bbox[3], bbox[0]:bbox[2]], dim=(1, 2))
    mask_areas = torch.sum(masks, dim=(1, 2))
    ious = overlap / (box_area + mask_areas - overlap)
    best = torch.argmax(ious)

    return np.array([masks[best].cpu().numpy()])
||||
|
||||
def point_prompt(self, points, pointlabel):
    """Build a single mask from point prompts.

    Fix: the original used ``i`` as the loop variable for BOTH the
    annotation loop and the inner point loop, shadowing the outer index;
    the outer loop no longer binds an index it never used.

    Args:
        points: list of [x, y] points in original-image coordinates.
        pointlabel: parallel list of labels; 1 adds the mask under the
            point (foreground), 0 subtracts it (background).

    Returns:
        np.ndarray: boolean mask of shape (1, h, w) at mask resolution.
    """
    masks = self._format_results(self.results[0], 0)
    target_height = self.ori_img.shape[0]
    target_width = self.ori_img.shape[1]
    h = masks[0]['segmentation'].shape[0]
    w = masks[0]['segmentation'].shape[1]
    # Rescale points into mask coordinates when resolutions differ.
    if h != target_height or w != target_width:
        points = [[int(point[0] * w / target_width), int(point[1] * h / target_height)] for point in points]
    onemask = np.zeros((h, w))
    for annotation in masks:
        mask = annotation['segmentation'] if type(annotation) == dict else annotation
        for i, point in enumerate(points):
            # Foreground point inside this mask: add it; background: subtract.
            if mask[point[1], point[0]] == 1 and pointlabel[i] == 1:
                onemask += mask
            if mask[point[1], point[0]] == 1 and pointlabel[i] == 0:
                onemask -= mask
    # Pixels covered by at least one net-positive mask become True.
    onemask = onemask >= 1
    return np.array([onemask])
||||
|
||||
def text_prompt(self, text):
    """Return the mask whose cropped region best matches a text query.

    Crops the image around every mask, scores each crop against ``text``
    with CLIP, and returns the highest-scoring mask.
    """
    format_results = self._format_results(self.results[0], 0)
    cropped_boxes, cropped_images, not_crop, filter_id, annotations = self._crop_image(format_results)
    clip_model, preprocess = self.clip.load('ViT-B/32', device=self.device)
    scores = self.retrieve(clip_model, preprocess, cropped_boxes, text, device=self.device)
    # Index of the top-scoring crop (last element of the ascending sort).
    max_idx = scores.argsort()[-1]
    # Tiny masks were filtered out before scoring; shift the index back
    # into the full annotation list to compensate for each skipped entry.
    max_idx += sum(np.array(filter_id) <= int(max_idx))
    return np.array([annotations[max_idx]['segmentation']])
||||
|
||||
def everything_prompt(self):
    """Return the raw mask tensor for every detected object (no filtering)."""
    return self.results[0].masks.data
@ -0,0 +1,64 @@ |
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license |
||||
|
||||
import torch |
||||
|
||||
|
||||
def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
    """Snap near-border box edges onto the image border (in place).

    Any edge lying within ``threshold`` pixels of the corresponding image
    border is moved exactly onto that border; other edges are untouched.

    Args:
        boxes (torch.Tensor): (n, 4) boxes as (x1, y1, x2, y2).
        image_shape (tuple): (height, width) of the image.
        threshold (int): pixel distance within which an edge snaps.

    Returns:
        torch.Tensor: the same tensor, modified in place.
    """
    height, width = image_shape

    boxes[boxes[:, 0] < threshold, 0] = 0              # x1 -> left edge
    boxes[boxes[:, 1] < threshold, 1] = 0              # y1 -> top edge
    boxes[boxes[:, 2] > width - threshold, 2] = width  # x2 -> right edge
    boxes[boxes[:, 3] > height - threshold, 3] = height  # y2 -> bottom edge
    return boxes
||||
|
||||
|
||||
def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
    """IoU of one bounding box against an array of bounding boxes.

    NOTE: ``boxes`` is first border-snapped via
    adjust_bboxes_to_image_border, which mutates the caller's tensor.

    Args:
        box1 (torch.Tensor): (4, ) box as (x1, y1, x2, y2).
        boxes (torch.Tensor): (n, 4) candidate boxes.
        iou_thres (float): IoU threshold used when selecting indices.
        image_shape (tuple): (height, width) used for border snapping.
        raw_output (bool): if True, return the raw IoU tensor (or 0 when
            there are no boxes) instead of indices.

    Returns:
        torch.Tensor: indices of boxes with IoU > iou_thres, or the raw
        IoU values when raw_output is True.
    """
    boxes = adjust_bboxes_to_image_border(boxes, image_shape)

    # Intersection rectangle of box1 with every candidate box.
    left = torch.max(box1[0], boxes[:, 0])
    top = torch.max(box1[1], boxes[:, 1])
    right = torch.min(box1[2], boxes[:, 2])
    bottom = torch.min(box1[3], boxes[:, 3])

    # clamp(0) zeroes out non-overlapping pairs.
    intersection = (right - left).clamp(0) * (bottom - top).clamp(0)

    # Union = sum of the individual areas minus the intersection.
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    iou = intersection / (area1 + areas - intersection)  # shape (n, )

    if raw_output:
        return 0 if iou.numel() == 0 else iou

    # Indices of candidates whose IoU exceeds the threshold.
    return torch.nonzero(iou > iou_thres).flatten()
@ -0,0 +1,244 @@ |
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license |
||||
|
||||
from multiprocessing.pool import ThreadPool |
||||
from pathlib import Path |
||||
|
||||
import numpy as np |
||||
import torch |
||||
import torch.nn.functional as F |
||||
|
||||
from ultralytics.yolo.utils import LOGGER, NUM_THREADS, ops |
||||
from ultralytics.yolo.utils.checks import check_requirements |
||||
from ultralytics.yolo.utils.metrics import SegmentMetrics, box_iou, mask_iou |
||||
from ultralytics.yolo.utils.plotting import output_to_target, plot_images |
||||
from ultralytics.yolo.v8.detect import DetectionValidator |
||||
|
||||
|
||||
class FastSAMValidator(DetectionValidator):
    """Segmentation-style validator used for FastSAM models.

    Extends DetectionValidator with mask handling: it processes prototype
    masks from the model output, scores boxes AND masks against ground
    truth, and can export COCO-style JSON (RLE-encoded masks) for
    pycocotools evaluation.
    """

    def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
        """Initialize SegmentationValidator and set task to 'segment', metrics to SegmentMetrics."""
        super().__init__(dataloader, save_dir, pbar, args, _callbacks)
        self.args.task = 'segment'
        self.metrics = SegmentMetrics(save_dir=self.save_dir, on_plot=self.on_plot)

    def preprocess(self, batch):
        """Preprocesses batch by converting masks to float and sending to device."""
        batch = super().preprocess(batch)
        batch['masks'] = batch['masks'].to(self.device).float()
        return batch

    def init_metrics(self, model):
        """Initialize metrics and select mask processing function based on save_json flag."""
        super().init_metrics(model)
        self.plot_masks = []
        if self.args.save_json:
            check_requirements('pycocotools>=2.0.6')
            self.process = ops.process_mask_upsample  # more accurate
        else:
            self.process = ops.process_mask  # faster

    def get_desc(self):
        """Return a formatted description of evaluation metrics."""
        return ('%22s' + '%11s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', 'R', 'mAP50', 'mAP50-95)', 'Mask(P',
                                         'R', 'mAP50', 'mAP50-95)')

    def postprocess(self, preds):
        """Postprocesses YOLO predictions and returns output detections with proto."""
        p = ops.non_max_suppression(preds[0],
                                    self.args.conf,
                                    self.args.iou,
                                    labels=self.lb,
                                    multi_label=True,
                                    agnostic=self.args.single_cls,
                                    max_det=self.args.max_det,
                                    nc=self.nc)
        proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
        return p, proto

    def update_metrics(self, preds, batch):
        """Accumulate per-image box and mask correctness stats for one batch."""
        for si, (pred, proto) in enumerate(zip(preds[0], preds[1])):
            idx = batch['batch_idx'] == si
            cls = batch['cls'][idx]
            bbox = batch['bboxes'][idx]
            nl, npr = cls.shape[0], pred.shape[0]  # number of labels, predictions
            shape = batch['ori_shape'][si]
            correct_masks = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
            correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
            self.seen += 1

            # No predictions for this image: record all-false stats if there
            # are labels (so recall is penalized), then move on.
            if npr == 0:
                if nl:
                    self.stats.append((correct_bboxes, correct_masks, *torch.zeros(
                        (2, 0), device=self.device), cls.squeeze(-1)))
                    if self.args.plots:
                        self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
                continue

            # Masks
            midx = [si] if self.args.overlap_mask else idx
            gt_masks = batch['masks'][midx]
            pred_masks = self.process(proto, pred[:, 6:], pred[:, :4], shape=batch['img'][si].shape[1:])

            # Predictions
            if self.args.single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            ops.scale_boxes(batch['img'][si].shape[1:], predn[:, :4], shape,
                            ratio_pad=batch['ratio_pad'][si])  # native-space pred

            # Evaluate
            if nl:
                height, width = batch['img'].shape[2:]
                # Targets come in normalized xywh; scale to pixels, then map
                # back to native (original-image) space like the predictions.
                tbox = ops.xywh2xyxy(bbox) * torch.tensor(
                    (width, height, width, height), device=self.device)  # target boxes
                ops.scale_boxes(batch['img'][si].shape[1:], tbox, shape,
                                ratio_pad=batch['ratio_pad'][si])  # native-space labels
                labelsn = torch.cat((cls, tbox), 1)  # native-space labels
                correct_bboxes = self._process_batch(predn, labelsn)
                # TODO: maybe remove these `self.` arguments as they already are member variable
                correct_masks = self._process_batch(predn,
                                                    labelsn,
                                                    pred_masks,
                                                    gt_masks,
                                                    overlap=self.args.overlap_mask,
                                                    masks=True)
                if self.args.plots:
                    self.confusion_matrix.process_batch(predn, labelsn)

            # Append correct_masks, correct_boxes, pconf, pcls, tcls
            self.stats.append((correct_bboxes, correct_masks, pred[:, 4], pred[:, 5], cls.squeeze(-1)))

            pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8)
            if self.args.plots and self.batch_i < 3:
                self.plot_masks.append(pred_masks[:15].cpu())  # filter top 15 to plot

            # Save
            if self.args.save_json:
                pred_masks = ops.scale_image(pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(),
                                             shape,
                                             ratio_pad=batch['ratio_pad'][si])
                self.pred_to_json(predn, batch['im_file'][si], pred_masks)
            # if self.args.save_txt:
            #    save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt')

    def finalize_metrics(self, *args, **kwargs):
        """Sets speed and confusion matrix for evaluation metrics."""
        self.metrics.speed = self.speed
        self.metrics.confusion_matrix = self.confusion_matrix

    def _process_batch(self, detections, labels, pred_masks=None, gt_masks=None, overlap=False, masks=False):
        """
        Return correct prediction matrix
        Arguments:
            detections (array[N, 6]), x1, y1, x2, y2, conf, class
            labels (array[M, 5]), class, x1, y1, x2, y2
        Returns:
            correct (array[N, 10]), for 10 IoU levels
        """
        if masks:
            if overlap:
                # Overlapping masks are encoded as one index map; expand to
                # one binary mask per label (index k+1 marks label k).
                nl = len(labels)
                index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
                gt_masks = gt_masks.repeat(nl, 1, 1)  # shape(1,640,640) -> (n,640,640)
                gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
            if gt_masks.shape[1:] != pred_masks.shape[1:]:
                gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode='bilinear', align_corners=False)[0]
                gt_masks = gt_masks.gt_(0.5)
            iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
        else:  # boxes
            iou = box_iou(labels[:, 1:], detections[:, :4])

        correct = np.zeros((detections.shape[0], self.iouv.shape[0])).astype(bool)
        correct_class = labels[:, 0:1] == detections[:, 5]
        for i in range(len(self.iouv)):
            x = torch.where((iou >= self.iouv[i]) & correct_class)  # IoU > threshold and classes match
            if x[0].shape[0]:
                matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]),
                                    1).cpu().numpy()  # [label, detect, iou]
                # Greedy one-to-one matching: keep only the highest-IoU pair
                # for each detection and each label.
                if x[0].shape[0] > 1:
                    matches = matches[matches[:, 2].argsort()[::-1]]
                    matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                    # matches = matches[matches[:, 2].argsort()[::-1]]
                    matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
                correct[matches[:, 1].astype(int), i] = True
        return torch.tensor(correct, dtype=torch.bool, device=detections.device)

    def plot_val_samples(self, batch, ni):
        """Plots validation samples with bounding box labels."""
        plot_images(batch['img'],
                    batch['batch_idx'],
                    batch['cls'].squeeze(-1),
                    batch['bboxes'],
                    batch['masks'],
                    paths=batch['im_file'],
                    fname=self.save_dir / f'val_batch{ni}_labels.jpg',
                    names=self.names,
                    on_plot=self.on_plot)

    def plot_predictions(self, batch, preds, ni):
        """Plots batch predictions with masks and bounding boxes."""
        plot_images(
            batch['img'],
            *output_to_target(preds[0], max_det=15),  # not set to self.args.max_det due to slow plotting speed
            torch.cat(self.plot_masks, dim=0) if len(self.plot_masks) else self.plot_masks,
            paths=batch['im_file'],
            fname=self.save_dir / f'val_batch{ni}_pred.jpg',
            names=self.names,
            on_plot=self.on_plot)  # pred
        self.plot_masks.clear()

    def pred_to_json(self, predn, filename, pred_masks):
        """Save one JSON result."""
        # Example result = {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
        from pycocotools.mask import encode  # noqa

        def single_encode(x):
            """Encode predicted masks as RLE and append results to jdict."""
            rle = encode(np.asarray(x[:, :, None], order='F', dtype='uint8'))[0]
            rle['counts'] = rle['counts'].decode('utf-8')
            return rle

        stem = Path(filename).stem
        image_id = int(stem) if stem.isnumeric() else stem
        box = ops.xyxy2xywh(predn[:, :4])  # xywh
        box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
        pred_masks = np.transpose(pred_masks, (2, 0, 1))
        # RLE-encode masks in parallel; encoding is CPU-bound per mask.
        with ThreadPool(NUM_THREADS) as pool:
            rles = pool.map(single_encode, pred_masks)
        for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())):
            self.jdict.append({
                'image_id': image_id,
                'category_id': self.class_map[int(p[5])],
                'bbox': [round(x, 3) for x in b],
                'score': round(p[4], 5),
                'segmentation': rles[i]})

    def eval_json(self, stats):
        """Return COCO-style object detection evaluation metrics."""
        if self.args.save_json and self.is_coco and len(self.jdict):
            anno_json = self.data['path'] / 'annotations/instances_val2017.json'  # annotations
            pred_json = self.save_dir / 'predictions.json'  # predictions
            LOGGER.info(f'\nEvaluating pycocotools mAP using {pred_json} and {anno_json}...')
            try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
                check_requirements('pycocotools>=2.0.6')
                from pycocotools.coco import COCO  # noqa
                from pycocotools.cocoeval import COCOeval  # noqa

                for x in anno_json, pred_json:
                    assert x.is_file(), f'{x} file not found'
                anno = COCO(str(anno_json))  # init annotations api
                pred = anno.loadRes(str(pred_json))  # init predictions api (must pass string, not Path)
                for i, eval in enumerate([COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'segm')]):
                    if self.is_coco:
                        eval.params.imgIds = [int(Path(x).stem) for x in self.dataloader.dataset.im_files]  # im to eval
                    eval.evaluate()
                    eval.accumulate()
                    eval.summarize()
                    idx = i * 4 + 2
                    stats[self.metrics.keys[idx + 1]], stats[
                        self.metrics.keys[idx]] = eval.stats[:2]  # update mAP50-95 and mAP50
            except Exception as e:
                LOGGER.warning(f'pycocotools unable to run: {e}')
        return stats
Loading…
Reference in new issue