diff --git a/csrc/detect/end2end/include/common.hpp b/csrc/detect/end2end/include/common.hpp index ee8973f..b27deb3 100644 --- a/csrc/detect/end2end/include/common.hpp +++ b/csrc/detect/end2end/include/common.hpp @@ -2,8 +2,8 @@ // Created by ubuntu on 1/24/23. // -#ifndef DETECTION_END2END_COMMON_HPP -#define DETECTION_END2END_COMMON_HPP +#ifndef DETECT_END2END_COMMON_HPP +#define DETECT_END2END_COMMON_HPP #include "opencv2/opencv.hpp" #include #include @@ -153,4 +153,4 @@ namespace det float width = 0; }; } -#endif //DETECTION_END2END_COMMON_HPP +#endif //DETECT_END2END_COMMON_HPP diff --git a/csrc/detect/end2end/include/yolov8.hpp b/csrc/detect/end2end/include/yolov8.hpp index 08709b0..b219cf2 100644 --- a/csrc/detect/end2end/include/yolov8.hpp +++ b/csrc/detect/end2end/include/yolov8.hpp @@ -1,6 +1,8 @@ // // Created by ubuntu on 1/20/23. // +#ifndef DETECT_END2END_YOLOV8_HPP +#define DETECT_END2END_YOLOV8_HPP #include "fstream" #include "common.hpp" #include "NvInferPlugin.h" @@ -421,3 +423,4 @@ void YOLOv8::draw_objects( ); } } +#endif //DETECT_END2END_YOLOV8_HPP diff --git a/csrc/detect/normal/include/common.hpp b/csrc/detect/normal/include/common.hpp index 6b2d570..4ad0ae6 100644 --- a/csrc/detect/normal/include/common.hpp +++ b/csrc/detect/normal/include/common.hpp @@ -2,8 +2,8 @@ // Created by ubuntu on 1/24/23. // -#ifndef DETECTION_NORMAL_COMMON_HPP -#define DETECTION_NORMAL_COMMON_HPP +#ifndef DETECT_NORMAL_COMMON_HPP +#define DETECT_NORMAL_COMMON_HPP #include "opencv2/opencv.hpp" #include #include @@ -153,4 +153,4 @@ namespace det float width = 0; }; } -#endif //DETECTION_NORMAL_COMMON_HPP +#endif //DETECT_NORMAL_COMMON_HPP diff --git a/csrc/detect/normal/include/yolov8.hpp b/csrc/detect/normal/include/yolov8.hpp index 274319e..f676b8a 100644 --- a/csrc/detect/normal/include/yolov8.hpp +++ b/csrc/detect/normal/include/yolov8.hpp @@ -1,6 +1,8 @@ // // Created by ubuntu on 1/20/23. // +#ifndef DETECT_NORMAL_YOLOV8_HPP +#define DETECT_NORMAL_YOLOV8_HPP #include "fstream" #include "common.hpp" #include "NvInferPlugin.h" @@ -187,7 +189,11 @@ void YOLOv8::make_pipe(bool warmup) } } -void YOLOv8::letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size) +void YOLOv8::letterbox( + const cv::Mat& image, + cv::Mat& out, + cv::Size& size +) { const float inp_h = size.height; const float inp_w = size.width; @@ -489,3 +495,4 @@ void YOLOv8::draw_objects( ); } } +#endif //DETECT_NORMAL_YOLOV8_HPP diff --git a/csrc/detection/CMakeLists.txt b/csrc/detection/CMakeLists.txt deleted file mode 100644 index 5a9e9c5..0000000 --- a/csrc/detection/CMakeLists.txt +++ /dev/null @@ -1,55 +0,0 @@ -cmake_minimum_required(VERSION 2.8.12) - -set(CMAKE_CUDA_ARCHITECTURES 60 61 62 70 72 75 86) -set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc) - -project(yolov8 LANGUAGES CXX CUDA) - -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O3 -g") -set(CMAKE_CXX_STANDARD 14) -set(CMAKE_BUILD_TYPE Release) -option(CUDA_USE_STATIC_CUDA_RUNTIME OFF) - -# CUDA -find_package(CUDA REQUIRED) -message(STATUS "CUDA Libs: \n${CUDA_LIBRARIES}\n") -message(STATUS "CUDA Headers: \n${CUDA_INCLUDE_DIRS}\n") - -# OpenCV -find_package(OpenCV REQUIRED) -message(STATUS "OpenCV Libs: \n${OpenCV_LIBS}\n") -message(STATUS "OpenCV Libraries: \n${OpenCV_LIBRARIES}\n") -message(STATUS "OpenCV Headers: \n${OpenCV_INCLUDE_DIRS}\n") - -# TensorRT -set(TensorRT_INCLUDE_DIRS /usr/include/x86_64-linux-gnu) -set(TensorRT_LIBRARIES /usr/lib/x86_64-linux-gnu) - - -message(STATUS "TensorRT Libs: \n${TensorRT_LIBRARIES}\n") -message(STATUS "TensorRT Headers: \n${TensorRT_INCLUDE_DIRS}\n") - -list(APPEND INCLUDE_DIRS - ${CUDA_INCLUDE_DIRS} - ${OpenCV_INCLUDE_DIRS} - ${TensorRT_INCLUDE_DIRS} - ./include - ) - -list(APPEND ALL_LIBS - ${CUDA_LIBRARIES} - ${OpenCV_LIBRARIES} - ${TensorRT_LIBRARIES} - ) - -include_directories(${INCLUDE_DIRS}) - -add_executable(${PROJECT_NAME} - main.cpp - include/yolov8.hpp - include/config.h - include/utils.h - ) - -target_link_directories(${PROJECT_NAME} PUBLIC ${ALL_LIBS}) -target_link_libraries(${PROJECT_NAME} PRIVATE nvinfer nvinfer_plugin cudart ${OpenCV_LIBS}) diff --git a/csrc/detection/include/config.h b/csrc/detection/include/config.h deleted file mode 100644 index 600c3fe..0000000 --- a/csrc/detection/include/config.h +++ /dev/null @@ -1,84 +0,0 @@ -// -// Created by ubuntu on 1/10/23. -// - -#ifndef YOLOV8_CSRC_DETECT_INCLUDE_CONFIG_H -#define YOLOV8_CSRC_DETECT_INCLUDE_CONFIG_H -#include "opencv2/opencv.hpp" -namespace det -{ - const int DEVICE = 0; - - const int INPUT_W = 640; - const int INPUT_H = 640; - const int NUM_INPUT = 1; - const int NUM_OUTPUT = 4; - - const int NUM_BINDINGS = NUM_INPUT + NUM_OUTPUT; - const cv::Scalar PAD_COLOR = { 114, 114, 114 }; - const cv::Scalar RECT_COLOR = cv::Scalar(0, 0, 255); - const cv::Scalar TXT_COLOR = cv::Scalar(255, 255, 255); - - const char* INPUT = "images"; - const char* NUM_DETS = "num_dets"; - const char* BBOXES = "bboxes"; - const char* SCORES = "scores"; - const char* LABELS = "labels"; - - const char* CLASS_NAMES[] = { - "person", "bicycle", "car", "motorcycle", "airplane", "bus", - "train", "truck", "boat", "traffic light", "fire hydrant", - "stop sign", "parking meter", "bench", "bird", "cat", - "dog", "horse", "sheep", "cow", "elephant", - "bear", "zebra", "giraffe", "backpack", "umbrella", - "handbag", "tie", "suitcase", "frisbee", "skis", - "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", - "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", - "cup", "fork", "knife", "spoon", "bowl", - "banana", "apple", "sandwich", "orange", "broccoli", - "carrot", "hot dog", "pizza", "donut", "cake", - "chair", "couch", "potted plant", "bed", "dining table", - "toilet", "tv", "laptop", "mouse", "remote", - "keyboard", "cell phone", "microwave", "oven", - "toaster", "sink", "refrigerator", "book", "clock", "vase", - "scissors", "teddy bear", "hair drier", "toothbrush" }; - - const unsigned int COLORS[80][3] = { - { 0, 114, 189 }, { 217, 83, 25 }, { 237, 177, 32 }, - { 126, 47, 142 }, { 119, 172, 48 }, { 77, 190, 238 }, - { 162, 20, 47 }, { 76, 76, 76 }, { 153, 153, 153 }, - { 255, 0, 0 }, { 255, 128, 0 }, { 191, 191, 0 }, - { 0, 255, 0 }, { 0, 0, 255 }, { 170, 0, 255 }, - { 85, 85, 0 }, { 85, 170, 0 }, { 85, 255, 0 }, - { 170, 85, 0 }, { 170, 170, 0 }, { 170, 255, 0 }, - { 255, 85, 0 }, { 255, 170, 0 }, { 255, 255, 0 }, - { 0, 85, 128 }, { 0, 170, 128 }, { 0, 255, 128 }, - { 85, 0, 128 }, { 85, 85, 128 }, { 85, 170, 128 }, - { 85, 255, 128 }, { 170, 0, 128 }, { 170, 85, 128 }, - { 170, 170, 128 }, { 170, 255, 128 }, { 255, 0, 128 }, - { 255, 85, 128 }, { 255, 170, 128 }, { 255, 255, 128 }, - { 0, 85, 255 }, { 0, 170, 255 }, { 0, 255, 255 }, - { 85, 0, 255 }, { 85, 85, 255 }, { 85, 170, 255 }, - { 85, 255, 255 }, { 170, 0, 255 }, { 170, 85, 255 }, - { 170, 170, 255 }, { 170, 255, 255 }, { 255, 0, 255 }, - { 255, 85, 255 }, { 255, 170, 255 }, { 85, 0, 0 }, - { 128, 0, 0 }, { 170, 0, 0 }, { 212, 0, 0 }, - { 255, 0, 0 }, { 0, 43, 0 }, { 0, 85, 0 }, - { 0, 128, 0 }, { 0, 170, 0 }, { 0, 212, 0 }, - { 0, 255, 0 }, { 0, 0, 43 }, { 0, 0, 85 }, - { 0, 0, 128 }, { 0, 0, 170 }, { 0, 0, 212 }, - { 0, 0, 255 }, { 0, 0, 0 }, { 36, 36, 36 }, - { 73, 73, 73 }, { 109, 109, 109 }, { 146, 146, 146 }, - { 182, 182, 182 }, { 219, 219, 219 }, { 0, 114, 189 }, - { 80, 183, 189 }, { 128, 128, 0 } - }; - - struct Object - { - cv::Rect_ rect; - int label = 0; - float prob = 0.0; - }; - -} -#endif //YOLOV8_CSRC_DETECT_INCLUDE_CONFIG_H diff --git a/csrc/detection/include/utils.h b/csrc/detection/include/utils.h deleted file mode 100644 index c03db75..0000000 --- a/csrc/detection/include/utils.h +++ /dev/null @@ -1,133 +0,0 @@ -// -// Created by ubuntu on 1/10/23. -// - -#ifndef YOLOV8_CSRC_DETECT_INCLUDE_UTILS_H -#define YOLOV8_CSRC_DETECT_INCLUDE_UTILS_H -#include -#include -#include -#include -#include -#include "NvInfer.h" - -#define CHECK(call) \ -do \ -{ \ - const cudaError_t error_code = call; \ - if (error_code != cudaSuccess) \ - { \ - printf("CUDA Error:\n"); \ - printf(" File: %s\n", __FILE__); \ - printf(" Line: %d\n", __LINE__); \ - printf(" Error code: %d\n", error_code); \ - printf(" Error text: %s\n", \ - cudaGetErrorString(error_code)); \ - exit(1); \ - } \ -} while (0) - -class Logger : public nvinfer1::ILogger -{ -public: - nvinfer1::ILogger::Severity reportableSeverity; - - explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO) : - reportableSeverity(severity) - { - } - - void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override - { - if (severity > reportableSeverity) - { - return; - } - switch (severity) - { - case nvinfer1::ILogger::Severity::kINTERNAL_ERROR: - std::cerr << "INTERNAL_ERROR: "; - break; - case nvinfer1::ILogger::Severity::kERROR: - std::cerr << "ERROR: "; - break; - case nvinfer1::ILogger::Severity::kWARNING: - std::cerr << "WARNING: "; - break; - case nvinfer1::ILogger::Severity::kINFO: - std::cerr << "INFO: "; - break; - default: - std::cerr << "VERBOSE: "; - break; - } - std::cerr << msg << std::endl; - } -}; - -inline int get_size_by_dims(const nvinfer1::Dims& dims) -{ - int size = 1; - for (int i = 0; i < dims.nbDims; i++) - { - size *= dims.d[i]; - } - return size; -} - -inline int DataTypeToSize(const nvinfer1::DataType& dataType) -{ - switch (dataType) - { - case nvinfer1::DataType::kFLOAT: - return sizeof(float); - case nvinfer1::DataType::kHALF: - return 2; - case nvinfer1::DataType::kINT8: - return sizeof(int8_t); - case nvinfer1::DataType::kINT32: - return sizeof(int32_t); - case nvinfer1::DataType::kBOOL: - return sizeof(bool); - default: - return sizeof(float); - } -} - -inline float clamp(const float val, const float minVal = 0.f, const float maxVal = 1280.f) -{ - assert(minVal <= maxVal); - return std::min(maxVal, std::max(minVal, val)); -} - -inline bool IsPathExist(const std::string& path) -{ - if (access(path.c_str(), 0) == F_OK) - { - return true; - } - return false; -} - -inline bool IsFile(const std::string& path) -{ - if (!IsPathExist(path)) - { - printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str()); - return false; - } - struct stat buffer; - return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode)); -} - -inline bool IsFolder(const std::string& path) -{ - if (!IsPathExist(path)) - { - return false; - } - struct stat buffer; - return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode)); -} - -#endif //YOLOV8_CSRC_DETECT_INCLUDE_UTILS_H diff --git a/csrc/detection/include/yolov8.hpp b/csrc/detection/include/yolov8.hpp deleted file mode 100644 index 7417a90..0000000 --- a/csrc/detection/include/yolov8.hpp +++ /dev/null @@ -1,266 +0,0 @@ -// -// Created by ubuntu on 1/8/23. -// -#include "config.h" -#include "utils.h" -#include -#include "NvInferPlugin.h" - -using namespace det; - -class YOLOv8 -{ -public: - explicit YOLOv8(const std::string& engine_file_path); - ~YOLOv8(); - - void make_pipe(bool warmup = true); - void copy_from_Mat(const cv::Mat& image); - void infer(); - void postprocess(std::vector& objs); - - size_t in_size = 1 * 3 * INPUT_W * INPUT_H; - float w = INPUT_W; - float h = INPUT_H; - float ratio = 1.0f; - float dw = 0.f; - float dh = 0.f; - std::array, NUM_OUTPUT> out_sizes{}; - std::array outputs{}; -private: - nvinfer1::ICudaEngine* engine = nullptr; - nvinfer1::IRuntime* runtime = nullptr; - nvinfer1::IExecutionContext* context = nullptr; - cudaStream_t stream = nullptr; - std::array buffs{}; - Logger gLogger{ nvinfer1::ILogger::Severity::kERROR }; - -}; - -YOLOv8::YOLOv8(const std::string& engine_file_path) -{ - std::ifstream file(engine_file_path, std::ios::binary); - assert(file.good()); - file.seekg(0, std::ios::end); - auto size = file.tellg(); - std::ostringstream fmt; - - file.seekg(0, std::ios::beg); - char* trtModelStream = new char[size]; - assert(trtModelStream); - file.read(trtModelStream, size); - file.close(); - initLibNvInferPlugins(&this->gLogger, ""); - this->runtime = nvinfer1::createInferRuntime(this->gLogger); - assert(this->runtime != nullptr); - - this->engine = this->runtime->deserializeCudaEngine(trtModelStream, size); - assert(this->engine != nullptr); - - this->context = this->engine->createExecutionContext(); - - assert(this->context != nullptr); - cudaStreamCreate(&this->stream); - -} - -YOLOv8::~YOLOv8() -{ - this->context->destroy(); - this->engine->destroy(); - this->runtime->destroy(); - cudaStreamDestroy(this->stream); - for (auto& ptr : this->buffs) - { - CHECK(cudaFree(ptr)); - } - - for (auto& ptr : this->outputs) - { - CHECK(cudaFreeHost(ptr)); - } - -} -void YOLOv8::make_pipe(bool warmup) -{ - const nvinfer1::Dims input_dims = this->engine->getBindingDimensions( - this->engine->getBindingIndex(INPUT) - ); - this->in_size = get_size_by_dims(input_dims); - CHECK(cudaMalloc(&this->buffs[0], this->in_size * sizeof(float))); - - this->context->setBindingDimensions(0, input_dims); - const int32_t num_dets_idx = this->engine->getBindingIndex(NUM_DETS); - const nvinfer1::Dims num_dets_dims = this->context->getBindingDimensions(num_dets_idx); - this->out_sizes[num_dets_idx - NUM_INPUT].first = get_size_by_dims(num_dets_dims); - this->out_sizes[num_dets_idx - NUM_INPUT].second = DataTypeToSize( - this->engine->getBindingDataType(num_dets_idx)); - - const int32_t bboxes_idx = this->engine->getBindingIndex(BBOXES); - const nvinfer1::Dims bboxes_dims = this->context->getBindingDimensions(bboxes_idx); - - this->out_sizes[bboxes_idx - NUM_INPUT].first = get_size_by_dims(bboxes_dims); - this->out_sizes[bboxes_idx - NUM_INPUT].second = DataTypeToSize( - this->engine->getBindingDataType(bboxes_idx)); - - const int32_t scores_idx = this->engine->getBindingIndex(SCORES); - const nvinfer1::Dims scores_dims = this->context->getBindingDimensions(scores_idx); - this->out_sizes[scores_idx - NUM_INPUT].first = get_size_by_dims(scores_dims); - this->out_sizes[scores_idx - NUM_INPUT].second = DataTypeToSize( - this->engine->getBindingDataType(scores_idx)); - - const int32_t labels_idx = this->engine->getBindingIndex(LABELS); - const nvinfer1::Dims labels_dims = this->context->getBindingDimensions(labels_idx); - this->out_sizes[labels_idx - NUM_INPUT].first = get_size_by_dims(labels_dims); - this->out_sizes[labels_idx - NUM_INPUT].second = DataTypeToSize( - this->engine->getBindingDataType(labels_idx)); - - for (int i = 0; i < NUM_OUTPUT; i++) - { - const int osize = this->out_sizes[i].first * out_sizes[i].second; - CHECK(cudaHostAlloc(&this->outputs[i], osize, 0)); - CHECK(cudaMalloc(&this->buffs[NUM_INPUT + i], osize)); - } - if (warmup) - { - for (int i = 0; i < 10; i++) - { - size_t isize = this->in_size * sizeof(float); - auto* tmp = new float[isize]; - - CHECK(cudaMemcpyAsync(this->buffs[0], - tmp, - isize, - cudaMemcpyHostToDevice, - this->stream)); - this->infer(); - } - printf("model warmup 10 times\n"); - - } -} - -void YOLOv8::copy_from_Mat(const cv::Mat& image) -{ - float height = (float)image.rows; - float width = (float)image.cols; - - float r = std::min(INPUT_H / height, INPUT_W / width); - - int padw = (int)std::round(width * r); - int padh = (int)std::round(height * r); - - cv::Mat tmp; - if ((int)width != padw || (int)height != padh) - { - cv::resize(image, tmp, cv::Size(padw, padh)); - } - else - { - tmp = image.clone(); - } - - float _dw = INPUT_W - padw; - float _dh = INPUT_H - padh; - - _dw /= 2.0f; - _dh /= 2.0f; - int top = int(std::round(_dh - 0.1f)); - int bottom = int(std::round(_dh + 0.1f)); - int left = int(std::round(_dw - 0.1f)); - int right = int(std::round(_dw + 0.1f)); - cv::copyMakeBorder(tmp, tmp, top, bottom, left, right, cv::BORDER_CONSTANT, PAD_COLOR); - cv::dnn::blobFromImage(tmp, - tmp, - 1 / 255.f, - cv::Size(), - cv::Scalar(0, 0, 0), - true, - false, - CV_32F); - CHECK(cudaMemcpyAsync(this->buffs[0], - tmp.ptr(), - this->in_size * sizeof(float), - cudaMemcpyHostToDevice, - this->stream)); - - this->ratio = 1 / r; - this->dw = _dw; - this->dh = _dh; - this->w = width; - this->h = height; -} - -void YOLOv8::infer() -{ - this->context->enqueueV2(buffs.data(), this->stream, nullptr); - for (int i = 0; i < NUM_OUTPUT; i++) - { - const int osize = this->out_sizes[i].first * out_sizes[i].second; - CHECK(cudaMemcpyAsync(this->outputs[i], - this->buffs[NUM_INPUT + i], - osize, - cudaMemcpyDeviceToHost, - this->stream)); - } - cudaStreamSynchronize(this->stream); - -} - -void YOLOv8::postprocess(std::vector& objs) -{ - int* num_dets = static_cast(this->outputs[0]); - auto* boxes = static_cast(this->outputs[1]); - auto* scores = static_cast(this->outputs[2]); - int* labels = static_cast(this->outputs[3]); - for (int i = 0; i < num_dets[0]; i++) - { - float* ptr = boxes + i * 4; - Object obj; - float x0 = *ptr++ - this->dw; - float y0 = *ptr++ - this->dh; - float x1 = *ptr++ - this->dw; - float y1 = *ptr++ - this->dh; - - x0 = clamp(x0 * this->ratio, 0.f, this->w); - y0 = clamp(y0 * this->ratio, 0.f, this->h); - x1 = clamp(x1 * this->ratio, 0.f, this->w); - y1 = clamp(y1 * this->ratio, 0.f, this->h); - obj.rect.x = x0; - obj.rect.y = y0; - obj.rect.width = x1 - x0; - obj.rect.height = y1 - y0; - obj.prob = *(scores + i); - obj.label = *(labels + i); - - objs.push_back(obj); - - } -} - -static void draw_objects(const cv::Mat& image, cv::Mat& res, const std::vector& objs) -{ - res = image.clone(); - for (auto& obj : objs) - { - cv::Scalar color = cv::Scalar(COLORS[obj.label][0], COLORS[obj.label][1], COLORS[obj.label][2]); - cv::rectangle(res, obj.rect, color, 2); - - char text[256]; - sprintf(text, "%s %.1f%%", CLASS_NAMES[obj.label], obj.prob * 100); - - int baseLine = 0; - cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine); - - int x = (int)obj.rect.x; - int y = (int)obj.rect.y + 1; - - if (y > res.rows) - y = res.rows; - - cv::rectangle(res, cv::Rect(x, y, label_size.width, label_size.height + baseLine), RECT_COLOR, -1); - - cv::putText(res, text, cv::Point(x, y + label_size.height), - cv::FONT_HERSHEY_SIMPLEX, 0.4, TXT_COLOR, 1); - } -} diff --git a/csrc/detection/main.cpp b/csrc/detection/main.cpp deleted file mode 100644 index de02d55..0000000 --- a/csrc/detection/main.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// -// Created by ubuntu on 1/8/23. -// -#include "include/yolov8.hpp" -int main(int argc, char** argv) -{ - cudaSetDevice(DEVICE); - - const std::string engine_file_path{ argv[1] }; - const std::string path{ argv[2] }; - std::vector imagePathList; - bool isVideo{ false }; - if (IsFile(path)) - { - std::string suffix = path.substr(path.find_last_of('.') + 1); - if (suffix == "jpg") - { - imagePathList.push_back(path); - } - else if (suffix == "mp4") - { - isVideo = true; - } - } - else if (IsFolder(path)) - { - cv::glob(path + "/*.jpg", imagePathList); - } - - auto* yolov8 = new YOLOv8(engine_file_path); - yolov8->make_pipe(true); - cv::Mat res; - cv::namedWindow("result", cv::WINDOW_AUTOSIZE); - if (isVideo) - { - cv::VideoCapture cap(path); - cv::Mat image; - if (!cap.isOpened()) - { - printf("can not open ...\n"); - return -1; - } - double fp_ = cap.get(cv::CAP_PROP_FPS); - int fps = round(1000.0 / fp_); - while (cap.read(image)) - { - auto start = std::chrono::system_clock::now(); - yolov8->copy_from_Mat(image); - yolov8->infer(); - std::vector objs; - yolov8->postprocess(objs); - draw_objects(image, res, objs); - auto end = std::chrono::system_clock::now(); - auto tc = std::chrono::duration_cast(end - start).count() / 1000.f; - cv::imshow("result", res); - printf("cost %2.4f ms\n", tc); - if (cv::waitKey(fps) == 'q') - { - break; - } - } - } - else - { - for (auto path : imagePathList) - { - cv::Mat image = cv::imread(path); - yolov8->copy_from_Mat(image); - auto start = std::chrono::system_clock::now(); - yolov8->infer(); - auto end = std::chrono::system_clock::now(); - auto tc = std::chrono::duration_cast(end - start).count() / 1000.f; - - printf("infer %-20s\tcost %2.4f ms\n", path.c_str(), tc); - - std::vector objs; - yolov8->postprocess(objs); - draw_objects(image, res, objs); - cv::imshow("result", res); - cv::waitKey(0); - } - } - cv::destroyAllWindows(); - delete yolov8; - return 0; -} diff --git a/csrc/segment/CMakeLists.txt b/csrc/segment/CMakeLists.txt index 7ac4d2a..d66812b 100644 --- a/csrc/segment/CMakeLists.txt +++ b/csrc/segment/CMakeLists.txt @@ -47,8 +47,7 @@ include_directories(${INCLUDE_DIRS}) add_executable(${PROJECT_NAME} main.cpp include/yolov8-seg.hpp - include/config.h - include/utils.h + include/common.hpp ) target_link_directories(${PROJECT_NAME} PUBLIC ${ALL_LIBS}) diff --git a/csrc/segment/include/utils.h b/csrc/segment/include/common.hpp similarity index 78% rename from csrc/segment/include/utils.h rename to csrc/segment/include/common.hpp index 6f5e702..dc83775 100644 --- a/csrc/segment/include/utils.h +++ b/csrc/segment/include/common.hpp @@ -1,13 +1,11 @@ // -// Created by ubuntu on 1/10/23. +// Created by ubuntu on 1/24/23. // -#ifndef YOLOV8_CSRC_SEGMENT_INCLUDE_UTILS_H -#define YOLOV8_CSRC_SEGMENT_INCLUDE_UTILS_H +#ifndef SEGMENT_COMMON_HPP +#define SEGMENT_COMMON_HPP +#include "opencv2/opencv.hpp" #include -#include -#include -#include #include #include "NvInfer.h" @@ -75,29 +73,28 @@ inline int get_size_by_dims(const nvinfer1::Dims& dims) return size; } -inline int DataTypeToSize(const nvinfer1::DataType& dataType) +inline int type_to_size(const nvinfer1::DataType& dataType) { switch (dataType) { case nvinfer1::DataType::kFLOAT: - return sizeof(float); + return 4; case nvinfer1::DataType::kHALF: return 2; - case nvinfer1::DataType::kINT8: - return sizeof(int8_t); case nvinfer1::DataType::kINT32: - return sizeof(int32_t); + return 4; + case nvinfer1::DataType::kINT8: + return 1; case nvinfer1::DataType::kBOOL: - return sizeof(bool); + return 1; default: - return sizeof(float); + return 4; } } -inline float clamp(const float val, const float minVal = 0.f, const float maxVal = 1280.f) +inline static float clamp(float val, float min, float max) { - assert(minVal <= maxVal); - return std::min(maxVal, std::max(minVal, val)); + return val > min ? (val < max ? val : max) : min; } inline bool IsPathExist(const std::string& path) @@ -130,4 +127,31 @@ inline bool IsFolder(const std::string& path) return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode)); } -#endif //YOLOV8_CSRC_SEGMENT_INCLUDE_UTILS_H +namespace seg +{ + struct Binding + { + size_t size = 1; + size_t dsize = 1; + nvinfer1::Dims dims; + std::string name; + }; + + struct Object + { + cv::Rect_ rect; + int label = 0; + float prob = 0.0; + cv::Mat boxMask; + }; + + struct PreParam + { + float ratio = 1.0f; + float dw = 0.0f; + float dh = 0.0f; + float height = 0; + float width = 0; + }; +} +#endif //SEGMENT_COMMON_HPP diff --git a/csrc/segment/include/config.h b/csrc/segment/include/config.h deleted file mode 100644 index 7a4139f..0000000 --- a/csrc/segment/include/config.h +++ /dev/null @@ -1,107 +0,0 @@ -// -// Created by ubuntu on 1/16/23. -// - -#ifndef YOLOV8_TENSORRT_CSRC_SEGMENT_INCLUDE_CONFIG_H -#define YOLOV8_TENSORRT_CSRC_SEGMENT_INCLUDE_CONFIG_H -#include "opencv2/opencv.hpp" -namespace seg -{ - const int DEVICE = 0; - - const int INPUT_W = 640; - const int INPUT_H = 640; - const int NUM_INPUT = 1; - const int NUM_OUTPUT = 2; - const int NUM_PROPOSAL = 8400; // feature map 20*20+40*40+80*80 - const int NUM_SEG_C = 32; // seg channel - const int NUM_COLS = 6 + NUM_SEG_C; // x0 y0 x1 y1 score label 32 - - const int SEG_W = 160; - const int SEG_H = 160; - - // thresholds - const float CONF_THRES = 0.25; - const float IOU_THRES = 0.65; - const float MASK_THRES = 0.5; - - // distance - const float DIS = 7680.f; - - const int NUM_BINDINGS = NUM_INPUT + NUM_OUTPUT; - const cv::Scalar PAD_COLOR = { 114, 114, 114 }; - const cv::Scalar RECT_COLOR = cv::Scalar(0, 0, 255); - const cv::Scalar TXT_COLOR = cv::Scalar(255, 255, 255); - - const char* INPUT = "images"; - const char* OUTPUT = "outputs"; - const char* PROTO = "proto"; - - const char* CLASS_NAMES[] = { - "person", "bicycle", "car", "motorcycle", "airplane", "bus", - "train", "truck", "boat", "traffic light", "fire hydrant", - "stop sign", "parking meter", "bench", "bird", "cat", - "dog", "horse", "sheep", "cow", "elephant", - "bear", "zebra", "giraffe", "backpack", "umbrella", - "handbag", "tie", "suitcase", "frisbee", "skis", - "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", - "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", - "cup", "fork", "knife", "spoon", "bowl", - "banana", "apple", "sandwich", "orange", "broccoli", - "carrot", "hot dog", "pizza", "donut", "cake", - "chair", "couch", "potted plant", "bed", "dining table", - "toilet", "tv", "laptop", "mouse", "remote", - "keyboard", "cell phone", "microwave", "oven", - "toaster", "sink", "refrigerator", "book", "clock", "vase", - "scissors", "teddy bear", "hair drier", "toothbrush" }; - - const unsigned int COLORS[80][3] = { - { 0, 114, 189 }, { 217, 83, 25 }, { 237, 177, 32 }, - { 126, 47, 142 }, { 119, 172, 48 }, { 77, 190, 238 }, - { 162, 20, 47 }, { 76, 76, 76 }, { 153, 153, 153 }, - { 255, 0, 0 }, { 255, 128, 0 }, { 191, 191, 0 }, - { 0, 255, 0 }, { 0, 0, 255 }, { 170, 0, 255 }, - { 85, 85, 0 }, { 85, 170, 0 }, { 85, 255, 0 }, - { 170, 85, 0 }, { 170, 170, 0 }, { 170, 255, 0 }, - { 255, 85, 0 }, { 255, 170, 0 }, { 255, 255, 0 }, - { 0, 85, 128 }, { 0, 170, 128 }, { 0, 255, 128 }, - { 85, 0, 128 }, { 85, 85, 128 }, { 85, 170, 128 }, - { 85, 255, 128 }, { 170, 0, 128 }, { 170, 85, 128 }, - { 170, 170, 128 }, { 170, 255, 128 }, { 255, 0, 128 }, - { 255, 85, 128 }, { 255, 170, 128 }, { 255, 255, 128 }, - { 0, 85, 255 }, { 0, 170, 255 }, { 0, 255, 255 }, - { 85, 0, 255 }, { 85, 85, 255 }, { 85, 170, 255 }, - { 85, 255, 255 }, { 170, 0, 255 }, { 170, 85, 255 }, - { 170, 170, 255 }, { 170, 255, 255 }, { 255, 0, 255 }, - { 255, 85, 255 }, { 255, 170, 255 }, { 85, 0, 0 }, - { 128, 0, 0 }, { 170, 0, 0 }, { 212, 0, 0 }, - { 255, 0, 0 }, { 0, 43, 0 }, { 0, 85, 0 }, - { 0, 128, 0 }, { 0, 170, 0 }, { 0, 212, 0 }, - { 0, 255, 0 }, { 0, 0, 43 }, { 0, 0, 85 }, - { 0, 0, 128 }, { 0, 0, 170 }, { 0, 0, 212 }, - { 0, 0, 255 }, { 0, 0, 0 }, { 36, 36, 36 }, - { 73, 73, 73 }, { 109, 109, 109 }, { 146, 146, 146 }, - { 182, 182, 182 }, { 219, 219, 219 }, { 0, 114, 189 }, - { 80, 183, 189 }, { 128, 128, 0 } - }; - - const unsigned int MASK_COLORS[20][3] = { - { 255, 56, 56 }, { 255, 157, 151 }, { 255, 112, 31 }, - { 255, 178, 29 }, { 207, 210, 49 }, { 72, 249, 10 }, - { 146, 204, 23 }, { 61, 219, 134 }, { 26, 147, 52 }, - { 0, 212, 187 }, { 44, 153, 168 }, { 0, 194, 255 }, - { 52, 69, 147 }, { 100, 115, 255 }, { 0, 24, 236 }, - { 132, 56, 255 }, { 82, 0, 133 }, { 203, 56, 255 }, - { 255, 149, 200 }, { 255, 55, 199 } - }; - - struct Object - { - cv::Rect_ rect; - int label = 0; - float prob = 0.0; - cv::Mat boxMask; - }; - -} -#endif //YOLOV8_TENSORRT_CSRC_SEGMENT_INCLUDE_CONFIG_H diff --git a/csrc/segment/include/yolov8-seg.hpp b/csrc/segment/include/yolov8-seg.hpp index 2b941b1..d9e4ce4 100644 --- a/csrc/segment/include/yolov8-seg.hpp +++ b/csrc/segment/include/yolov8-seg.hpp @@ -1,9 +1,10 @@ // -// Created by ubuntu on 1/8/23. +// Created by ubuntu on 1/24/23. // -#include "config.h" -#include "utils.h" +#ifndef SEGMENT_YOLOV8_SEG_HPP +#define SEGMENT_YOLOV8_SEG_HPP #include +#include "common.hpp" #include "NvInferPlugin.h" using namespace seg; @@ -16,23 +17,44 @@ public: void make_pipe(bool warmup = true); void copy_from_Mat(const cv::Mat& image); + void copy_from_Mat(const cv::Mat& image, cv::Size& size); + void letterbox( + const cv::Mat& image, + cv::Mat& out, + cv::Size& size + ); void infer(); - void postprocess(std::vector& objs); - - size_t in_size = 1 * 3 * INPUT_W * INPUT_H; - float w = INPUT_W; - float h = INPUT_H; - float ratio = 1.0f; - float dw = 0.f; - float dh = 0.f; - std::array, NUM_OUTPUT> out_sizes{}; - std::array outputs{}; + void postprocess( + std::vector& objs, + float score_thres = 0.25f, + float iou_thres = 0.65f, + int topk = 100, + int seg_channels = 32, + int seg_h = 160, + int seg_w = 160 + ); + static void draw_objects( + const cv::Mat& image, + cv::Mat& res, + const std::vector& objs, + const std::vector& CLASS_NAMES, + const std::vector>& COLORS, + const std::vector>& MASK_COLORS + ); + int num_bindings; + int num_inputs = 0; + int num_outputs = 0; + std::vector input_bindings; + std::vector output_bindings; + std::vector host_ptrs; + std::vector device_ptrs; + + PreParam pparam; private: nvinfer1::ICudaEngine* engine = nullptr; nvinfer1::IRuntime* runtime = nullptr; nvinfer1::IExecutionContext* context = nullptr; cudaStream_t stream = nullptr; - std::array buffs{}; Logger gLogger{ nvinfer1::ILogger::Severity::kERROR }; }; @@ -43,8 +65,6 @@ YOLOv8_seg::YOLOv8_seg(const std::string& engine_file_path) assert(file.good()); file.seekg(0, std::ios::end); auto size = file.tellg(); - std::ostringstream fmt; - file.seekg(0, std::ios::beg); char* trtModelStream = new char[size]; assert(trtModelStream); @@ -61,6 +81,41 @@ YOLOv8_seg::YOLOv8_seg(const std::string& engine_file_path) assert(this->context != nullptr); cudaStreamCreate(&this->stream); + this->num_bindings = this->engine->getNbBindings(); + + for (int i = 0; i < this->num_bindings; ++i) + { + Binding binding; + nvinfer1::Dims dims; + nvinfer1::DataType dtype = this->engine->getBindingDataType(i); + std::string name = this->engine->getBindingName(i); + binding.name = name; + binding.dsize = type_to_size(dtype); + + bool IsInput = engine->bindingIsInput(i); + if (IsInput) + { + this->num_inputs += 1; + dims = this->engine->getProfileDimensions( + i, + 0, + nvinfer1::OptProfileSelector::kMAX); + binding.size = get_size_by_dims(dims); + binding.dims = dims; + this->input_bindings.push_back(binding); + // set max opt shape + this->context->setBindingDimensions(i, dims); + + } + else + { + dims = this->context->getBindingDimensions(i); + binding.size = get_size_by_dims(dims); + binding.dims = dims; + this->output_bindings.push_back(binding); + this->num_outputs += 1; + } + } } @@ -70,58 +125,67 @@ YOLOv8_seg::~YOLOv8_seg() this->engine->destroy(); this->runtime->destroy(); cudaStreamDestroy(this->stream); - for (auto& ptr : this->buffs) + for (auto& ptr : this->device_ptrs) { CHECK(cudaFree(ptr)); } - for (auto& ptr : this->outputs) + for (auto& ptr : this->host_ptrs) { CHECK(cudaFreeHost(ptr)); } - } + void YOLOv8_seg::make_pipe(bool warmup) { - const nvinfer1::Dims input_dims = this->engine->getBindingDimensions( - this->engine->getBindingIndex(INPUT) - ); - this->in_size = get_size_by_dims(input_dims); - CHECK(cudaMalloc(&this->buffs[0], this->in_size * sizeof(float))); - - this->context->setBindingDimensions(0, input_dims); - - const int32_t output_idx = this->engine->getBindingIndex(OUTPUT); - const nvinfer1::Dims output_dims = this->context->getBindingDimensions(output_idx); - this->out_sizes[output_idx - NUM_INPUT].first = get_size_by_dims(output_dims); - this->out_sizes[output_idx - NUM_INPUT].second = DataTypeToSize( - this->engine->getBindingDataType(output_idx)); - - const int32_t proto_idx = this->engine->getBindingIndex(PROTO); - const nvinfer1::Dims proto_dims = this->context->getBindingDimensions(proto_idx); - this->out_sizes[proto_idx - NUM_INPUT].first = get_size_by_dims(proto_dims); - this->out_sizes[proto_idx - NUM_INPUT].second = DataTypeToSize( - this->engine->getBindingDataType(proto_idx)); + for (auto& bindings : this->input_bindings) + { + void* d_ptr; + CHECK(cudaMallocAsync( + &d_ptr, + bindings.size * bindings.dsize, + this->stream) + ); + this->device_ptrs.push_back(d_ptr); + } - for (int i = 0; i < NUM_OUTPUT; i++) + for (auto& bindings : this->output_bindings) { - const int osize = this->out_sizes[i].first * out_sizes[i].second; - CHECK(cudaHostAlloc(&this->outputs[i], osize, 0)); - CHECK(cudaMalloc(&this->buffs[NUM_INPUT + i], osize)); + void* d_ptr, * h_ptr; + size_t size = bindings.size * bindings.dsize; + CHECK(cudaMallocAsync( + &d_ptr, + size, + this->stream) + ); + CHECK(cudaHostAlloc( + &h_ptr, + size, + 0) + ); + this->device_ptrs.push_back(d_ptr); + this->host_ptrs.push_back(h_ptr); } + if (warmup) { for (int i = 0; i < 10; i++) { - size_t isize = this->in_size * sizeof(float); - auto* tmp = new float[isize]; - - CHECK(cudaMemcpyAsync(this->buffs[0], - tmp, - isize, - cudaMemcpyHostToDevice, - this->stream)); + for (auto& bindings : this->input_bindings) + { + size_t size = bindings.size * bindings.dsize; + void* h_ptr = malloc(size); + memset(h_ptr, 0, size); + CHECK(cudaMemcpyAsync( + this->device_ptrs[0], + h_ptr, + size, + cudaMemcpyHostToDevice, + this->stream) + ); + free(h_ptr); + } this->infer(); } printf("model warmup 10 times\n"); @@ -129,158 +193,257 @@ void YOLOv8_seg::make_pipe(bool warmup) } } -void YOLOv8_seg::copy_from_Mat(const cv::Mat& image) +void YOLOv8_seg::letterbox( + const cv::Mat& image, + cv::Mat& out, + cv::Size& size +) { - float height = (float)image.rows; - float width = (float)image.cols; - - float r = std::min(INPUT_H / height, INPUT_W / width); + const float inp_h = size.height; + const float inp_w = size.width; + float height = image.rows; + float width = image.cols; - int padw = (int)std::round(width * r); - int padh = (int)std::round(height * r); + float r = std::min(inp_h / height, inp_w / width); + int padw = std::round(width * r); + int padh = std::round(height * r); cv::Mat tmp; if ((int)width != padw || (int)height != padh) { - cv::resize(image, tmp, cv::Size(padw, padh)); + cv::resize( + image, + tmp, + cv::Size(padw, padh) + ); } else { tmp = image.clone(); } - float _dw = INPUT_W - padw; - float _dh = INPUT_H - padh; + float dw = inp_w - padw; + float dh = inp_h - padh; - _dw /= 2.0f; - _dh /= 2.0f; - int top = int(std::round(_dh - 0.1f)); - int bottom = int(std::round(_dh + 0.1f)); - int left = int(std::round(_dw - 0.1f)); - int right = int(std::round(_dw + 0.1f)); - cv::copyMakeBorder(tmp, tmp, top, bottom, left, right, cv::BORDER_CONSTANT, PAD_COLOR); - cv::dnn::blobFromImage(tmp, + dw /= 2.0f; + dh /= 2.0f; + int top = int(std::round(dh - 0.1f)); + int bottom = int(std::round(dh + 0.1f)); + int left = int(std::round(dw - 0.1f)); + int right = int(std::round(dw + 0.1f)); + + cv::copyMakeBorder( tmp, + tmp, + top, + bottom, + left, + right, + cv::BORDER_CONSTANT, + { 114, 114, 114 } + ); + + cv::dnn::blobFromImage(tmp, + out, 1 / 255.f, cv::Size(), cv::Scalar(0, 0, 0), true, false, - CV_32F); - CHECK(cudaMemcpyAsync(this->buffs[0], - tmp.ptr(), - this->in_size * sizeof(float), + CV_32F + ); + this->pparam.ratio = 1 / r; + this->pparam.dw = dw; + this->pparam.dh = dh; + this->pparam.height = height; + this->pparam.width = width;; +} + +void YOLOv8_seg::copy_from_Mat(const cv::Mat& image) +{ + cv::Mat nchw; + auto& in_binding = this->input_bindings[0]; + auto width = in_binding.dims.d[3]; + auto height = in_binding.dims.d[2]; + cv::Size size{ width, height }; + this->letterbox( + image, + nchw, + size + ); + + this->context->setBindingDimensions( + 0, + nvinfer1::Dims + { + 4, + { 1, 3, height, width } + } + ); + + CHECK(cudaMemcpyAsync( + this->device_ptrs[0], + nchw.ptr(), + nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, - this->stream)); + this->stream) + ); +} - this->ratio = 1 / r; - this->dw = _dw; - this->dh = _dh; - this->w = width; - this->h = height; +void YOLOv8_seg::copy_from_Mat(const cv::Mat& image, cv::Size& size) +{ + cv::Mat nchw; + this->letterbox( + image, + nchw, + size + ); + this->context->setBindingDimensions( + 0, + nvinfer1::Dims + { 4, + { 1, 3, size.height, size.width } + } + ); + CHECK(cudaMemcpyAsync( + this->device_ptrs[0], + nchw.ptr(), + nchw.total() * nchw.elemSize(), + cudaMemcpyHostToDevice, + this->stream) + ); } void YOLOv8_seg::infer() { - this->context->enqueueV2(buffs.data(), this->stream, nullptr); - for (int i = 0; i < NUM_OUTPUT; i++) + + this->context->enqueueV2( + this->device_ptrs.data(), + this->stream, + nullptr + ); + for (int i = 0; i < this->num_outputs; i++) { - const int osize = this->out_sizes[i].first * out_sizes[i].second; - CHECK(cudaMemcpyAsync(this->outputs[i], - this->buffs[NUM_INPUT + i], + size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize; + CHECK(cudaMemcpyAsync(this->host_ptrs[i], + this->device_ptrs[i + this->num_inputs], osize, cudaMemcpyDeviceToHost, - this->stream)); + this->stream) + ); + } cudaStreamSynchronize(this->stream); } -void YOLOv8_seg::postprocess(std::vector& objs) +void YOLOv8_seg::postprocess(std::vector& objs, + float score_thres, + float iou_thres, + int topk, + int seg_channels, + int seg_h, + int seg_w +) { objs.clear(); - auto* output = static_cast(this->outputs[0]); // x0 y0 x1 y1 s l *32 - cv::Mat protos = cv::Mat(NUM_SEG_C, SEG_W * SEG_H, CV_32F, - static_cast(this->outputs[1])); + auto input_h = this->input_bindings[0].dims.d[2]; + auto input_w = this->input_bindings[0].dims.d[3]; + auto num_anchors = this->output_bindings[0].dims.d[1]; + auto num_channels = this->output_bindings[0].dims.d[2]; + + auto& dw = this->pparam.dw; + auto& dh = this->pparam.dh; + auto& width = this->pparam.width; + auto& height = this->pparam.height; + auto& ratio = this->pparam.ratio; + + auto* output = static_cast(this->host_ptrs[0]); + cv::Mat protos = cv::Mat(seg_channels, seg_h * seg_w, CV_32F, + static_cast(this->host_ptrs[1])); std::vector labels; std::vector scores; std::vector bboxes; std::vector mask_confs; + std::vector indices; - for (int i = 0; i < NUM_PROPOSAL; i++) + for (int i = 0; i < num_anchors; i++) { - float* ptr = output + i * NUM_COLS; + float* ptr = output + i * num_channels; float score = *(ptr + 4); - if (score > CONF_THRES) + if (score > score_thres) { - float x0 = *ptr++ - this->dw; - float y0 = *ptr++ - this->dh; - float x1 = *ptr++ - this->dw; - float y1 = *ptr++ - this->dh; + float x0 = *ptr++ - dw; + float y0 = *ptr++ - dh; + float x1 = *ptr++ - dw; + float y1 = *ptr++ - dh; - x0 = clamp(x0 * this->ratio, 0.f, this->w); - y0 = clamp(y0 * this->ratio, 0.f, this->h); - x1 = clamp(x1 * this->ratio, 0.f, this->w); - y1 = clamp(y1 * this->ratio, 0.f, this->h); + x0 = clamp(x0 * ratio, 0.f, width); + y0 = clamp(y0 * ratio, 0.f, height); + x1 = clamp(x1 * ratio, 0.f, width); + y1 = clamp(y1 * ratio, 0.f, height); int label = *(++ptr); - cv::Mat mask_conf = cv::Mat(1, NUM_SEG_C, CV_32F, ++ptr); + cv::Mat mask_conf = cv::Mat(1, seg_channels, CV_32F, ++ptr); mask_confs.push_back(mask_conf); labels.push_back(label); scores.push_back(score); - -#if defined(BATCHED_NMS) bboxes.push_back(cv::Rect_(x0, y0, x1 - x0, y1 - y0)); -#else - bboxes.push_back(cv::Rect_(x0 + label * DIS, - y0 + label * DIS, - x1 - x0, - y1 - y0)); -#endif + } } - std::vector indices; + #if defined(BATCHED_NMS) - cv::dnn::NMSBoxesBatched(bboxes, scores, labels, CONF_THRES, IOU_THRES, indices); + cv::dnn::NMSBoxesBatched( + bboxes, + scores, + labels, + score_thres, + iou_thres, + indices + ); #else - cv::dnn::NMSBoxes(bboxes, scores, CONF_THRES, IOU_THRES, indices); + cv::dnn::NMSBoxes( + bboxes, + scores, + score_thres, + iou_thres, + indices + ); #endif cv::Mat masks; - + int cnt = 0; for (auto& i : indices) { -#if defined(BATCHED_NMS) + if (cnt >= topk) + { + break; + } cv::Rect tmp = bboxes[i]; -#else - cv::Rect tmp = { (int)(bboxes[i].x - labels[i] * DIS), - (int)(bboxes[i].y - labels[i] * DIS), - bboxes[i].width, - bboxes[i].height }; -#endif - Object obj; obj.label = labels[i]; obj.rect = tmp; obj.prob = scores[i]; masks.push_back(mask_confs[i]); objs.push_back(obj); + cnt += 1; } cv::Mat matmulRes = (masks * protos).t(); - cv::Mat maskMat = matmulRes.reshape(indices.size(), { SEG_W, SEG_H }); + cv::Mat maskMat = matmulRes.reshape(indices.size(), { seg_w, seg_h }); std::vector maskChannels; cv::split(maskMat, maskChannels); - int scale_dw = this->dw / INPUT_W * SEG_W; - int scale_dh = this->dh / INPUT_H * SEG_H; + int scale_dw = dw / input_w * seg_w; + int scale_dh = dh / input_h * seg_h; cv::Rect roi( scale_dw, scale_dh, - SEG_W - 2 * scale_dw, - SEG_H - 2 * scale_dh); + seg_w - 2 * scale_dw, + seg_h - 2 * scale_dh); for (int i = 0; i < indices.size(); i++) { @@ -288,30 +451,64 @@ void YOLOv8_seg::postprocess(std::vector& objs) cv::exp(-maskChannels[i], dest); dest = 1.0 / (1.0 + dest); dest = dest(roi); - cv::resize(dest, mask, cv::Size((int)this->w, (int)this->h), cv::INTER_LINEAR); - objs[i].boxMask = mask(objs[i].rect) > MASK_THRES; + cv::resize( + dest, + mask, + cv::Size((int)width, (int)height), + cv::INTER_LINEAR + ); + objs[i].boxMask = mask(objs[i].rect) > 0.5f; } } -static void draw_objects(const cv::Mat& image, cv::Mat& res, const std::vector& objs) +void YOLOv8_seg::draw_objects(const cv::Mat& image, + cv::Mat& res, + const std::vector& objs, + const std::vector& CLASS_NAMES, + const std::vector>& COLORS, + const std::vector>& MASK_COLORS +) { res = image.clone(); cv::Mat mask = image.clone(); for (auto& obj : objs) { int idx = obj.label; - cv::Scalar color = cv::Scalar(COLORS[idx][0], COLORS[idx][1], COLORS[idx][2]); + cv::Scalar color = cv::Scalar( + COLORS[idx][0], + COLORS[idx][1], + COLORS[idx][2] + ); cv::Scalar mask_color = cv::Scalar( - MASK_COLORS[idx % 20][0], MASK_COLORS[idx % 20][1], MASK_COLORS[idx % 20][2]); - cv::rectangle(res, obj.rect, color, 2); + MASK_COLORS[idx % 20][0], + MASK_COLORS[idx % 20][1], + MASK_COLORS[idx % 20][2] + ); + cv::rectangle( + res, + obj.rect, + color, + 2 + ); char text[256]; - sprintf(text, "%s %.1f%%", CLASS_NAMES[idx], obj.prob * 100); + sprintf( + text, + "%s %.1f%%", + CLASS_NAMES[idx].c_str(), + obj.prob * 100 + ); mask(obj.rect).setTo(mask_color, obj.boxMask); int baseLine = 0; - cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine); + cv::Size label_size = cv::getTextSize( + text, + cv::FONT_HERSHEY_SIMPLEX, + 0.4, + 1, + &baseLine + ); int x = (int)obj.rect.x; int y = (int)obj.rect.y + 1; @@ -319,11 +516,30 @@ static void draw_objects(const cv::Mat& image, cv::Mat& res, const std::vector res.rows) y = res.rows; - cv::rectangle(res, cv::Rect(x, y, label_size.width, label_size.height + baseLine), RECT_COLOR, -1); - - cv::putText(res, text, cv::Point(x, y + label_size.height), - cv::FONT_HERSHEY_SIMPLEX, 0.4, TXT_COLOR, 1); + cv::rectangle( + res, + cv::Rect(x, y, label_size.width, label_size.height + baseLine), + { 0, 0, 255 }, + -1 + ); + + cv::putText( + res, + text, + cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, + 0.4, + { 255, 255, 255 }, + 1 + ); } - cv::addWeighted(res, 0.5, mask, 0.8, 1, res); - + cv::addWeighted( + res, + 0.5, + mask, + 0.8, + 1, + res + ); } +#endif //SEGMENT_YOLOV8_SEG_HPP diff --git a/csrc/segment/main.cpp b/csrc/segment/main.cpp index 00f5b36..2185dd1 100644 --- a/csrc/segment/main.cpp +++ b/csrc/segment/main.cpp @@ -1,60 +1,153 @@ // -// Created by ubuntu on 1/8/23. +// Created by ubuntu on 1/20/23. // -#include "include/yolov8-seg.hpp" +#include "chrono" +#include "yolov8-seg.hpp" +#include "opencv2/opencv.hpp" + +const std::vector CLASS_NAMES = { + "person", "bicycle", "car", "motorcycle", "airplane", "bus", + "train", "truck", "boat", "traffic light", "fire hydrant", + "stop sign", "parking meter", "bench", "bird", "cat", + "dog", "horse", "sheep", "cow", "elephant", + "bear", "zebra", "giraffe", "backpack", "umbrella", + "handbag", "tie", "suitcase", "frisbee", "skis", + "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", + "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", + "cup", "fork", "knife", "spoon", "bowl", + "banana", "apple", "sandwich", "orange", "broccoli", + "carrot", "hot dog", "pizza", "donut", "cake", + "chair", "couch", "potted plant", "bed", "dining table", + "toilet", "tv", "laptop", "mouse", "remote", + "keyboard", "cell phone", "microwave", "oven", + "toaster", "sink", "refrigerator", "book", "clock", "vase", + "scissors", "teddy bear", "hair drier", "toothbrush" }; + +const std::vector> COLORS = { + { 0, 114, 189 }, { 217, 83, 25 }, { 237, 177, 32 }, + { 126, 47, 142 }, { 119, 172, 48 }, { 77, 190, 238 }, + { 162, 20, 47 }, { 76, 76, 76 }, { 153, 153, 153 }, + { 255, 0, 0 }, { 255, 128, 0 }, { 191, 191, 0 }, + { 0, 255, 0 }, { 0, 0, 255 }, { 170, 0, 255 }, + { 85, 85, 0 }, { 85, 170, 0 }, { 85, 255, 0 }, + { 170, 85, 0 }, { 170, 170, 0 }, { 170, 255, 0 }, + { 255, 85, 0 }, { 255, 170, 0 }, { 255, 255, 0 }, + { 0, 85, 128 }, { 0, 170, 128 }, { 0, 255, 128 }, + { 85, 0, 128 }, { 85, 85, 128 }, { 85, 170, 128 }, + { 85, 255, 128 }, { 170, 0, 128 }, { 170, 85, 128 }, + { 170, 170, 128 }, { 170, 255, 128 }, { 255, 0, 128 }, + { 255, 85, 128 }, { 255, 170, 128 }, { 255, 255, 128 }, + { 0, 85, 255 }, { 0, 170, 255 }, { 0, 255, 255 }, + { 85, 0, 255 }, { 85, 85, 255 }, { 85, 170, 255 }, + { 85, 255, 255 }, { 170, 0, 255 }, { 170, 85, 255 }, + { 170, 170, 255 }, { 170, 255, 255 }, { 255, 0, 255 }, + { 255, 85, 255 }, { 255, 170, 255 }, { 85, 0, 0 }, + { 128, 0, 0 }, { 170, 0, 0 }, { 212, 0, 0 }, + { 255, 0, 0 }, { 0, 43, 0 }, { 0, 85, 0 }, + { 0, 128, 0 }, { 0, 170, 0 }, { 0, 212, 0 }, + { 0, 255, 0 }, { 0, 0, 43 }, { 0, 0, 85 }, + { 0, 0, 128 }, { 0, 0, 170 }, { 0, 0, 212 }, + { 0, 0, 255 }, { 0, 0, 0 }, { 36, 36, 36 }, + { 73, 73, 73 }, { 109, 109, 109 }, { 146, 146, 146 }, + { 182, 182, 182 }, { 219, 219, 219 }, { 0, 114, 189 }, + { 80, 183, 189 }, { 128, 128, 0 } +}; + +const std::vector> MASK_COLORS = { + { 255, 56, 56 }, { 255, 157, 151 }, { 255, 112, 31 }, + { 255, 178, 29 }, { 207, 210, 49 }, { 72, 249, 10 }, + { 146, 204, 23 }, { 61, 219, 134 }, { 26, 147, 52 }, + { 0, 212, 187 }, { 44, 153, 168 }, { 0, 194, 255 }, + { 52, 69, 147 }, { 100, 115, 255 }, { 0, 24, 236 }, + { 132, 56, 255 }, { 82, 0, 133 }, { 203, 56, 255 }, + { 255, 149, 200 }, { 255, 55, 199 } +}; + int main(int argc, char** argv) { - cudaSetDevice(DEVICE); + // cuda:0 + cudaSetDevice(0); const std::string engine_file_path{ argv[1] }; const std::string path{ argv[2] }; - std::vector imagePathList; + + std::vector imagePathList; bool isVideo{ false }; + + assert(argc == 3); + + auto yolov8 = new YOLOv8_seg(engine_file_path); + yolov8->make_pipe(true); + if (IsFile(path)) { std::string suffix = path.substr(path.find_last_of('.') + 1); - if (suffix == "jpg") + if ( + suffix == "jpg" || + suffix == "jpeg" || + suffix == "png" + ) { imagePathList.push_back(path); } - else if (suffix == "mp4") + else if ( + suffix == "mp4" || + suffix == "avi" || + suffix == "m4v" || + suffix == "mpeg" || + suffix == "mov" || + suffix == "mkv" + ) { isVideo = true; } + else + { + printf("suffix %s is wrong !!!\n", suffix.c_str()); + std::abort(); + } } else if (IsFolder(path)) { cv::glob(path + "/*.jpg", imagePathList); } - auto* yolov8 = new YOLOv8_seg(engine_file_path); - yolov8->make_pipe(true); - cv::Mat res; + cv::Mat res, image; + cv::Size size = cv::Size{ 640, 640 }; + int topk = 100; + int seg_h = 160; + int seg_w = 160; + int seg_channels = 32; + float score_thres = 0.25f; + float iou_thres = 0.65f; + + std::vector objs; + cv::namedWindow("result", cv::WINDOW_AUTOSIZE); + if (isVideo) { cv::VideoCapture cap(path); - cv::Mat image; + if (!cap.isOpened()) { - printf("can not open ...\n"); + printf("can not open %s\n", path.c_str()); return -1; } - double fp_ = cap.get(cv::CAP_PROP_FPS); - int fps = round(1000.0 / fp_); while (cap.read(image)) { + objs.clear(); + yolov8->copy_from_Mat(image, size); auto start = std::chrono::system_clock::now(); - yolov8->copy_from_Mat(image); yolov8->infer(); - std::vector objs; - yolov8->postprocess(objs); - draw_objects(image, res, objs); auto end = std::chrono::system_clock::now(); - auto tc = std::chrono::duration_cast(end - start).count() / 1000.f; + yolov8->postprocess(objs, score_thres, iou_thres, topk, seg_channels, seg_h, seg_w); + yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS, MASK_COLORS); + auto tc = (double) + std::chrono::duration_cast(end - start).count() / 1000.; + printf("cost %2.4lf ms\n", tc); cv::imshow("result", res); - printf("cost %2.4f ms\n", tc); - if (cv::waitKey(fps) == 'q') + if (cv::waitKey(10) == 'q') { break; } @@ -62,20 +155,19 @@ int main(int argc, char** argv) } else { - for (auto path : imagePathList) + for (auto& path : imagePathList) { - cv::Mat image = cv::imread(path); - yolov8->copy_from_Mat(image); + objs.clear(); + image = cv::imread(path); + yolov8->copy_from_Mat(image, size); auto start = std::chrono::system_clock::now(); yolov8->infer(); auto end = std::chrono::system_clock::now(); - auto tc = std::chrono::duration_cast(end - start).count() / 1000.f; - - printf("infer %-20s\tcost %2.4f ms\n", path.c_str(), tc); - - std::vector objs; - yolov8->postprocess(objs); - draw_objects(image, res, objs); + yolov8->postprocess(objs, score_thres, iou_thres, topk, seg_channels, seg_h, seg_w); + yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS, MASK_COLORS); + auto tc = (double) + std::chrono::duration_cast(end - start).count() / 1000.; + printf("cost %2.4lf ms\n", tc); cv::imshow("result", res); cv::waitKey(0); } diff --git a/docs/Segment.md b/docs/Segment.md index f8dba62..8aef1de 100644 --- a/docs/Segment.md +++ b/docs/Segment.md @@ -100,7 +100,16 @@ You can infer segment engine with c++ in [`csrc/segment`](../csrc/segment) . ### Build: -Please set you own librarys in [`CMakeLists.txt`](../csrc/segment/CMakeLists.txt) and modify you own config in [`config.h`](../csrc/segment/include/config.h) such as `CLASS_NAMES`, `COLORS` and others . +Please set you own librarys in [`CMakeLists.txt`](../csrc/segment/CMakeLists.txt) and modify you own config in [`main.cpp`](../csrc/segment/main.cpp) such as `CLASS_NAMES`, `COLORS`, `MASK_COLORS` and postprocess parameters . + +```c++ +int topk = 100; +int seg_h = 160; // yolov8 model proto height +int seg_w = 160; // yolov8 model proto width +int seg_channels = 32; // yolov8 model proto channels +float score_thres = 0.25f; +float iou_thres = 0.65f; +``` ``` shell export root=${PWD}