Seg model rebuild

2 years ago · 1478775411
parent 303c0ae8bb
commit 1478775411
15 changed files with 546 additions and 927 deletions
--- a/csrc/detect/end2end/include/common.hpp
+++ b/csrc/detect/end2end/include/common.hpp
@@ -2,8 +2,8 @@
 // Created by ubuntu on 1/24/23.
 //

-#ifndef DETECTION_END2END_COMMON_HPP
-#define DETECTION_END2END_COMMON_HPP
+#ifndef DETECT_END2END_COMMON_HPP
+#define DETECT_END2END_COMMON_HPP
 #include "opencv2/opencv.hpp"
 #include <sys/stat.h>
 #include <unistd.h>
@@ -153,4 +153,4 @@ namespace det
 		float width = 0;
 	};
 }
-#endif //DETECTION_END2END_COMMON_HPP
+#endif //DETECT_END2END_COMMON_HPP
--- a/csrc/detect/end2end/include/yolov8.hpp
+++ b/csrc/detect/end2end/include/yolov8.hpp
@@ -1,6 +1,8 @@
 //
 // Created by ubuntu on 1/20/23.
 //
+#ifndef DETECT_END2END_YOLOV8_HPP
+#define DETECT_END2END_YOLOV8_HPP
 #include "fstream"
 #include "common.hpp"
 #include "NvInferPlugin.h"
@@ -421,3 +423,4 @@ void YOLOv8::draw_objects(
 		);
 	}
 }
+#endif //DETECT_END2END_YOLOV8_HPP
--- a/csrc/detect/normal/include/common.hpp
+++ b/csrc/detect/normal/include/common.hpp
@@ -2,8 +2,8 @@
 // Created by ubuntu on 1/24/23.
 //

-#ifndef DETECTION_NORMAL_COMMON_HPP
-#define DETECTION_NORMAL_COMMON_HPP
+#ifndef DETECT_NORMAL_COMMON_HPP
+#define DETECT_NORMAL_COMMON_HPP
 #include "opencv2/opencv.hpp"
 #include <sys/stat.h>
 #include <unistd.h>
@@ -153,4 +153,4 @@ namespace det
 		float width = 0;
 	};
 }
-#endif //DETECTION_NORMAL_COMMON_HPP
+#endif //DETECT_NORMAL_COMMON_HPP
--- a/csrc/detect/normal/include/yolov8.hpp
+++ b/csrc/detect/normal/include/yolov8.hpp
@@ -1,6 +1,8 @@
 //
 // Created by ubuntu on 1/20/23.
 //
+#ifndef DETECT_NORMAL_YOLOV8_HPP
+#define DETECT_NORMAL_YOLOV8_HPP
 #include "fstream"
 #include "common.hpp"
 #include "NvInferPlugin.h"
@@ -187,7 +189,11 @@ void YOLOv8::make_pipe(bool warmup)
 	}
 }

-void YOLOv8::letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size)
+void YOLOv8::letterbox(
+	const cv::Mat& image,
+	cv::Mat& out,
+	cv::Size& size
+)
 {
 	const float inp_h = size.height;
 	const float inp_w = size.width;
@@ -489,3 +495,4 @@ void YOLOv8::draw_objects(
 		);
 	}
 }
+#endif //DETECT_NORMAL_YOLOV8_HPP
--- a/csrc/detection/CMakeLists.txt
+++ b/csrc/detection/CMakeLists.txt
@@ -1,55 +0,0 @@
-cmake_minimum_required(VERSION 2.8.12)
-
-set(CMAKE_CUDA_ARCHITECTURES 60 61 62 70 72 75 86)
-set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
-
-project(yolov8 LANGUAGES CXX CUDA)
-
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O3 -g")
-set(CMAKE_CXX_STANDARD 14)
-set(CMAKE_BUILD_TYPE Release)
-option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
-
-# CUDA
-find_package(CUDA REQUIRED)
-message(STATUS "CUDA Libs: \n${CUDA_LIBRARIES}\n")
-message(STATUS "CUDA Headers: \n${CUDA_INCLUDE_DIRS}\n")
-
-# OpenCV
-find_package(OpenCV REQUIRED)
-message(STATUS "OpenCV Libs: \n${OpenCV_LIBS}\n")
-message(STATUS "OpenCV Libraries: \n${OpenCV_LIBRARIES}\n")
-message(STATUS "OpenCV Headers: \n${OpenCV_INCLUDE_DIRS}\n")
-
-# TensorRT
-set(TensorRT_INCLUDE_DIRS /usr/include/x86_64-linux-gnu)
-set(TensorRT_LIBRARIES /usr/lib/x86_64-linux-gnu)
-
-
-message(STATUS "TensorRT Libs: \n${TensorRT_LIBRARIES}\n")
-message(STATUS "TensorRT Headers: \n${TensorRT_INCLUDE_DIRS}\n")
-
-list(APPEND INCLUDE_DIRS
-        ${CUDA_INCLUDE_DIRS}
-        ${OpenCV_INCLUDE_DIRS}
-        ${TensorRT_INCLUDE_DIRS}
-        ./include
-        )
-
-list(APPEND ALL_LIBS
-        ${CUDA_LIBRARIES}
-        ${OpenCV_LIBRARIES}
-        ${TensorRT_LIBRARIES}
-        )
-
-include_directories(${INCLUDE_DIRS})
-
-add_executable(${PROJECT_NAME}
-        main.cpp
-        include/yolov8.hpp
-        include/config.h
-        include/utils.h
-        )
-
-target_link_directories(${PROJECT_NAME} PUBLIC ${ALL_LIBS})
-target_link_libraries(${PROJECT_NAME} PRIVATE nvinfer nvinfer_plugin cudart ${OpenCV_LIBS})
--- a/csrc/detection/include/config.h
+++ b/csrc/detection/include/config.h
@@ -1,84 +0,0 @@
-//
-// Created by ubuntu on 1/10/23.
-//
-
-#ifndef YOLOV8_CSRC_DETECT_INCLUDE_CONFIG_H
-#define YOLOV8_CSRC_DETECT_INCLUDE_CONFIG_H
-#include "opencv2/opencv.hpp"
-namespace det
-{
-	const int DEVICE = 0;
-
-	const int INPUT_W = 640;
-	const int INPUT_H = 640;
-	const int NUM_INPUT = 1;
-	const int NUM_OUTPUT = 4;
-
-	const int NUM_BINDINGS = NUM_INPUT + NUM_OUTPUT;
-	const cv::Scalar PAD_COLOR = { 114, 114, 114 };
-	const cv::Scalar RECT_COLOR = cv::Scalar(0, 0, 255);
-	const cv::Scalar TXT_COLOR = cv::Scalar(255, 255, 255);
-
-	const char* INPUT = "images";
-	const char* NUM_DETS = "num_dets";
-	const char* BBOXES = "bboxes";
-	const char* SCORES = "scores";
-	const char* LABELS = "labels";
-
-	const char* CLASS_NAMES[] = {
-		"person", "bicycle", "car", "motorcycle", "airplane", "bus",
-		"train", "truck", "boat", "traffic light", "fire hydrant",
-		"stop sign", "parking meter", "bench", "bird", "cat",
-		"dog", "horse", "sheep", "cow", "elephant",
-		"bear", "zebra", "giraffe", "backpack", "umbrella",
-		"handbag", "tie", "suitcase", "frisbee", "skis",
-		"snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
-		"skateboard", "surfboard", "tennis racket", "bottle", "wine glass",
-		"cup", "fork", "knife", "spoon", "bowl",
-		"banana", "apple", "sandwich", "orange", "broccoli",
-		"carrot", "hot dog", "pizza", "donut", "cake",
-		"chair", "couch", "potted plant", "bed", "dining table",
-		"toilet", "tv", "laptop", "mouse", "remote",
-		"keyboard", "cell phone", "microwave", "oven",
-		"toaster", "sink", "refrigerator", "book", "clock", "vase",
-		"scissors", "teddy bear", "hair drier", "toothbrush" };
-
-	const unsigned int COLORS[80][3] = {
-		{ 0, 114, 189 }, { 217, 83, 25 }, { 237, 177, 32 },
-		{ 126, 47, 142 }, { 119, 172, 48 }, { 77, 190, 238 },
-		{ 162, 20, 47 }, { 76, 76, 76 }, { 153, 153, 153 },
-		{ 255, 0, 0 }, { 255, 128, 0 }, { 191, 191, 0 },
-		{ 0, 255, 0 }, { 0, 0, 255 }, { 170, 0, 255 },
-		{ 85, 85, 0 }, { 85, 170, 0 }, { 85, 255, 0 },
-		{ 170, 85, 0 }, { 170, 170, 0 }, { 170, 255, 0 },
-		{ 255, 85, 0 }, { 255, 170, 0 }, { 255, 255, 0 },
-		{ 0, 85, 128 }, { 0, 170, 128 }, { 0, 255, 128 },
-		{ 85, 0, 128 }, { 85, 85, 128 }, { 85, 170, 128 },
-		{ 85, 255, 128 }, { 170, 0, 128 }, { 170, 85, 128 },
-		{ 170, 170, 128 }, { 170, 255, 128 }, { 255, 0, 128 },
-		{ 255, 85, 128 }, { 255, 170, 128 }, { 255, 255, 128 },
-		{ 0, 85, 255 }, { 0, 170, 255 }, { 0, 255, 255 },
-		{ 85, 0, 255 }, { 85, 85, 255 }, { 85, 170, 255 },
-		{ 85, 255, 255 }, { 170, 0, 255 }, { 170, 85, 255 },
-		{ 170, 170, 255 }, { 170, 255, 255 }, { 255, 0, 255 },
-		{ 255, 85, 255 }, { 255, 170, 255 }, { 85, 0, 0 },
-		{ 128, 0, 0 }, { 170, 0, 0 }, { 212, 0, 0 },
-		{ 255, 0, 0 }, { 0, 43, 0 }, { 0, 85, 0 },
-		{ 0, 128, 0 }, { 0, 170, 0 }, { 0, 212, 0 },
-		{ 0, 255, 0 }, { 0, 0, 43 }, { 0, 0, 85 },
-		{ 0, 0, 128 }, { 0, 0, 170 }, { 0, 0, 212 },
-		{ 0, 0, 255 }, { 0, 0, 0 }, { 36, 36, 36 },
-		{ 73, 73, 73 }, { 109, 109, 109 }, { 146, 146, 146 },
-		{ 182, 182, 182 }, { 219, 219, 219 }, { 0, 114, 189 },
-		{ 80, 183, 189 }, { 128, 128, 0 }
-	};
-
-	struct Object
-	{
-		cv::Rect_<float> rect;
-		int label = 0;
-		float prob = 0.0;
-	};
-
-}
-#endif //YOLOV8_CSRC_DETECT_INCLUDE_CONFIG_H
--- a/csrc/detection/include/utils.h
+++ b/csrc/detection/include/utils.h
@@ -1,133 +0,0 @@
-//
-// Created by ubuntu on 1/10/23.
-//
-
-#ifndef YOLOV8_CSRC_DETECT_INCLUDE_UTILS_H
-#define YOLOV8_CSRC_DETECT_INCLUDE_UTILS_H
-#include <sys/stat.h>
-#include <iostream>
-#include <string>
-#include <assert.h>
-#include <unistd.h>
-#include "NvInfer.h"
-
-#define CHECK(call)                                   \
-do                                                    \
-{                                                     \
-    const cudaError_t error_code = call;              \
-    if (error_code != cudaSuccess)                    \
-    {                                                 \
-        printf("CUDA Error:\n");                      \
-        printf("    File:       %s\n", __FILE__);     \
-        printf("    Line:       %d\n", __LINE__);     \
-        printf("    Error code: %d\n", error_code);   \
-        printf("    Error text: %s\n",                \
-            cudaGetErrorString(error_code));          \
-        exit(1);                                      \
-    }                                                 \
-} while (0)
-
-class Logger : public nvinfer1::ILogger
-{
-public:
-	nvinfer1::ILogger::Severity reportableSeverity;
-
-	explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO) :
-		reportableSeverity(severity)
-	{
-	}
-
-	void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
-	{
-		if (severity > reportableSeverity)
-		{
-			return;
-		}
-		switch (severity)
-		{
-		case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
-			std::cerr << "INTERNAL_ERROR: ";
-			break;
-		case nvinfer1::ILogger::Severity::kERROR:
-			std::cerr << "ERROR: ";
-			break;
-		case nvinfer1::ILogger::Severity::kWARNING:
-			std::cerr << "WARNING: ";
-			break;
-		case nvinfer1::ILogger::Severity::kINFO:
-			std::cerr << "INFO: ";
-			break;
-		default:
-			std::cerr << "VERBOSE: ";
-			break;
-		}
-		std::cerr << msg << std::endl;
-	}
-};
-
-inline int get_size_by_dims(const nvinfer1::Dims& dims)
-{
-	int size = 1;
-	for (int i = 0; i < dims.nbDims; i++)
-	{
-		size *= dims.d[i];
-	}
-	return size;
-}
-
-inline int DataTypeToSize(const nvinfer1::DataType& dataType)
-{
-	switch (dataType)
-	{
-	case nvinfer1::DataType::kFLOAT:
-		return sizeof(float);
-	case nvinfer1::DataType::kHALF:
-		return 2;
-	case nvinfer1::DataType::kINT8:
-		return sizeof(int8_t);
-	case nvinfer1::DataType::kINT32:
-		return sizeof(int32_t);
-	case nvinfer1::DataType::kBOOL:
-		return sizeof(bool);
-	default:
-		return sizeof(float);
-	}
-}
-
-inline float clamp(const float val, const float minVal = 0.f, const float maxVal = 1280.f)
-{
-	assert(minVal <= maxVal);
-	return std::min(maxVal, std::max(minVal, val));
-}
-
-inline bool IsPathExist(const std::string& path)
-{
-	if (access(path.c_str(), 0) == F_OK)
-	{
-		return true;
-	}
-	return false;
-}
-
-inline bool IsFile(const std::string& path)
-{
-	if (!IsPathExist(path))
-	{
-		printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
-		return false;
-	}
-	struct stat buffer;
-	return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
-}
-
-inline bool IsFolder(const std::string& path)
-{
-	if (!IsPathExist(path))
-	{
-		return false;
-	}
-	struct stat buffer;
-	return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
-}
-
-#endif //YOLOV8_CSRC_DETECT_INCLUDE_UTILS_H
--- a/csrc/detection/include/yolov8.hpp
+++ b/csrc/detection/include/yolov8.hpp
@@ -1,266 +0,0 @@
-//
-// Created by ubuntu on 1/8/23.
-//
-#include "config.h"
-#include "utils.h"
-#include <fstream>
-#include "NvInferPlugin.h"
-
-using namespace det;
-
-class YOLOv8
-{
-public:
-	explicit YOLOv8(const std::string& engine_file_path);
-	~YOLOv8();
-
-	void make_pipe(bool warmup = true);
-	void copy_from_Mat(const cv::Mat& image);
-	void infer();
-	void postprocess(std::vector<Object>& objs);
-
-	size_t in_size = 1 * 3 * INPUT_W * INPUT_H;
-	float w = INPUT_W;
-	float h = INPUT_H;
-	float ratio = 1.0f;
-	float dw = 0.f;
-	float dh = 0.f;
-	std::array<std::pair<int, int>, NUM_OUTPUT> out_sizes{};
-	std::array<void*, NUM_OUTPUT> outputs{};
-private:
-	nvinfer1::ICudaEngine* engine = nullptr;
-	nvinfer1::IRuntime* runtime = nullptr;
-	nvinfer1::IExecutionContext* context = nullptr;
-	cudaStream_t stream = nullptr;
-	std::array<void*, NUM_BINDINGS> buffs{};
-	Logger gLogger{ nvinfer1::ILogger::Severity::kERROR };
-
-};
-
-YOLOv8::YOLOv8(const std::string& engine_file_path)
-{
-	std::ifstream file(engine_file_path, std::ios::binary);
-	assert(file.good());
-	file.seekg(0, std::ios::end);
-	auto size = file.tellg();
-	std::ostringstream fmt;
-
-	file.seekg(0, std::ios::beg);
-	char* trtModelStream = new char[size];
-	assert(trtModelStream);
-	file.read(trtModelStream, size);
-	file.close();
-	initLibNvInferPlugins(&this->gLogger, "");
-	this->runtime = nvinfer1::createInferRuntime(this->gLogger);
-	assert(this->runtime != nullptr);
-
-	this->engine = this->runtime->deserializeCudaEngine(trtModelStream, size);
-	assert(this->engine != nullptr);
-
-	this->context = this->engine->createExecutionContext();
-
-	assert(this->context != nullptr);
-	cudaStreamCreate(&this->stream);
-
-}
-
-YOLOv8::~YOLOv8()
-{
-	this->context->destroy();
-	this->engine->destroy();
-	this->runtime->destroy();
-	cudaStreamDestroy(this->stream);
-	for (auto& ptr : this->buffs)
-	{
-		CHECK(cudaFree(ptr));
-	}
-
-	for (auto& ptr : this->outputs)
-	{
-		CHECK(cudaFreeHost(ptr));
-	}
-
-}
-void YOLOv8::make_pipe(bool warmup)
-{
-	const nvinfer1::Dims input_dims = this->engine->getBindingDimensions(
-		this->engine->getBindingIndex(INPUT)
-	);
-	this->in_size = get_size_by_dims(input_dims);
-	CHECK(cudaMalloc(&this->buffs[0], this->in_size * sizeof(float)));
-
-	this->context->setBindingDimensions(0, input_dims);
-	const int32_t num_dets_idx = this->engine->getBindingIndex(NUM_DETS);
-	const nvinfer1::Dims num_dets_dims = this->context->getBindingDimensions(num_dets_idx);
-	this->out_sizes[num_dets_idx - NUM_INPUT].first = get_size_by_dims(num_dets_dims);
-	this->out_sizes[num_dets_idx - NUM_INPUT].second = DataTypeToSize(
-		this->engine->getBindingDataType(num_dets_idx));
-
-	const int32_t bboxes_idx = this->engine->getBindingIndex(BBOXES);
-	const nvinfer1::Dims bboxes_dims = this->context->getBindingDimensions(bboxes_idx);
-
-	this->out_sizes[bboxes_idx - NUM_INPUT].first = get_size_by_dims(bboxes_dims);
-	this->out_sizes[bboxes_idx - NUM_INPUT].second = DataTypeToSize(
-		this->engine->getBindingDataType(bboxes_idx));
-
-	const int32_t scores_idx = this->engine->getBindingIndex(SCORES);
-	const nvinfer1::Dims scores_dims = this->context->getBindingDimensions(scores_idx);
-	this->out_sizes[scores_idx - NUM_INPUT].first = get_size_by_dims(scores_dims);
-	this->out_sizes[scores_idx - NUM_INPUT].second = DataTypeToSize(
-		this->engine->getBindingDataType(scores_idx));
-
-	const int32_t labels_idx = this->engine->getBindingIndex(LABELS);
-	const nvinfer1::Dims labels_dims = this->context->getBindingDimensions(labels_idx);
-	this->out_sizes[labels_idx - NUM_INPUT].first = get_size_by_dims(labels_dims);
-	this->out_sizes[labels_idx - NUM_INPUT].second = DataTypeToSize(
-		this->engine->getBindingDataType(labels_idx));
-
-	for (int i = 0; i < NUM_OUTPUT; i++)
-	{
-		const int osize = this->out_sizes[i].first * out_sizes[i].second;
-		CHECK(cudaHostAlloc(&this->outputs[i], osize, 0));
-		CHECK(cudaMalloc(&this->buffs[NUM_INPUT + i], osize));
-	}
-	if (warmup)
-	{
-		for (int i = 0; i < 10; i++)
-		{
-			size_t isize = this->in_size * sizeof(float);
-			auto* tmp = new float[isize];
-
-			CHECK(cudaMemcpyAsync(this->buffs[0],
-				tmp,
-				isize,
-				cudaMemcpyHostToDevice,
-				this->stream));
-			this->infer();
-		}
-		printf("model warmup 10 times\n");
-
-	}
-}
-
-void YOLOv8::copy_from_Mat(const cv::Mat& image)
-{
-	float height = (float)image.rows;
-	float width = (float)image.cols;
-
-	float r = std::min(INPUT_H / height, INPUT_W / width);
-
-	int padw = (int)std::round(width * r);
-	int padh = (int)std::round(height * r);
-
-	cv::Mat tmp;
-	if ((int)width != padw || (int)height != padh)
-	{
-		cv::resize(image, tmp, cv::Size(padw, padh));
-	}
-	else
-	{
-		tmp = image.clone();
-	}
-
-	float _dw = INPUT_W - padw;
-	float _dh = INPUT_H - padh;
-
-	_dw /= 2.0f;
-	_dh /= 2.0f;
-	int top = int(std::round(_dh - 0.1f));
-	int bottom = int(std::round(_dh + 0.1f));
-	int left = int(std::round(_dw - 0.1f));
-	int right = int(std::round(_dw + 0.1f));
-	cv::copyMakeBorder(tmp, tmp, top, bottom, left, right, cv::BORDER_CONSTANT, PAD_COLOR);
-	cv::dnn::blobFromImage(tmp,
-		tmp,
-		1 / 255.f,
-		cv::Size(),
-		cv::Scalar(0, 0, 0),
-		true,
-		false,
-		CV_32F);
-	CHECK(cudaMemcpyAsync(this->buffs[0],
-		tmp.ptr<float>(),
-		this->in_size * sizeof(float),
-		cudaMemcpyHostToDevice,
-		this->stream));
-
-	this->ratio = 1 / r;
-	this->dw = _dw;
-	this->dh = _dh;
-	this->w = width;
-	this->h = height;
-}
-
-void YOLOv8::infer()
-{
-	this->context->enqueueV2(buffs.data(), this->stream, nullptr);
-	for (int i = 0; i < NUM_OUTPUT; i++)
-	{
-		const int osize = this->out_sizes[i].first * out_sizes[i].second;
-		CHECK(cudaMemcpyAsync(this->outputs[i],
-			this->buffs[NUM_INPUT + i],
-			osize,
-			cudaMemcpyDeviceToHost,
-			this->stream));
-	}
-	cudaStreamSynchronize(this->stream);
-
-}
-
-void YOLOv8::postprocess(std::vector<Object>& objs)
-{
-	int* num_dets = static_cast<int*>(this->outputs[0]);
-	auto* boxes = static_cast<float*>(this->outputs[1]);
-	auto* scores = static_cast<float*>(this->outputs[2]);
-	int* labels = static_cast<int*>(this->outputs[3]);
-	for (int i = 0; i < num_dets[0]; i++)
-	{
-		float* ptr = boxes + i * 4;
-		Object obj;
-		float x0 = *ptr++ - this->dw;
-		float y0 = *ptr++ - this->dh;
-		float x1 = *ptr++ - this->dw;
-		float y1 = *ptr++ - this->dh;
-
-		x0 = clamp(x0 * this->ratio, 0.f, this->w);
-		y0 = clamp(y0 * this->ratio, 0.f, this->h);
-		x1 = clamp(x1 * this->ratio, 0.f, this->w);
-		y1 = clamp(y1 * this->ratio, 0.f, this->h);
-		obj.rect.x = x0;
-		obj.rect.y = y0;
-		obj.rect.width = x1 - x0;
-		obj.rect.height = y1 - y0;
-		obj.prob = *(scores + i);
-		obj.label = *(labels + i);
-
-		objs.push_back(obj);
-
-	}
-}
-
-static void draw_objects(const cv::Mat& image, cv::Mat& res, const std::vector<Object>& objs)
-{
-	res = image.clone();
-	for (auto& obj : objs)
-	{
-		cv::Scalar color = cv::Scalar(COLORS[obj.label][0], COLORS[obj.label][1], COLORS[obj.label][2]);
-		cv::rectangle(res, obj.rect, color, 2);
-
-		char text[256];
-		sprintf(text, "%s %.1f%%", CLASS_NAMES[obj.label], obj.prob * 100);
-
-		int baseLine = 0;
-		cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
-
-		int x = (int)obj.rect.x;
-		int y = (int)obj.rect.y + 1;
-
-		if (y > res.rows)
-			y = res.rows;
-
-		cv::rectangle(res, cv::Rect(x, y, label_size.width, label_size.height + baseLine), RECT_COLOR, -1);
-
-		cv::putText(res, text, cv::Point(x, y + label_size.height),
-			cv::FONT_HERSHEY_SIMPLEX, 0.4, TXT_COLOR, 1);
-	}
-}
--- a/csrc/detection/main.cpp
+++ b/csrc/detection/main.cpp
@@ -1,86 +0,0 @@
-//
-// Created by ubuntu on 1/8/23.
-//
-#include "include/yolov8.hpp"
-int main(int argc, char** argv)
-{
-	cudaSetDevice(DEVICE);
-
-	const std::string engine_file_path{ argv[1] };
-	const std::string path{ argv[2] };
-	std::vector<cv::String> imagePathList;
-	bool isVideo{ false };
-	if (IsFile(path))
-	{
-		std::string suffix = path.substr(path.find_last_of('.') + 1);
-		if (suffix == "jpg")
-		{
-			imagePathList.push_back(path);
-		}
-		else if (suffix == "mp4")
-		{
-			isVideo = true;
-		}
-	}
-	else if (IsFolder(path))
-	{
-		cv::glob(path + "/*.jpg", imagePathList);
-	}
-
-	auto* yolov8 = new YOLOv8(engine_file_path);
-	yolov8->make_pipe(true);
-	cv::Mat res;
-	cv::namedWindow("result", cv::WINDOW_AUTOSIZE);
-	if (isVideo)
-	{
-		cv::VideoCapture cap(path);
-		cv::Mat image;
-		if (!cap.isOpened())
-		{
-			printf("can not open ...\n");
-			return -1;
-		}
-		double fp_ = cap.get(cv::CAP_PROP_FPS);
-		int fps = round(1000.0 / fp_);
-		while (cap.read(image))
-		{
-			auto start = std::chrono::system_clock::now();
-			yolov8->copy_from_Mat(image);
-			yolov8->infer();
-			std::vector<Object> objs;
-			yolov8->postprocess(objs);
-			draw_objects(image, res, objs);
-			auto end = std::chrono::system_clock::now();
-			auto tc = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.f;
-			cv::imshow("result", res);
-			printf("cost %2.4f ms\n", tc);
-			if (cv::waitKey(fps) == 'q')
-			{
-				break;
-			}
-		}
-	}
-	else
-	{
-		for (auto path : imagePathList)
-		{
-			cv::Mat image = cv::imread(path);
-			yolov8->copy_from_Mat(image);
-			auto start = std::chrono::system_clock::now();
-			yolov8->infer();
-			auto end = std::chrono::system_clock::now();
-			auto tc = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.f;
-
-			printf("infer %-20s\tcost %2.4f ms\n", path.c_str(), tc);
-
-			std::vector<Object> objs;
-			yolov8->postprocess(objs);
-			draw_objects(image, res, objs);
-			cv::imshow("result", res);
-			cv::waitKey(0);
-		}
-	}
-	cv::destroyAllWindows();
-	delete yolov8;
-	return 0;
-}
--- a/csrc/segment/CMakeLists.txt
+++ b/csrc/segment/CMakeLists.txt
@@ -47,8 +47,7 @@ include_directories(${INCLUDE_DIRS})
 add_executable(${PROJECT_NAME}
        main.cpp
        include/yolov8-seg.hpp
-        include/config.h
-        include/utils.h
+        include/common.hpp
        )

 target_link_directories(${PROJECT_NAME} PUBLIC ${ALL_LIBS})
--- a/csrc/segment/include/common.hpp
+++ b/csrc/segment/include/common.hpp
@@ -1,13 +1,11 @@
 //
-// Created by ubuntu on 1/10/23.
+// Created by ubuntu on 1/24/23.
 //

-#ifndef YOLOV8_CSRC_SEGMENT_INCLUDE_UTILS_H
-#define YOLOV8_CSRC_SEGMENT_INCLUDE_UTILS_H
+#ifndef SEGMENT_COMMON_HPP
+#define SEGMENT_COMMON_HPP
+#include "opencv2/opencv.hpp"
 #include <sys/stat.h>
-#include <iostream>
-#include <string>
-#include <assert.h>
 #include <unistd.h>
 #include "NvInfer.h"

@@ -75,29 +73,28 @@ inline int get_size_by_dims(const nvinfer1::Dims& dims)
 	return size;
 }

-inline int DataTypeToSize(const nvinfer1::DataType& dataType)
+inline int type_to_size(const nvinfer1::DataType& dataType)
 {
 	switch (dataType)
 	{
 	case nvinfer1::DataType::kFLOAT:
-		return sizeof(float);
+		return 4;
 	case nvinfer1::DataType::kHALF:
 		return 2;
-	case nvinfer1::DataType::kINT8:
-		return sizeof(int8_t);
 	case nvinfer1::DataType::kINT32:
-		return sizeof(int32_t);
+		return 4;
+	case nvinfer1::DataType::kINT8:
+		return 1;
 	case nvinfer1::DataType::kBOOL:
-		return sizeof(bool);
+		return 1;
 	default:
-		return sizeof(float);
+		return 4;
 	}
 }

-inline float clamp(const float val, const float minVal = 0.f, const float maxVal = 1280.f)
+inline static float clamp(float val, float min, float max)
 {
-	assert(minVal <= maxVal);
-	return std::min(maxVal, std::max(minVal, val));
+	return val > min ? (val < max ? val : max) : min;
 }

 inline bool IsPathExist(const std::string& path)
@@ -130,4 +127,31 @@ inline bool IsFolder(const std::string& path)
 	return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
 }

-#endif //YOLOV8_CSRC_SEGMENT_INCLUDE_UTILS_H
+namespace seg
+{
+	struct Binding
+	{
+		size_t size = 1;
+		size_t dsize = 1;
+		nvinfer1::Dims dims;
+		std::string name;
+	};
+
+	struct Object
+	{
+		cv::Rect_<float> rect;
+		int label = 0;
+		float prob = 0.0;
+		cv::Mat boxMask;
+	};
+
+	struct PreParam
+	{
+		float ratio = 1.0f;
+		float dw = 0.0f;
+		float dh = 0.0f;
+		float height = 0;
+		float width = 0;
+	};
+}
+#endif //SEGMENT_COMMON_HPP
--- a/csrc/segment/include/config.h
+++ b/csrc/segment/include/config.h
@@ -1,107 +0,0 @@
-//
-// Created by ubuntu on 1/16/23.
-//
-
-#ifndef YOLOV8_TENSORRT_CSRC_SEGMENT_INCLUDE_CONFIG_H
-#define YOLOV8_TENSORRT_CSRC_SEGMENT_INCLUDE_CONFIG_H
-#include "opencv2/opencv.hpp"
-namespace seg
-{
-	const int DEVICE = 0;
-
-	const int INPUT_W = 640;
-	const int INPUT_H = 640;
-	const int NUM_INPUT = 1;
-	const int NUM_OUTPUT = 2;
-	const int NUM_PROPOSAL = 8400; // feature map 20*20+40*40+80*80
-	const int NUM_SEG_C = 32; // seg channel
-	const int NUM_COLS = 6 + NUM_SEG_C; // x0 y0 x1 y1 score label 32
-
-	const int SEG_W = 160;
-	const int SEG_H = 160;
-
-	// thresholds
-	const float CONF_THRES = 0.25;
-	const float IOU_THRES = 0.65;
-	const float MASK_THRES = 0.5;
-
-	// distance
-	const float DIS = 7680.f;
-
-	const int NUM_BINDINGS = NUM_INPUT + NUM_OUTPUT;
-	const cv::Scalar PAD_COLOR = { 114, 114, 114 };
-	const cv::Scalar RECT_COLOR = cv::Scalar(0, 0, 255);
-	const cv::Scalar TXT_COLOR = cv::Scalar(255, 255, 255);
-
-	const char* INPUT = "images";
-	const char* OUTPUT = "outputs";
-	const char* PROTO = "proto";
-
-	const char* CLASS_NAMES[] = {
-		"person", "bicycle", "car", "motorcycle", "airplane", "bus",
-		"train", "truck", "boat", "traffic light", "fire hydrant",
-		"stop sign", "parking meter", "bench", "bird", "cat",
-		"dog", "horse", "sheep", "cow", "elephant",
-		"bear", "zebra", "giraffe", "backpack", "umbrella",
-		"handbag", "tie", "suitcase", "frisbee", "skis",
-		"snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
-		"skateboard", "surfboard", "tennis racket", "bottle", "wine glass",
-		"cup", "fork", "knife", "spoon", "bowl",
-		"banana", "apple", "sandwich", "orange", "broccoli",
-		"carrot", "hot dog", "pizza", "donut", "cake",
-		"chair", "couch", "potted plant", "bed", "dining table",
-		"toilet", "tv", "laptop", "mouse", "remote",
-		"keyboard", "cell phone", "microwave", "oven",
-		"toaster", "sink", "refrigerator", "book", "clock", "vase",
-		"scissors", "teddy bear", "hair drier", "toothbrush" };
-
-	const unsigned int COLORS[80][3] = {
-		{ 0, 114, 189 }, { 217, 83, 25 }, { 237, 177, 32 },
-		{ 126, 47, 142 }, { 119, 172, 48 }, { 77, 190, 238 },
-		{ 162, 20, 47 }, { 76, 76, 76 }, { 153, 153, 153 },
-		{ 255, 0, 0 }, { 255, 128, 0 }, { 191, 191, 0 },
-		{ 0, 255, 0 }, { 0, 0, 255 }, { 170, 0, 255 },
-		{ 85, 85, 0 }, { 85, 170, 0 }, { 85, 255, 0 },
-		{ 170, 85, 0 }, { 170, 170, 0 }, { 170, 255, 0 },
-		{ 255, 85, 0 }, { 255, 170, 0 }, { 255, 255, 0 },
-		{ 0, 85, 128 }, { 0, 170, 128 }, { 0, 255, 128 },
-		{ 85, 0, 128 }, { 85, 85, 128 }, { 85, 170, 128 },
-		{ 85, 255, 128 }, { 170, 0, 128 }, { 170, 85, 128 },
-		{ 170, 170, 128 }, { 170, 255, 128 }, { 255, 0, 128 },
-		{ 255, 85, 128 }, { 255, 170, 128 }, { 255, 255, 128 },
-		{ 0, 85, 255 }, { 0, 170, 255 }, { 0, 255, 255 },
-		{ 85, 0, 255 }, { 85, 85, 255 }, { 85, 170, 255 },
-		{ 85, 255, 255 }, { 170, 0, 255 }, { 170, 85, 255 },
-		{ 170, 170, 255 }, { 170, 255, 255 }, { 255, 0, 255 },
-		{ 255, 85, 255 }, { 255, 170, 255 }, { 85, 0, 0 },
-		{ 128, 0, 0 }, { 170, 0, 0 }, { 212, 0, 0 },
-		{ 255, 0, 0 }, { 0, 43, 0 }, { 0, 85, 0 },
-		{ 0, 128, 0 }, { 0, 170, 0 }, { 0, 212, 0 },
-		{ 0, 255, 0 }, { 0, 0, 43 }, { 0, 0, 85 },
-		{ 0, 0, 128 }, { 0, 0, 170 }, { 0, 0, 212 },
-		{ 0, 0, 255 }, { 0, 0, 0 }, { 36, 36, 36 },
-		{ 73, 73, 73 }, { 109, 109, 109 }, { 146, 146, 146 },
-		{ 182, 182, 182 }, { 219, 219, 219 }, { 0, 114, 189 },
-		{ 80, 183, 189 }, { 128, 128, 0 }
-	};
-
-	const unsigned int MASK_COLORS[20][3] = {
-		{ 255, 56, 56 }, { 255, 157, 151 }, { 255, 112, 31 },
-		{ 255, 178, 29 }, { 207, 210, 49 }, { 72, 249, 10 },
-		{ 146, 204, 23 }, { 61, 219, 134 }, { 26, 147, 52 },
-		{ 0, 212, 187 }, { 44, 153, 168 }, { 0, 194, 255 },
-		{ 52, 69, 147 }, { 100, 115, 255 }, { 0, 24, 236 },
-		{ 132, 56, 255 }, { 82, 0, 133 }, { 203, 56, 255 },
-		{ 255, 149, 200 }, { 255, 55, 199 }
-	};
-
-	struct Object
-	{
-		cv::Rect_<float> rect;
-		int label = 0;
-		float prob = 0.0;
-		cv::Mat boxMask;
-	};
-
-}
-#endif //YOLOV8_TENSORRT_CSRC_SEGMENT_INCLUDE_CONFIG_H
--- a/csrc/segment/include/yolov8-seg.hpp
+++ b/csrc/segment/include/yolov8-seg.hpp
@@ -1,9 +1,10 @@
 //
-// Created by ubuntu on 1/8/23.
+// Created by ubuntu on 1/24/23.
 //
-#include "config.h"
-#include "utils.h"
+#ifndef SEGMENT_YOLOV8_SEG_HPP
+#define SEGMENT_YOLOV8_SEG_HPP
 #include <fstream>
+#include "common.hpp"
 #include "NvInferPlugin.h"

 using namespace seg;
@@ -16,23 +17,44 @@ public:

 	void make_pipe(bool warmup = true);
 	void copy_from_Mat(const cv::Mat& image);
+	void copy_from_Mat(const cv::Mat& image, cv::Size& size);
+	void letterbox(
+		const cv::Mat& image,
+		cv::Mat& out,
+		cv::Size& size
+	);
 	void infer();
-	void postprocess(std::vector<Object>& objs);
-
-	size_t in_size = 1 * 3 * INPUT_W * INPUT_H;
-	float w = INPUT_W;
-	float h = INPUT_H;
-	float ratio = 1.0f;
-	float dw = 0.f;
-	float dh = 0.f;
-	std::array<std::pair<int, int>, NUM_OUTPUT> out_sizes{};
-	std::array<void*, NUM_OUTPUT> outputs{};
+	void postprocess(
+		std::vector<Object>& objs,
+		float score_thres = 0.25f,
+		float iou_thres = 0.65f,
+		int topk = 100,
+		int seg_channels = 32,
+		int seg_h = 160,
+		int seg_w = 160
+	);
+	static void draw_objects(
+		const cv::Mat& image,
+		cv::Mat& res,
+		const std::vector<Object>& objs,
+		const std::vector<std::string>& CLASS_NAMES,
+		const std::vector<std::vector<unsigned int>>& COLORS,
+		const std::vector<std::vector<unsigned int>>& MASK_COLORS
+	);
+	int num_bindings;
+	int num_inputs = 0;
+	int num_outputs = 0;
+	std::vector<Binding> input_bindings;
+	std::vector<Binding> output_bindings;
+	std::vector<void*> host_ptrs;
+	std::vector<void*> device_ptrs;
+
+	PreParam pparam;
 private:
 	nvinfer1::ICudaEngine* engine = nullptr;
 	nvinfer1::IRuntime* runtime = nullptr;
 	nvinfer1::IExecutionContext* context = nullptr;
 	cudaStream_t stream = nullptr;
-	std::array<void*, NUM_BINDINGS> buffs{};
 	Logger gLogger{ nvinfer1::ILogger::Severity::kERROR };

 };
@@ -43,8 +65,6 @@ YOLOv8_seg::YOLOv8_seg(const std::string& engine_file_path)
 	assert(file.good());
 	file.seekg(0, std::ios::end);
 	auto size = file.tellg();
-	std::ostringstream fmt;
-
 	file.seekg(0, std::ios::beg);
 	char* trtModelStream = new char[size];
 	assert(trtModelStream);
@@ -61,6 +81,41 @@ YOLOv8_seg::YOLOv8_seg(const std::string& engine_file_path)

 	assert(this->context != nullptr);
 	cudaStreamCreate(&this->stream);
+	this->num_bindings = this->engine->getNbBindings();
+
+	for (int i = 0; i < this->num_bindings; ++i)
+	{
+		Binding binding;
+		nvinfer1::Dims dims;
+		nvinfer1::DataType dtype = this->engine->getBindingDataType(i);
+		std::string name = this->engine->getBindingName(i);
+		binding.name = name;
+		binding.dsize = type_to_size(dtype);
+
+		bool IsInput = engine->bindingIsInput(i);
+		if (IsInput)
+		{
+			this->num_inputs += 1;
+			dims = this->engine->getProfileDimensions(
+				i,
+				0,
+				nvinfer1::OptProfileSelector::kMAX);
+			binding.size = get_size_by_dims(dims);
+			binding.dims = dims;
+			this->input_bindings.push_back(binding);
+			// set max opt shape
+			this->context->setBindingDimensions(i, dims);
+
+		}
+		else
+		{
+			dims = this->context->getBindingDimensions(i);
+			binding.size = get_size_by_dims(dims);
+			binding.dims = dims;
+			this->output_bindings.push_back(binding);
+			this->num_outputs += 1;
+		}
+	}

 }

@@ -70,58 +125,67 @@ YOLOv8_seg::~YOLOv8_seg()
 	this->engine->destroy();
 	this->runtime->destroy();
 	cudaStreamDestroy(this->stream);
-	for (auto& ptr : this->buffs)
+	for (auto& ptr : this->device_ptrs)
 	{
 		CHECK(cudaFree(ptr));
 	}

-	for (auto& ptr : this->outputs)
+	for (auto& ptr : this->host_ptrs)
 	{
 		CHECK(cudaFreeHost(ptr));
 	}
-
 }
+
 void YOLOv8_seg::make_pipe(bool warmup)
 {
-	const nvinfer1::Dims input_dims = this->engine->getBindingDimensions(
-		this->engine->getBindingIndex(INPUT)
-	);
-	this->in_size = get_size_by_dims(input_dims);
-	CHECK(cudaMalloc(&this->buffs[0], this->in_size * sizeof(float)));
-
-	this->context->setBindingDimensions(0, input_dims);
-
-	const int32_t output_idx = this->engine->getBindingIndex(OUTPUT);
-	const nvinfer1::Dims output_dims = this->context->getBindingDimensions(output_idx);
-	this->out_sizes[output_idx - NUM_INPUT].first = get_size_by_dims(output_dims);
-	this->out_sizes[output_idx - NUM_INPUT].second = DataTypeToSize(
-		this->engine->getBindingDataType(output_idx));

-	const int32_t proto_idx = this->engine->getBindingIndex(PROTO);
-	const nvinfer1::Dims proto_dims = this->context->getBindingDimensions(proto_idx);
-
-	this->out_sizes[proto_idx - NUM_INPUT].first = get_size_by_dims(proto_dims);
-	this->out_sizes[proto_idx - NUM_INPUT].second = DataTypeToSize(
-		this->engine->getBindingDataType(proto_idx));
+	for (auto& bindings : this->input_bindings)
+	{
+		void* d_ptr;
+		CHECK(cudaMallocAsync(
+			&d_ptr,
+			bindings.size * bindings.dsize,
+			this->stream)
+		);
+		this->device_ptrs.push_back(d_ptr);
+	}

-	for (int i = 0; i < NUM_OUTPUT; i++)
+	for (auto& bindings : this->output_bindings)
 	{
-		const int osize = this->out_sizes[i].first * out_sizes[i].second;
-		CHECK(cudaHostAlloc(&this->outputs[i], osize, 0));
-		CHECK(cudaMalloc(&this->buffs[NUM_INPUT + i], osize));
+		void* d_ptr, * h_ptr;
+		size_t size = bindings.size * bindings.dsize;
+		CHECK(cudaMallocAsync(
+			&d_ptr,
+			size,
+			this->stream)
+		);
+		CHECK(cudaHostAlloc(
+			&h_ptr,
+			size,
+			0)
+		);
+		this->device_ptrs.push_back(d_ptr);
+		this->host_ptrs.push_back(h_ptr);
 	}
+
 	if (warmup)
 	{
 		for (int i = 0; i < 10; i++)
 		{
-			size_t isize = this->in_size * sizeof(float);
-			auto* tmp = new float[isize];
-
-			CHECK(cudaMemcpyAsync(this->buffs[0],
-				tmp,
-				isize,
+			for (auto& bindings : this->input_bindings)
+			{
+				size_t size = bindings.size * bindings.dsize;
+				void* h_ptr = malloc(size);
+				memset(h_ptr, 0, size);
+				CHECK(cudaMemcpyAsync(
+					this->device_ptrs[0],
+					h_ptr,
+					size,
 					cudaMemcpyHostToDevice,
-				this->stream));
+					this->stream)
+				);
+				free(h_ptr);
+			}
 			this->infer();
 		}
 		printf("model warmup 10 times\n");
@ -129,158 +193,257 @@ void YOLOv8_seg::make_pipe(bool warmup)
 	}
 }

-void YOLOv8_seg::copy_from_Mat(const cv::Mat& image)
+void YOLOv8_seg::letterbox(
+	const cv::Mat& image,
+	cv::Mat& out,
+	cv::Size& size
+)
 {
-	float height = (float)image.rows;
-	float width = (float)image.cols;
-
-	float r = std::min(INPUT_H / height, INPUT_W / width);
+	const float inp_h = size.height;
+	const float inp_w = size.width;
+	float height = image.rows;
+	float width = image.cols;

-	int padw = (int)std::round(width * r);
-	int padh = (int)std::round(height * r);
+	float r = std::min(inp_h / height, inp_w / width);
+	int padw = std::round(width * r);
+	int padh = std::round(height * r);

 	cv::Mat tmp;
 	if ((int)width != padw || (int)height != padh)
 	{
-		cv::resize(image, tmp, cv::Size(padw, padh));
+		cv::resize(
+			image,
+			tmp,
+			cv::Size(padw, padh)
+		);
 	}
 	else
 	{
 		tmp = image.clone();
 	}

-	float _dw = INPUT_W - padw;
-	float _dh = INPUT_H - padh;
+	float dw = inp_w - padw;
+	float dh = inp_h - padh;

-	_dw /= 2.0f;
-	_dh /= 2.0f;
-	int top = int(std::round(_dh - 0.1f));
-	int bottom = int(std::round(_dh + 0.1f));
-	int left = int(std::round(_dw - 0.1f));
-	int right = int(std::round(_dw + 0.1f));
-	cv::copyMakeBorder(tmp, tmp, top, bottom, left, right, cv::BORDER_CONSTANT, PAD_COLOR);
-	cv::dnn::blobFromImage(tmp,
+	dw /= 2.0f;
+	dh /= 2.0f;
+	int top = int(std::round(dh - 0.1f));
+	int bottom = int(std::round(dh + 0.1f));
+	int left = int(std::round(dw - 0.1f));
+	int right = int(std::round(dw + 0.1f));
+
+	cv::copyMakeBorder(
+		tmp,
 		tmp,
+		top,
+		bottom,
+		left,
+		right,
+		cv::BORDER_CONSTANT,
+		{ 114, 114, 114 }
+	);
+
+	cv::dnn::blobFromImage(tmp,
+		out,
 		1 / 255.f,
 		cv::Size(),
 		cv::Scalar(0, 0, 0),
 		true,
 		false,
-		CV_32F);
-	CHECK(cudaMemcpyAsync(this->buffs[0],
-		tmp.ptr<float>(),
-		this->in_size * sizeof(float),
+		CV_32F
+	);
+	this->pparam.ratio = 1 / r;
+	this->pparam.dw = dw;
+	this->pparam.dh = dh;
+	this->pparam.height = height;
+	this->pparam.width = width;;
+}
+
+void YOLOv8_seg::copy_from_Mat(const cv::Mat& image)
+{
+	cv::Mat nchw;
+	auto& in_binding = this->input_bindings[0];
+	auto width = in_binding.dims.d[3];
+	auto height = in_binding.dims.d[2];
+	cv::Size size{ width, height };
+	this->letterbox(
+		image,
+		nchw,
+		size
+	);
+
+	this->context->setBindingDimensions(
+		0,
+		nvinfer1::Dims
+			{
+				4,
+				{ 1, 3, height, width }
+			}
+	);
+
+	CHECK(cudaMemcpyAsync(
+		this->device_ptrs[0],
+		nchw.ptr<float>(),
+		nchw.total() * nchw.elemSize(),
 		cudaMemcpyHostToDevice,
-		this->stream));
+		this->stream)
+	);
+}

-	this->ratio = 1 / r;
-	this->dw = _dw;
-	this->dh = _dh;
-	this->w = width;
-	this->h = height;
+void YOLOv8_seg::copy_from_Mat(const cv::Mat& image, cv::Size& size)
+{
+	cv::Mat nchw;
+	this->letterbox(
+		image,
+		nchw,
+		size
+	);
+	this->context->setBindingDimensions(
+		0,
+		nvinfer1::Dims
+			{ 4,
+			  { 1, 3, size.height, size.width }
+			}
+	);
+	CHECK(cudaMemcpyAsync(
+		this->device_ptrs[0],
+		nchw.ptr<float>(),
+		nchw.total() * nchw.elemSize(),
+		cudaMemcpyHostToDevice,
+		this->stream)
+	);
 }

 void YOLOv8_seg::infer()
 {
-	this->context->enqueueV2(buffs.data(), this->stream, nullptr);
-	for (int i = 0; i < NUM_OUTPUT; i++)
+
+	this->context->enqueueV2(
+		this->device_ptrs.data(),
+		this->stream,
+		nullptr
+	);
+	for (int i = 0; i < this->num_outputs; i++)
 	{
-		const int osize = this->out_sizes[i].first * out_sizes[i].second;
-		CHECK(cudaMemcpyAsync(this->outputs[i],
-			this->buffs[NUM_INPUT + i],
+		size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;
+		CHECK(cudaMemcpyAsync(this->host_ptrs[i],
+			this->device_ptrs[i + this->num_inputs],
 			osize,
 			cudaMemcpyDeviceToHost,
-			this->stream));
+			this->stream)
+		);
+
 	}
 	cudaStreamSynchronize(this->stream);

 }

-void YOLOv8_seg::postprocess(std::vector<Object>& objs)
+void YOLOv8_seg::postprocess(std::vector<Object>& objs,
+	float score_thres,
+	float iou_thres,
+	int topk,
+	int seg_channels,
+	int seg_h,
+	int seg_w
+)
 {
 	objs.clear();
-	auto* output = static_cast<float*>(this->outputs[0]); // x0 y0 x1 y1 s l *32
-	cv::Mat protos = cv::Mat(NUM_SEG_C, SEG_W * SEG_H, CV_32F,
-		static_cast<float*>(this->outputs[1]));
+	auto input_h = this->input_bindings[0].dims.d[2];
+	auto input_w = this->input_bindings[0].dims.d[3];
+	auto num_anchors = this->output_bindings[0].dims.d[1];
+	auto num_channels = this->output_bindings[0].dims.d[2];
+
+	auto& dw = this->pparam.dw;
+	auto& dh = this->pparam.dh;
+	auto& width = this->pparam.width;
+	auto& height = this->pparam.height;
+	auto& ratio = this->pparam.ratio;
+
+	auto* output = static_cast<float*>(this->host_ptrs[0]);
+	cv::Mat protos = cv::Mat(seg_channels, seg_h * seg_w, CV_32F,
+		static_cast<float*>(this->host_ptrs[1]));

 	std::vector<int> labels;
 	std::vector<float> scores;
 	std::vector<cv::Rect> bboxes;
 	std::vector<cv::Mat> mask_confs;
+	std::vector<int> indices;

-	for (int i = 0; i < NUM_PROPOSAL; i++)
+	for (int i = 0; i < num_anchors; i++)
 	{
-		float* ptr = output + i * NUM_COLS;
+		float* ptr = output + i * num_channels;
 		float score = *(ptr + 4);
-		if (score > CONF_THRES)
+		if (score > score_thres)
 		{
-			float x0 = *ptr++ - this->dw;
-			float y0 = *ptr++ - this->dh;
-			float x1 = *ptr++ - this->dw;
-			float y1 = *ptr++ - this->dh;
+			float x0 = *ptr++ - dw;
+			float y0 = *ptr++ - dh;
+			float x1 = *ptr++ - dw;
+			float y1 = *ptr++ - dh;

-			x0 = clamp(x0 * this->ratio, 0.f, this->w);
-			y0 = clamp(y0 * this->ratio, 0.f, this->h);
-			x1 = clamp(x1 * this->ratio, 0.f, this->w);
-			y1 = clamp(y1 * this->ratio, 0.f, this->h);
+			x0 = clamp(x0 * ratio, 0.f, width);
+			y0 = clamp(y0 * ratio, 0.f, height);
+			x1 = clamp(x1 * ratio, 0.f, width);
+			y1 = clamp(y1 * ratio, 0.f, height);

 			int label = *(++ptr);
-			cv::Mat mask_conf = cv::Mat(1, NUM_SEG_C, CV_32F, ++ptr);
+			cv::Mat mask_conf = cv::Mat(1, seg_channels, CV_32F, ++ptr);
 			mask_confs.push_back(mask_conf);
 			labels.push_back(label);
 			scores.push_back(score);
-
-#if defined(BATCHED_NMS)
 			bboxes.push_back(cv::Rect_<float>(x0, y0, x1 - x0, y1 - y0));
-#else
-			bboxes.push_back(cv::Rect_<float>(x0 + label * DIS,
-				y0 + label * DIS,
-				x1 - x0,
-				y1 - y0));
-#endif
+
 		}
 	}
-	std::vector<int> indices;
+
 #if defined(BATCHED_NMS)
-	cv::dnn::NMSBoxesBatched(bboxes, scores, labels, CONF_THRES, IOU_THRES, indices);
+	cv::dnn::NMSBoxesBatched(
+		bboxes,
+		scores,
+		labels,
+		score_thres,
+		iou_thres,
+		indices
+	);
 #else
-	cv::dnn::NMSBoxes(bboxes, scores, CONF_THRES, IOU_THRES, indices);
+	cv::dnn::NMSBoxes(
+		bboxes,
+		scores,
+		score_thres,
+		iou_thres,
+		indices
+	);
 #endif

 	cv::Mat masks;
-
+	int cnt = 0;
 	for (auto& i : indices)
 	{
-#if defined(BATCHED_NMS)
+		if (cnt >= topk)
+		{
+			break;
+		}
 		cv::Rect tmp = bboxes[i];
-#else
-		cv::Rect tmp = { (int)(bboxes[i].x - labels[i] * DIS),
-						 (int)(bboxes[i].y - labels[i] * DIS),
-						 bboxes[i].width,
-						 bboxes[i].height };
-#endif
-
 		Object obj;
 		obj.label = labels[i];
 		obj.rect = tmp;
 		obj.prob = scores[i];
 		masks.push_back(mask_confs[i]);
 		objs.push_back(obj);
+		cnt += 1;
 	}

 	cv::Mat matmulRes = (masks * protos).t();
-	cv::Mat maskMat = matmulRes.reshape(indices.size(), { SEG_W, SEG_H });
+	cv::Mat maskMat = matmulRes.reshape(indices.size(), { seg_w, seg_h });

 	std::vector<cv::Mat> maskChannels;
 	cv::split(maskMat, maskChannels);
-	int scale_dw = this->dw / INPUT_W * SEG_W;
-	int scale_dh = this->dh / INPUT_H * SEG_H;
+	int scale_dw = dw / input_w * seg_w;
+	int scale_dh = dh / input_h * seg_h;

 	cv::Rect roi(
 		scale_dw,
 		scale_dh,
-		SEG_W - 2 * scale_dw,
-		SEG_H - 2 * scale_dh);
+		seg_w - 2 * scale_dw,
+		seg_h - 2 * scale_dh);

 	for (int i = 0; i < indices.size(); i++)
 	{
@ -288,30 +451,64 @@ void YOLOv8_seg::postprocess(std::vector<Object>& objs)
 		cv::exp(-maskChannels[i], dest);
 		dest = 1.0 / (1.0 + dest);
 		dest = dest(roi);
-		cv::resize(dest, mask, cv::Size((int)this->w, (int)this->h), cv::INTER_LINEAR);
-		objs[i].boxMask = mask(objs[i].rect) > MASK_THRES;
+		cv::resize(
+			dest,
+			mask,
+			cv::Size((int)width, (int)height),
+			cv::INTER_LINEAR
+		);
+		objs[i].boxMask = mask(objs[i].rect) > 0.5f;
 	}

 }

-static void draw_objects(const cv::Mat& image, cv::Mat& res, const std::vector<Object>& objs)
+void YOLOv8_seg::draw_objects(const cv::Mat& image,
+	cv::Mat& res,
+	const std::vector<Object>& objs,
+	const std::vector<std::string>& CLASS_NAMES,
+	const std::vector<std::vector<unsigned int>>& COLORS,
+	const std::vector<std::vector<unsigned int>>& MASK_COLORS
+)
 {
 	res = image.clone();
 	cv::Mat mask = image.clone();
 	for (auto& obj : objs)
 	{
 		int idx = obj.label;
-		cv::Scalar color = cv::Scalar(COLORS[idx][0], COLORS[idx][1], COLORS[idx][2]);
+		cv::Scalar color = cv::Scalar(
+			COLORS[idx][0],
+			COLORS[idx][1],
+			COLORS[idx][2]
+		);
 		cv::Scalar mask_color = cv::Scalar(
-			MASK_COLORS[idx % 20][0], MASK_COLORS[idx % 20][1], MASK_COLORS[idx % 20][2]);
-		cv::rectangle(res, obj.rect, color, 2);
+			MASK_COLORS[idx % 20][0],
+			MASK_COLORS[idx % 20][1],
+			MASK_COLORS[idx % 20][2]
+		);
+		cv::rectangle(
+			res,
+			obj.rect,
+			color,
+			2
+		);

 		char text[256];
-		sprintf(text, "%s %.1f%%", CLASS_NAMES[idx], obj.prob * 100);
+		sprintf(
+			text,
+			"%s %.1f%%",
+			CLASS_NAMES[idx].c_str(),
+			obj.prob * 100
+		);
 		mask(obj.rect).setTo(mask_color, obj.boxMask);

 		int baseLine = 0;
-		cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
+		cv::Size label_size = cv::getTextSize(
+			text,
+			cv::FONT_HERSHEY_SIMPLEX,
+			0.4,
+			1,
+			&baseLine
+		);

 		int x = (int)obj.rect.x;
 		int y = (int)obj.rect.y + 1;
@ -319,11 +516,30 @@ static void draw_objects(const cv::Mat& image, cv::Mat& res, const std::vector<O
 		if (y > res.rows)
 			y = res.rows;

-		cv::rectangle(res, cv::Rect(x, y, label_size.width, label_size.height + baseLine), RECT_COLOR, -1);
+		cv::rectangle(
+			res,
+			cv::Rect(x, y, label_size.width, label_size.height + baseLine),
+			{ 0, 0, 255 },
+			-1
+		);

-		cv::putText(res, text, cv::Point(x, y + label_size.height),
-			cv::FONT_HERSHEY_SIMPLEX, 0.4, TXT_COLOR, 1);
+		cv::putText(
+			res,
+			text,
+			cv::Point(x, y + label_size.height),
+			cv::FONT_HERSHEY_SIMPLEX,
+			0.4,
+			{ 255, 255, 255 },
+			1
+		);
 	}
-	cv::addWeighted(res, 0.5, mask, 0.8, 1, res);
-
+	cv::addWeighted(
+		res,
+		0.5,
+		mask,
+		0.8,
+		1,
+		res
+	);
 }
+#endif //SEGMENT_YOLOV8_SEG_HPP
--- a/csrc/segment/main.cpp
+++ b/csrc/segment/main.cpp
@ -1,60 +1,153 @@
 //
-// Created by ubuntu on 1/8/23.
+// Created by ubuntu on 1/20/23.
 //
-#include "include/yolov8-seg.hpp"
+#include "chrono"
+#include "yolov8-seg.hpp"
+#include "opencv2/opencv.hpp"
+
+const std::vector<std::string> CLASS_NAMES = {
+	"person", "bicycle", "car", "motorcycle", "airplane", "bus",
+	"train", "truck", "boat", "traffic light", "fire hydrant",
+	"stop sign", "parking meter", "bench", "bird", "cat",
+	"dog", "horse", "sheep", "cow", "elephant",
+	"bear", "zebra", "giraffe", "backpack", "umbrella",
+	"handbag", "tie", "suitcase", "frisbee", "skis",
+	"snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
+	"skateboard", "surfboard", "tennis racket", "bottle", "wine glass",
+	"cup", "fork", "knife", "spoon", "bowl",
+	"banana", "apple", "sandwich", "orange", "broccoli",
+	"carrot", "hot dog", "pizza", "donut", "cake",
+	"chair", "couch", "potted plant", "bed", "dining table",
+	"toilet", "tv", "laptop", "mouse", "remote",
+	"keyboard", "cell phone", "microwave", "oven",
+	"toaster", "sink", "refrigerator", "book", "clock", "vase",
+	"scissors", "teddy bear", "hair drier", "toothbrush" };
+
+const std::vector<std::vector<unsigned int>> COLORS = {
+	{ 0, 114, 189 }, { 217, 83, 25 }, { 237, 177, 32 },
+	{ 126, 47, 142 }, { 119, 172, 48 }, { 77, 190, 238 },
+	{ 162, 20, 47 }, { 76, 76, 76 }, { 153, 153, 153 },
+	{ 255, 0, 0 }, { 255, 128, 0 }, { 191, 191, 0 },
+	{ 0, 255, 0 }, { 0, 0, 255 }, { 170, 0, 255 },
+	{ 85, 85, 0 }, { 85, 170, 0 }, { 85, 255, 0 },
+	{ 170, 85, 0 }, { 170, 170, 0 }, { 170, 255, 0 },
+	{ 255, 85, 0 }, { 255, 170, 0 }, { 255, 255, 0 },
+	{ 0, 85, 128 }, { 0, 170, 128 }, { 0, 255, 128 },
+	{ 85, 0, 128 }, { 85, 85, 128 }, { 85, 170, 128 },
+	{ 85, 255, 128 }, { 170, 0, 128 }, { 170, 85, 128 },
+	{ 170, 170, 128 }, { 170, 255, 128 }, { 255, 0, 128 },
+	{ 255, 85, 128 }, { 255, 170, 128 }, { 255, 255, 128 },
+	{ 0, 85, 255 }, { 0, 170, 255 }, { 0, 255, 255 },
+	{ 85, 0, 255 }, { 85, 85, 255 }, { 85, 170, 255 },
+	{ 85, 255, 255 }, { 170, 0, 255 }, { 170, 85, 255 },
+	{ 170, 170, 255 }, { 170, 255, 255 }, { 255, 0, 255 },
+	{ 255, 85, 255 }, { 255, 170, 255 }, { 85, 0, 0 },
+	{ 128, 0, 0 }, { 170, 0, 0 }, { 212, 0, 0 },
+	{ 255, 0, 0 }, { 0, 43, 0 }, { 0, 85, 0 },
+	{ 0, 128, 0 }, { 0, 170, 0 }, { 0, 212, 0 },
+	{ 0, 255, 0 }, { 0, 0, 43 }, { 0, 0, 85 },
+	{ 0, 0, 128 }, { 0, 0, 170 }, { 0, 0, 212 },
+	{ 0, 0, 255 }, { 0, 0, 0 }, { 36, 36, 36 },
+	{ 73, 73, 73 }, { 109, 109, 109 }, { 146, 146, 146 },
+	{ 182, 182, 182 }, { 219, 219, 219 }, { 0, 114, 189 },
+	{ 80, 183, 189 }, { 128, 128, 0 }
+};
+
+const std::vector<std::vector<unsigned int>> MASK_COLORS = {
+	{ 255, 56, 56 }, { 255, 157, 151 }, { 255, 112, 31 },
+	{ 255, 178, 29 }, { 207, 210, 49 }, { 72, 249, 10 },
+	{ 146, 204, 23 }, { 61, 219, 134 }, { 26, 147, 52 },
+	{ 0, 212, 187 }, { 44, 153, 168 }, { 0, 194, 255 },
+	{ 52, 69, 147 }, { 100, 115, 255 }, { 0, 24, 236 },
+	{ 132, 56, 255 }, { 82, 0, 133 }, { 203, 56, 255 },
+	{ 255, 149, 200 }, { 255, 55, 199 }
+};
+
 int main(int argc, char** argv)
 {
-	cudaSetDevice(DEVICE);
+	// cuda:0
+	cudaSetDevice(0);

 	const std::string engine_file_path{ argv[1] };
 	const std::string path{ argv[2] };
-	std::vector<cv::String> imagePathList;
+
+	std::vector<std::string> imagePathList;
 	bool isVideo{ false };
+
+	assert(argc == 3);
+
+	auto yolov8 = new YOLOv8_seg(engine_file_path);
+	yolov8->make_pipe(true);
+
 	if (IsFile(path))
 	{
 		std::string suffix = path.substr(path.find_last_of('.') + 1);
-		if (suffix == "jpg")
+		if (
+			suffix == "jpg" ||
+				suffix == "jpeg" ||
+				suffix == "png"
+			)
 		{
 			imagePathList.push_back(path);
 		}
-		else if (suffix == "mp4")
+		else if (
+			suffix == "mp4" ||
+				suffix == "avi" ||
+				suffix == "m4v" ||
+				suffix == "mpeg" ||
+				suffix == "mov" ||
+				suffix == "mkv"
+			)
 		{
 			isVideo = true;
 		}
+		else
+		{
+			printf("suffix %s is wrong !!!\n", suffix.c_str());
+			std::abort();
+		}
 	}
 	else if (IsFolder(path))
 	{
 		cv::glob(path + "/*.jpg", imagePathList);
 	}

-	auto* yolov8 = new YOLOv8_seg(engine_file_path);
-	yolov8->make_pipe(true);
-	cv::Mat res;
+	cv::Mat res, image;
+	cv::Size size = cv::Size{ 640, 640 };
+	int topk = 100;
+	int seg_h = 160;
+	int seg_w = 160;
+	int seg_channels = 32;
+	float score_thres = 0.25f;
+	float iou_thres = 0.65f;
+
+	std::vector<Object> objs;
+
 	cv::namedWindow("result", cv::WINDOW_AUTOSIZE);
+
 	if (isVideo)
 	{
 		cv::VideoCapture cap(path);
-		cv::Mat image;
+
 		if (!cap.isOpened())
 		{
-			printf("can not open ...\n");
+			printf("can not open %s\n", path.c_str());
 			return -1;
 		}
-		double fp_ = cap.get(cv::CAP_PROP_FPS);
-		int fps = round(1000.0 / fp_);
 		while (cap.read(image))
 		{
+			objs.clear();
+			yolov8->copy_from_Mat(image, size);
 			auto start = std::chrono::system_clock::now();
-			yolov8->copy_from_Mat(image);
 			yolov8->infer();
-			std::vector<Object> objs;
-			yolov8->postprocess(objs);
-			draw_objects(image, res, objs);
 			auto end = std::chrono::system_clock::now();
-			auto tc = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.f;
+			yolov8->postprocess(objs, score_thres, iou_thres, topk, seg_channels, seg_h, seg_w);
+			yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS, MASK_COLORS);
+			auto tc = (double)
+				std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+			printf("cost %2.4lf ms\n", tc);
 			cv::imshow("result", res);
-			printf("cost %2.4f ms\n", tc);
-			if (cv::waitKey(fps) == 'q')
+			if (cv::waitKey(10) == 'q')
 			{
 				break;
 			}
@ -62,20 +155,19 @@ int main(int argc, char** argv)
 	}
 	else
 	{
-		for (auto path : imagePathList)
+		for (auto& path : imagePathList)
 		{
-			cv::Mat image = cv::imread(path);
-			yolov8->copy_from_Mat(image);
+			objs.clear();
+			image = cv::imread(path);
+			yolov8->copy_from_Mat(image, size);
 			auto start = std::chrono::system_clock::now();
 			yolov8->infer();
 			auto end = std::chrono::system_clock::now();
-			auto tc = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.f;
-
-			printf("infer %-20s\tcost %2.4f ms\n", path.c_str(), tc);
-
-			std::vector<Object> objs;
-			yolov8->postprocess(objs);
-			draw_objects(image, res, objs);
+			yolov8->postprocess(objs, score_thres, iou_thres, topk, seg_channels, seg_h, seg_w);
+			yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS, MASK_COLORS);
+			auto tc = (double)
+				std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+			printf("cost %2.4lf ms\n", tc);
 			cv::imshow("result", res);
 			cv::waitKey(0);
 		}
--- a/docs/Segment.md
+++ b/docs/Segment.md
@ -100,7 +100,16 @@ You can infer segment engine with c++ in [`csrc/segment`](../csrc/segment) .

 ### Build:

-Please set you own librarys in [`CMakeLists.txt`](../csrc/segment/CMakeLists.txt) and modify you own config in [`config.h`](../csrc/segment/include/config.h) such as `CLASS_NAMES`, `COLORS` and others .
+Please set your own libraries in [`CMakeLists.txt`](../csrc/segment/CMakeLists.txt) and modify your own config in [`main.cpp`](../csrc/segment/main.cpp), such as `CLASS_NAMES`, `COLORS`, `MASK_COLORS` and the postprocess parameters.
+
+```c++
+int topk = 100;
+int seg_h = 160; // yolov8 model proto height
+int seg_w = 160; // yolov8 model proto width
+int seg_channels = 32; // yolov8 model proto channels
+float score_thres = 0.25f;
+float iou_thres = 0.65f;
+```

 ``` shell
 export root=${PWD}