From d9687b12c1a2756d48529d4f3f1fdd638630e361 Mon Sep 17 00:00:00 2001
From: triple-Mu
Date: Mon, 15 May 2023 15:59:31 +0800
Subject: [PATCH] Support Jetson pose model

---
 csrc/jetson/pose/CMakeLists.txt          |  60 +++
 csrc/jetson/pose/include/common.hpp      | 157 +++++++
 csrc/jetson/pose/include/yolov8-pose.hpp | 515 +++++++++++++++++++++++
 csrc/jetson/pose/main.cpp                | 161 +++++++
 csrc/pose/normal/include/yolov8-pose.hpp |   4 +-
 docs/Jetson.md                           |  67 +++
 6 files changed, 962 insertions(+), 2 deletions(-)
 create mode 100644 csrc/jetson/pose/CMakeLists.txt
 create mode 100644 csrc/jetson/pose/include/common.hpp
 create mode 100644 csrc/jetson/pose/include/yolov8-pose.hpp
 create mode 100644 csrc/jetson/pose/main.cpp

diff --git a/csrc/jetson/pose/CMakeLists.txt b/csrc/jetson/pose/CMakeLists.txt
new file mode 100644
index 0000000..41675f6
--- /dev/null
+++ b/csrc/jetson/pose/CMakeLists.txt
@@ -0,0 +1,60 @@
+cmake_minimum_required(VERSION 2.8.12)
+
+set(CMAKE_CUDA_ARCHITECTURES 60 61 62 70 72 75 86)
+set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
+
+project(yolov8-pose LANGUAGES CXX CUDA)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O3 -g")
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_BUILD_TYPE Release)
+option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
+
+# CUDA
+find_package(CUDA REQUIRED)
+message(STATUS "CUDA Libs: \n${CUDA_LIBRARIES}\n")
+message(STATUS "CUDA Headers: \n${CUDA_INCLUDE_DIRS}\n")
+
+# OpenCV
+find_package(OpenCV REQUIRED)
+message(STATUS "OpenCV Libs: \n${OpenCV_LIBS}\n")
+message(STATUS "OpenCV Libraries: \n${OpenCV_LIBRARIES}\n")
+message(STATUS "OpenCV Headers: \n${OpenCV_INCLUDE_DIRS}\n")
+
+# TensorRT
+set(TensorRT_INCLUDE_DIRS /usr/include/aarch64-linux-gnu)
+set(TensorRT_LIBRARIES /usr/lib/aarch64-linux-gnu)
+
+
+message(STATUS "TensorRT Libs: \n${TensorRT_LIBRARIES}\n")
+message(STATUS "TensorRT Headers: \n${TensorRT_INCLUDE_DIRS}\n")
+
+list(APPEND INCLUDE_DIRS
+        ${CUDA_INCLUDE_DIRS}
+        ${OpenCV_INCLUDE_DIRS}
+        ${TensorRT_INCLUDE_DIRS}
+        ./include
+        )
+
+list(APPEND ALL_LIBS
+        ${CUDA_LIBRARIES}
+        ${OpenCV_LIBRARIES}
+        ${TensorRT_LIBRARIES}
+        )
+
+include_directories(${INCLUDE_DIRS})
+
+add_executable(${PROJECT_NAME}
+        main.cpp
+        include/yolov8-pose.hpp
+        include/common.hpp
+        )
+
+link_directories(${ALL_LIBS})
+target_link_libraries(${PROJECT_NAME} PRIVATE nvinfer nvinfer_plugin ${CUDA_LIBRARIES} ${OpenCV_LIBS})
+
+
+if(${OpenCV_VERSION} VERSION_GREATER_EQUAL 4.7.0)
+    message(STATUS "Build with -DBATCHED_NMS")
+    add_definitions(-DBATCHED_NMS)
+endif()
diff --git a/csrc/jetson/pose/include/common.hpp b/csrc/jetson/pose/include/common.hpp
new file mode 100644
index 0000000..e23ac01
--- /dev/null
+++ b/csrc/jetson/pose/include/common.hpp
@@ -0,0 +1,157 @@
+//
+// Created by ubuntu on 5/15/23.
+//
+
+#ifndef JETSON_POSE_COMMON_HPP
+#define JETSON_POSE_COMMON_HPP
+#include "opencv2/opencv.hpp"
+#include <sys/stat.h>
+#include <unistd.h>
+#include "NvInfer.h"
+
+#define CHECK(call)                                   \
+do                                                    \
+{                                                     \
+    const cudaError_t error_code = call;              \
+    if (error_code != cudaSuccess)                    \
+    {                                                 \
+        printf("CUDA Error:\n");                      \
+        printf("    File:       %s\n", __FILE__);     \
+        printf("    Line:       %d\n", __LINE__);     \
+        printf("    Error code: %d\n", error_code);   \
+        printf("    Error text: %s\n",                \
+            cudaGetErrorString(error_code));          \
+        exit(1);                                      \
+    }                                                 \
+} while (0)
+
+class Logger : public nvinfer1::ILogger
+{
+public:
+    nvinfer1::ILogger::Severity reportableSeverity;
+
+    explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO) :
+        reportableSeverity(severity)
+    {
+    }
+
+    void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
+    {
+        if (severity > reportableSeverity)
+        {
+            return;
+        }
+        switch (severity)
+        {
+        case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
+            std::cerr << "INTERNAL_ERROR: ";
+            break;
+        case nvinfer1::ILogger::Severity::kERROR:
+            std::cerr << "ERROR: ";
+            break;
+        case nvinfer1::ILogger::Severity::kWARNING:
+            std::cerr << "WARNING: ";
+            break;
+        case nvinfer1::ILogger::Severity::kINFO:
+            std::cerr << "INFO: ";
+            break;
+        default:
+            std::cerr << "VERBOSE: ";
+            break;
+        }
+        std::cerr << msg << std::endl;
+    }
+};
+
+inline int get_size_by_dims(const nvinfer1::Dims& dims)
+{
+    int size = 1;
+    for (int i = 0; i < dims.nbDims; i++)
+    {
+        size *= dims.d[i];
+    }
+    return size;
+}
+
+inline int type_to_size(const nvinfer1::DataType& dataType)
+{
+    switch (dataType)
+    {
+    case nvinfer1::DataType::kFLOAT:
+        return 4;
+    case nvinfer1::DataType::kHALF:
+        return 2;
+    case nvinfer1::DataType::kINT32:
+        return 4;
+    case nvinfer1::DataType::kINT8:
+        return 1;
+    case nvinfer1::DataType::kBOOL:
+        return 1;
+    default:
+        return 4;
+    }
+}
+
+inline static float clamp(float val, float min, float max)
+{
+    return val > min ? (val < max ? val : max) : min;
+}
+
+inline bool IsPathExist(const std::string& path)
+{
+    if (access(path.c_str(), 0) == F_OK)
+    {
+        return true;
+    }
+    return false;
+}
+
+inline bool IsFile(const std::string& path)
+{
+    if (!IsPathExist(path))
+    {
+        printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
+        return false;
+    }
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
+}
+
+inline bool IsFolder(const std::string& path)
+{
+    if (!IsPathExist(path))
+    {
+        return false;
+    }
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
+}
+
+namespace pose
+{
+    struct Binding
+    {
+        size_t size = 1;
+        size_t dsize = 1;
+        nvinfer1::Dims dims;
+        std::string name;
+    };
+
+    struct Object
+    {
+        cv::Rect_<float> rect;
+        int label = 0;
+        float prob = 0.0;
+        std::vector<float> kps;
+    };
+
+    struct PreParam
+    {
+        float ratio = 1.0f;
+        float dw = 0.0f;
+        float dh = 0.0f;
+        float height = 0;
+        float width = 0;
+    };
+}
+#endif //JETSON_POSE_COMMON_HPP
diff --git a/csrc/jetson/pose/include/yolov8-pose.hpp b/csrc/jetson/pose/include/yolov8-pose.hpp
new file mode 100644
index 0000000..4a04584
--- /dev/null
+++ b/csrc/jetson/pose/include/yolov8-pose.hpp
@@ -0,0 +1,515 @@
+//
+// Created by ubuntu on 1/20/23.
+//
+#ifndef JETSON_POSE_YOLOV8_POSE_HPP
+#define JETSON_POSE_YOLOV8_POSE_HPP
+
+#include "fstream"
+#include "common.hpp"
+#include "NvInferPlugin.h"
+
+using namespace pose;
+
+class YOLOv8_pose {
+public:
+    explicit YOLOv8_pose(const std::string &engine_file_path);
+
+    ~YOLOv8_pose();
+
+    void make_pipe(bool warmup = true);
+
+    void copy_from_Mat(const cv::Mat &image);
+
+    void copy_from_Mat(const cv::Mat &image, cv::Size &size);
+
+    void letterbox(
+            const cv::Mat &image,
+            cv::Mat &out,
+            cv::Size &size
+    );
+
+    void infer();
+
+    void postprocess(
+            std::vector<Object> &objs,
+            float score_thres = 0.25f,
+            float iou_thres = 0.65f,
+            int topk = 100
+    );
+
+    static void draw_objects(
+            const cv::Mat &image,
+            cv::Mat &res,
+            const std::vector<Object> &objs,
+            const std::vector<std::vector<unsigned int>> &SKELETON,
+            const std::vector<std::vector<unsigned int>> &KPS_COLORS,
+            const std::vector<std::vector<unsigned int>> &LIMB_COLORS
+    );
+
+    int num_bindings;
+    int num_inputs = 0;
+    int num_outputs = 0;
+    std::vector<Binding> input_bindings;
+    std::vector<Binding> output_bindings;
+    std::vector<void *> host_ptrs;
+    std::vector<void *> device_ptrs;
+
+    PreParam pparam;
+private:
+    nvinfer1::ICudaEngine *engine = nullptr;
+    nvinfer1::IRuntime *runtime = nullptr;
+    nvinfer1::IExecutionContext *context = nullptr;
+    cudaStream_t stream = nullptr;
+    Logger gLogger{nvinfer1::ILogger::Severity::kERROR};
+
+};
+
+YOLOv8_pose::YOLOv8_pose(const std::string &engine_file_path) {
+    std::ifstream file(engine_file_path, std::ios::binary);
+    assert(file.good());
+    file.seekg(0, std::ios::end);
+    auto size = file.tellg();
+    file.seekg(0, std::ios::beg);
+    char *trtModelStream = new char[size];
+    assert(trtModelStream);
+    file.read(trtModelStream, size);
+    file.close();
+    initLibNvInferPlugins(&this->gLogger, "");
+    this->runtime = nvinfer1::createInferRuntime(this->gLogger);
+    assert(this->runtime != nullptr);
+
+    this->engine = this->runtime->deserializeCudaEngine(trtModelStream, size);
+    assert(this->engine != nullptr);
+
+    this->context = this->engine->createExecutionContext();
+
+    assert(this->context != nullptr);
+    cudaStreamCreate(&this->stream);
+    this->num_bindings = this->engine->getNbBindings();
+
+    for (int i = 0; i < this->num_bindings; ++i) {
+        Binding binding;
+        nvinfer1::Dims dims;
+        nvinfer1::DataType dtype = this->engine->getBindingDataType(i);
+        std::string name = this->engine->getBindingName(i);
+        binding.name = name;
+        binding.dsize = type_to_size(dtype);
+
+        bool IsInput = engine->bindingIsInput(i);
+        if (IsInput) {
+            this->num_inputs += 1;
+            dims = this->engine->getProfileDimensions(
+                    i,
+                    0,
+                    nvinfer1::OptProfileSelector::kMAX);
+            binding.size = get_size_by_dims(dims);
+            binding.dims = dims;
+            this->input_bindings.push_back(binding);
+            // set max opt shape
+            this->context->setBindingDimensions(i, dims);
+
+        } else {
+            dims = this->context->getBindingDimensions(i);
+            binding.size = get_size_by_dims(dims);
+            binding.dims = dims;
+            this->output_bindings.push_back(binding);
+            this->num_outputs += 1;
+        }
+    }
+
+}
+
+YOLOv8_pose::~YOLOv8_pose() {
+    this->context->destroy();
+    this->engine->destroy();
+    this->runtime->destroy();
+    cudaStreamDestroy(this->stream);
+    for (auto &ptr: this->device_ptrs) {
+        CHECK(cudaFree(ptr));
+    }
+
+    for (auto &ptr: this->host_ptrs) {
+        CHECK(cudaFreeHost(ptr));
+    }
+
+}
+
+void YOLOv8_pose::make_pipe(bool warmup) {
+
+    for (auto &bindings: this->input_bindings) {
+        void *d_ptr;
+        CHECK(cudaMalloc(
+                &d_ptr,
+                bindings.size * bindings.dsize
+        )
+        );
+        this->device_ptrs.push_back(d_ptr);
+    }
+
+    for (auto &bindings: this->output_bindings) {
+        void *d_ptr, *h_ptr;
+        size_t size = bindings.size * bindings.dsize;
+        CHECK(cudaMalloc(
+                &d_ptr,
+                size)
+        );
+        CHECK(cudaHostAlloc(
+                &h_ptr,
+                size,
+                0)
+        );
+        this->device_ptrs.push_back(d_ptr);
+        this->host_ptrs.push_back(h_ptr);
+    }
+
+    if (warmup) {
+        for (int i = 0; i < 10; i++) {
+            for (auto &bindings: this->input_bindings) {
+                size_t size = bindings.size * bindings.dsize;
+                void *h_ptr = malloc(size);
+                memset(h_ptr, 0, size);
+                CHECK(cudaMemcpyAsync(
+                        this->device_ptrs[0],
+                        h_ptr,
+                        size,
+                        cudaMemcpyHostToDevice,
+                        this->stream)
+                );
+                free(h_ptr);
+            }
+            this->infer();
+        }
+        printf("model warmup 10 times\n");
+
+    }
+}
+
+void YOLOv8_pose::letterbox(
+        const cv::Mat &image,
+        cv::Mat &out,
+        cv::Size &size
+) {
+    const float inp_h = size.height;
+    const float inp_w = size.width;
+    float height = image.rows;
+    float width = image.cols;
+
+    float r = std::min(inp_h / height, inp_w / width);
+    int padw = std::round(width * r);
+    int padh = std::round(height * r);
+
+    cv::Mat tmp;
+    if ((int) width != padw || (int) height != padh) {
+        cv::resize(
+                image,
+                tmp,
+                cv::Size(padw, padh)
+        );
+    } else {
+        tmp = image.clone();
+    }
+
+    float dw = inp_w - padw;
+    float dh = inp_h - padh;
+
+    dw /= 2.0f;
+    dh /= 2.0f;
+    int top = int(std::round(dh - 0.1f));
+    int bottom = int(std::round(dh + 0.1f));
+    int left = int(std::round(dw - 0.1f));
+    int right = int(std::round(dw + 0.1f));
+
+    cv::copyMakeBorder(
+            tmp,
+            tmp,
+            top,
+            bottom,
+            left,
+            right,
+            cv::BORDER_CONSTANT,
+            {114, 114, 114}
+    );
+
+    cv::dnn::blobFromImage(tmp,
+                           out,
+                           1 / 255.f,
+                           cv::Size(),
+                           cv::Scalar(0, 0, 0),
+                           true,
+                           false,
+                           CV_32F
+    );
+    this->pparam.ratio = 1 / r;
+    this->pparam.dw = dw;
+    this->pparam.dh = dh;
+    this->pparam.height = height;
+    this->pparam.width = width;
+}
+
+void YOLOv8_pose::copy_from_Mat(const cv::Mat &image) {
+    cv::Mat nchw;
+    auto &in_binding = this->input_bindings[0];
+    auto width = in_binding.dims.d[3];
+    auto height = in_binding.dims.d[2];
+    cv::Size size{width, height};
+    this->letterbox(
+            image,
+            nchw,
+            size
+    );
+
+    this->context->setBindingDimensions(
+            0,
+            nvinfer1::Dims
+                    {
+                            4,
+                            {1, 3, height, width}
+                    }
+    );
+
+    CHECK(cudaMemcpyAsync(
+            this->device_ptrs[0],
+            nchw.ptr<float>(),
+            nchw.total() * nchw.elemSize(),
+            cudaMemcpyHostToDevice,
+            this->stream)
+    );
+}
+
+void YOLOv8_pose::copy_from_Mat(const cv::Mat &image, cv::Size &size) {
+    cv::Mat nchw;
+    this->letterbox(
+            image,
+            nchw,
+            size
+    );
+    this->context->setBindingDimensions(
+            0,
+            nvinfer1::Dims
+                    {4,
+                     {1, 3, size.height, size.width}
+                    }
+    );
+    CHECK(cudaMemcpyAsync(
+            this->device_ptrs[0],
+            nchw.ptr<float>(),
+            nchw.total() * nchw.elemSize(),
+            cudaMemcpyHostToDevice,
+            this->stream)
+    );
+}
+
+void YOLOv8_pose::infer() {
+
+    this->context->enqueueV2(
+            this->device_ptrs.data(),
+            this->stream,
+            nullptr
+    );
+    for (int i = 0; i < this->num_outputs; i++) {
+        size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;
+        CHECK(cudaMemcpyAsync(this->host_ptrs[i],
+                              this->device_ptrs[i + this->num_inputs],
+                              osize,
+                              cudaMemcpyDeviceToHost,
+                              this->stream)
+        );
+
+    }
+    cudaStreamSynchronize(this->stream);
+
+}
+
+void YOLOv8_pose::postprocess(
+        std::vector<Object> &objs,
+        float score_thres,
+        float iou_thres,
+        int topk
+) {
+    objs.clear();
+    auto num_channels = this->output_bindings[0].dims.d[1];
+    auto num_anchors = this->output_bindings[0].dims.d[2];
+
+    auto &dw = this->pparam.dw;
+    auto &dh = this->pparam.dh;
+    auto &width = this->pparam.width;
+    auto &height = this->pparam.height;
+    auto &ratio = this->pparam.ratio;
+
+    std::vector<cv::Rect> bboxes;
+    std::vector<float> scores;
+    std::vector<int> labels;
+    std::vector<int> indices;
+    std::vector<std::vector<float>> kpss;
+
+    cv::Mat output = cv::Mat(
+            num_channels,
+            num_anchors,
+            CV_32F,
+            static_cast<float *>(this->host_ptrs[0])
+    );
+    output = output.t();
+    for (int i = 0; i < num_anchors; i++) {
+        auto row_ptr = output.row(i).ptr<float>();
+        auto bboxes_ptr = row_ptr;
+        auto scores_ptr = row_ptr + 4;
+        auto kps_ptr = row_ptr + 5;
+
+        float score = *scores_ptr;
+        if (score > score_thres) {
+            float x = *bboxes_ptr++ - dw;
+            float y = *bboxes_ptr++ - dh;
+            float w = *bboxes_ptr++;
+            float h = *bboxes_ptr;
+
+            float x0 = clamp((x - 0.5f * w) * ratio, 0.f, width);
+            float y0 = clamp((y - 0.5f * h) * ratio, 0.f, height);
+            float x1 = clamp((x + 0.5f * w) * ratio, 0.f, width);
+            float y1 = clamp((y + 0.5f * h) * ratio, 0.f, height);
+
+            cv::Rect_<float> bbox;
+            bbox.x = x0;
+            bbox.y = y0;
+            bbox.width = x1 - x0;
+            bbox.height = y1 - y0;
+            std::vector<float> kps;
+            for (int k = 0; k < 17; k++) {
+                float kps_x = (*(kps_ptr + 3 * k) - dw) * ratio;
+                float kps_y = (*(kps_ptr + 3 * k + 1) - dh) * ratio;
+                float kps_s = *(kps_ptr + 3 * k + 2);
+                kps_x = clamp(kps_x, 0.f, width);
+                kps_y = clamp(kps_y, 0.f, height);
+                kps.push_back(kps_x);
+                kps.push_back(kps_y);
+                kps.push_back(kps_s);
+            }
+
+            bboxes.push_back(bbox);
+            labels.push_back(0);
+            scores.push_back(score);
+            kpss.push_back(kps);
+        }
+    }
+
+#ifdef BATCHED_NMS
+    cv::dnn::NMSBoxesBatched(
+            bboxes,
+            scores,
+            labels,
+            score_thres,
+            iou_thres,
+            indices
+    );
+#else
+    cv::dnn::NMSBoxes(
+            bboxes,
+            scores,
+            score_thres,
+            iou_thres,
+            indices
+    );
+#endif
+
+    int cnt = 0;
+    for (auto &i: indices) {
+        if (cnt >= topk) {
+            break;
+        }
+        Object obj;
+        obj.rect = bboxes[i];
+        obj.prob = scores[i];
+        obj.label = labels[i];
+        obj.kps = kpss[i];
+        objs.push_back(obj);
+        cnt += 1;
+    }
+}
+
+void YOLOv8_pose::draw_objects(
+        const cv::Mat &image,
+        cv::Mat &res,
+        const std::vector<Object> &objs,
+        const std::vector<std::vector<unsigned int>> &SKELETON,
+        const std::vector<std::vector<unsigned int>> &KPS_COLORS,
+        const std::vector<std::vector<unsigned int>> &LIMB_COLORS
+) {
+    res = image.clone();
+    const int num_point = 17;
+    for (auto &obj: objs) {
+        cv::rectangle(
+                res,
+                obj.rect,
+                {0, 0, 255},
+                2
+        );
+
+        char text[256];
+        sprintf(
+                text,
+                "person %.1f%%",
+                obj.prob * 100
+        );
+
+        int baseLine = 0;
+        cv::Size label_size = cv::getTextSize(
+                text,
+                cv::FONT_HERSHEY_SIMPLEX,
+                0.4,
+                1,
+                &baseLine
+        );
+
+        int x = (int) obj.rect.x;
+        int y = (int) obj.rect.y + 1;
+
+        if (y > res.rows)
+            y = res.rows;
+
+        cv::rectangle(
+                res,
+                cv::Rect(x, y, label_size.width, label_size.height + baseLine),
+                {0, 0, 255},
+                -1
+        );
+
+        cv::putText(
+                res,
+                text,
+                cv::Point(x, y + label_size.height),
+                cv::FONT_HERSHEY_SIMPLEX,
+                0.4,
+                {255, 255, 255},
+                1
+        );
+
+        auto &kps = obj.kps;
+        for (int k = 0; k < num_point + 2; k++) {
+            if (k < num_point) {
+                int kps_x = std::round(kps[k * 3]);
+                int kps_y = std::round(kps[k * 3 + 1]);
+                float kps_s = kps[k * 3 + 2];
+                if (kps_s > 0.5f) {
+                    cv::Scalar kps_color = cv::Scalar(KPS_COLORS[k][0], KPS_COLORS[k][1], KPS_COLORS[k][2]);
+                    cv::circle(res, {kps_x, kps_y}, 5, kps_color, -1);
+                }
+            }
+            auto &ske = SKELETON[k];
+            int pos1_x = std::round(kps[(ske[0] - 1) * 3]);
+            int pos1_y = std::round(kps[(ske[0] - 1) * 3 + 1]);
+
+            int pos2_x = std::round(kps[(ske[1] - 1) * 3]);
+            int pos2_y = std::round(kps[(ske[1] - 1) * 3 + 1]);
+
+            float pos1_s = kps[(ske[0] - 1) * 3 + 2];
+            float pos2_s = kps[(ske[1] - 1) * 3 + 2];
+
+
+            if (pos1_s > 0.5f && pos2_s > 0.5f) {
+                cv::Scalar limb_color = cv::Scalar(LIMB_COLORS[k][0], LIMB_COLORS[k][1], LIMB_COLORS[k][2]);
+                cv::line(res, {pos1_x, pos1_y}, {pos2_x, pos2_y}, limb_color, 2);
+            }
+        }
+    }
+}
+
+#endif //JETSON_POSE_YOLOV8_POSE_HPP
diff --git a/csrc/jetson/pose/main.cpp b/csrc/jetson/pose/main.cpp
new file mode 100644
index 0000000..8a0720f
--- /dev/null
+++ b/csrc/jetson/pose/main.cpp
@@ -0,0 +1,161 @@
+//
+// Created by ubuntu on 4/7/23.
+//
+#include "chrono"
+#include "yolov8-pose.hpp"
+#include "opencv2/opencv.hpp"
+
+
+const std::vector<std::vector<unsigned int>> KPS_COLORS =
+        {{0,   255, 0},
+         {0,   255, 0},
+         {0,   255, 0},
+         {0,   255, 0},
+         {0,   255, 0},
+         {255, 128, 0},
+         {255, 128, 0},
+         {255, 128, 0},
+         {255, 128, 0},
+         {255, 128, 0},
+         {255, 128, 0},
+         {51,  153, 255},
+         {51,  153, 255},
+         {51,  153, 255},
+         {51,  153, 255},
+         {51,  153, 255},
+         {51,  153, 255}};
+
+const std::vector<std::vector<unsigned int>> SKELETON = {{16, 14},
+                                                         {14, 12},
+                                                         {17, 15},
+                                                         {15, 13},
+                                                         {12, 13},
+                                                         {6,  12},
+                                                         {7,  13},
+                                                         {6,  7},
+                                                         {6,  8},
+                                                         {7,  9},
+                                                         {8,  10},
+                                                         {9,  11},
+                                                         {2,  3},
+                                                         {1,  2},
+                                                         {1,  3},
+                                                         {2,  4},
+                                                         {3,  5},
+                                                         {4,  6},
+                                                         {5,  7}};
+
+const std::vector<std::vector<unsigned int>> LIMB_COLORS = {{51,  153, 255},
+                                                            {51,  153, 255},
+                                                            {51,  153, 255},
+                                                            {51,  153, 255},
+                                                            {255, 51,  255},
+                                                            {255, 51,  255},
+                                                            {255, 51,  255},
+                                                            {255, 128, 0},
+                                                            {255, 128, 0},
+                                                            {255, 128, 0},
+                                                            {255, 128, 0},
+                                                            {255, 128, 0},
+                                                            {0,   255, 0},
+                                                            {0,   255, 0},
+                                                            {0,   255, 0},
+                                                            {0,   255, 0},
+                                                            {0,   255, 0},
+                                                            {0,   255, 0},
+                                                            {0,   255, 0}};
+
+int main(int argc, char **argv) {
+    // cuda:0
+    cudaSetDevice(0);
+
+    const std::string engine_file_path{argv[1]};
+    const std::string path{argv[2]};
+
+    std::vector<std::string> imagePathList;
+    bool isVideo{false};
+
+    assert(argc == 3);
+
+    auto yolov8_pose = new YOLOv8_pose(engine_file_path);
+    yolov8_pose->make_pipe(true);
+
+    if (IsFile(path)) {
+        std::string suffix = path.substr(path.find_last_of('.') + 1);
+        if (
+                suffix == "jpg" ||
+                suffix == "jpeg" ||
+                suffix == "png"
+                ) {
+            imagePathList.push_back(path);
+        } else if (
+                suffix == "mp4" ||
+                suffix == "avi" ||
+                suffix == "m4v" ||
+                suffix == "mpeg" ||
+                suffix == "mov" ||
+                suffix == "mkv"
+                ) {
+            isVideo = true;
+        } else {
+            printf("suffix %s is wrong !!!\n", suffix.c_str());
+            std::abort();
+        }
+    } else if (IsFolder(path)) {
+        cv::glob(path + "/*.jpg", imagePathList);
+    }
+
+    cv::Mat res, image;
+    cv::Size size = cv::Size{640, 640};
+    int topk = 100;
+    float score_thres = 0.25f;
+    float iou_thres = 0.65f;
+
+    std::vector<Object> objs;
+
+    cv::namedWindow("result", cv::WINDOW_AUTOSIZE);
+
+    if (isVideo) {
+        cv::VideoCapture cap(path);
+
+        if (!cap.isOpened()) {
+            printf("can not open %s\n", path.c_str());
+            return -1;
+        }
+        while (cap.read(image)) {
+            objs.clear();
+            yolov8_pose->copy_from_Mat(image, size);
+            auto start = std::chrono::system_clock::now();
+            yolov8_pose->infer();
+            auto end = std::chrono::system_clock::now();
+            yolov8_pose->postprocess(objs, score_thres, iou_thres, topk);
+            yolov8_pose->draw_objects(image, res, objs, SKELETON, KPS_COLORS, LIMB_COLORS);
+            auto tc = (double)
+                    std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+            printf("cost %2.4lf ms\n", tc);
+            cv::imshow("result", res);
+            if (cv::waitKey(10) == 'q') {
+                break;
+            }
+        }
+    } else {
+        for (auto &path: imagePathList) {
+            objs.clear();
+            image = cv::imread(path);
+            yolov8_pose->copy_from_Mat(image, size);
+            auto start = std::chrono::system_clock::now();
+            yolov8_pose->infer();
+            auto end = std::chrono::system_clock::now();
+            yolov8_pose->postprocess(objs, score_thres, iou_thres, topk);
+            yolov8_pose->draw_objects(image, res, objs, SKELETON, KPS_COLORS, LIMB_COLORS);
+            auto tc = (double)
+                    std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+            printf("cost %2.4lf ms\n", tc);
+            cv::imshow("result", res);
+            cv::waitKey(0);
+        }
+    }
+    cv::destroyAllWindows();
+    delete yolov8_pose;
+    return 0;
+}
diff --git a/csrc/pose/normal/include/yolov8-pose.hpp b/csrc/pose/normal/include/yolov8-pose.hpp
index ca31310..fa3541e 100644
--- a/csrc/pose/normal/include/yolov8-pose.hpp
+++ b/csrc/pose/normal/include/yolov8-pose.hpp
@@ -1,5 +1,5 @@
 //
-// Created by ubuntu on 1/20/23.
+// Created by ubuntu on 4/7/23.
 //
 #ifndef POSE_NORMAL_YOLOv8_pose_HPP
 #define POSE_NORMAL_YOLOv8_pose_HPP
@@ -401,7 +401,7 @@ void YOLOv8_pose::postprocess(
         iou_thres,
         indices
     );
-#elif
+#else
     cv::dnn::NMSBoxes(
         bboxes,
         scores,
diff --git a/docs/Jetson.md b/docs/Jetson.md
index c0b49b9..e10b613 100644
--- a/docs/Jetson.md
+++ b/docs/Jetson.md
@@ -139,3 +139,70 @@ Usage:
 # infer video
 ./yolov8-seg yolov8s-seg.engine data/test.mp4 # the video path
 ```
+
+
+
+## Normal Posture
+
+### 1. Export Posture Normal ONNX
+
+`yolov8s-pose.pt` is your trained PyTorch model, or the official pre-trained model.
+
+Do not use any model format other than the PyTorch model.
+Do not use [`build.py`](../build.py) to export the engine if you don't know how to install PyTorch and the other dependencies on Jetson.
+
+***!!! Please use the PC to execute the following script !!!***
+
+```shell
+# Export yolov8s-pose.pt to yolov8s-pose.onnx
+yolo export model=yolov8s-pose.pt format=onnx simplify=True
+```
+
+***!!! Please use the Jetson to execute the following script !!!***
+
+```shell
+# Using trtexec tools for export engine
+/usr/src/tensorrt/bin/trtexec \
+--onnx=yolov8s-pose.onnx \
+--saveEngine=yolov8s-pose.engine
+```
+
+After executing the above command, you will get an engine named `yolov8s-pose.engine` .
+
+### 2. Inference with C++
+
+It is highly recommended to use C++ inference on Jetson.
+Here is a demo: [`csrc/jetson/pose`](../csrc/jetson/pose) .
+
+#### Build:
+
+Please modify `KPS_COLORS`, `SKELETON`, `LIMB_COLORS`, and the post-processing parameters in [`main.cpp`](../csrc/jetson/pose/main.cpp) for yourself.
+
+```c++
+int topk = 100;
+float score_thres = 0.25f;
+float iou_thres = 0.65f;
+```
+
+And build:
+
+``` shell
+export root=${PWD}
+cd csrc/jetson/pose
+mkdir build && cd build
+cmake ..
+make
+mv yolov8-pose ${root}
+cd ${root}
+```
+
+Usage:
+
+``` shell
+# infer image
+./yolov8-pose yolov8s-pose.engine data/bus.jpg
+# infer images
+./yolov8-pose yolov8s-pose.engine data
+# infer video
+./yolov8-pose yolov8s-pose.engine data/test.mp4 # the video path
+```