Skip to content

Instantly share code, notes, and snippets.

@StefanoLusardi
Last active December 31, 2024 08:38
Show Gist options
  • Select an option

  • Save StefanoLusardi/af2b3c11191d7b0c80431da202090dc1 to your computer and use it in GitHub Desktop.

Select an option

Save StefanoLusardi/af2b3c11191d7b0c80431da202090dc1 to your computer and use it in GitHub Desktop.
ONNX Yolo v8
# Export YOLOv8-nano weights to ONNX with a static 640x640 input
# (dynamic=False), opset 12, running the onnx-simplifier pass.
# The C++ program below consumes the resulting "yolov8n.onnx".
from ultralytics import YOLO
model = YOLO("yolov8n.pt")
model.export(format="onnx", opset=12, simplify=True, dynamic=False, imgsz=640)
#include <onnxruntime/core/session/onnxruntime_cxx_api.h>
#include <opencv2/opencv.hpp>
#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
#include <numeric>
#include <variant>
#include <fstream>
// One detected object, expressed in the original frame's coordinate space.
struct Detection
{
cv::Rect bbox; // bounding box in source-image pixel coordinates
float score; // detection confidence used for thresholding/NMS
int label; // class index into the model's label set
};
// First, define the variant type (could be in the header file)
// A single output-tensor element; ONNX Runtime outputs may be float32,
// int32 or int64 depending on the model, so wrap them in one variant.
using TensorElement = std::variant<float, int32_t, int64_t>;
// Abstract base for object detectors: holds the common thresholds and the
// network input geometry; concrete detectors implement pre/post-processing.
class Detector
{
protected:
    float confidenceThreshold_;   // minimum score for a candidate detection
    float nms_threshold_ = 0.4f;  // IoU threshold for non-maximum suppression
    size_t network_width_;        // network input width in pixels
    size_t network_height_;       // network input height in pixels
    std::string backend_;
    int channels_{ -1 };

public:
    // confidenceThreshold: minimum detection score to keep a candidate.
    // network_width/network_height: model input size (size_t(-1) == unset).
    Detector(
        float confidenceThreshold = 0.5f,
        size_t network_width = -1,
        size_t network_height = -1
    ) : confidenceThreshold_{confidenceThreshold},
        network_width_{network_width},
        network_height_{network_height}
    {
    }

    // Polymorphic base class: a virtual destructor is required so that
    // deleting a derived detector through a Detector* is well-defined.
    virtual ~Detector() = default;

    inline float getConfidenceThreshold(){ return confidenceThreshold_; }
    inline float getNetworkWidth() { return network_width_; }
    inline float getNetworkHeight() { return network_height_; }

    // Convert the raw network outputs (one flat vector per output tensor,
    // with matching shapes) into detections in frame_size coordinates.
    virtual std::vector<Detection> postprocess(const std::vector<std::vector<TensorElement>>& outputs, const std::vector<std::vector<int64_t>>& shapes, const cv::Size& frame_size) = 0;

    // Resize/normalize an input image into the network's expected format.
    virtual cv::Mat preprocess_image(const cv::Mat& image) = 0;
};
// Raw post-processing result before NMS index selection: parallel arrays,
// one entry per kept candidate box.
struct Output
{
std::vector<cv::Rect> boxes; // candidate boxes in source-image coordinates
std::vector<float> confs; // confidence score per box
std::vector<int> classIds; // class index per box
};
// Detector for YOLO v5/v6/v7/v8 ONNX exports; picks the decoder based on
// the output tensor's layout.
class YoloVn : public Detector
{
public:
// Defaults match the standard ultralytics 640x640 export.
YoloVn(
float confidenceThreshold = 0.25,
size_t network_width = 640,
size_t network_height = 640);
// Dispatches to the layout-specific decoder, then applies NMS.
std::vector<Detection> postprocess(const std::vector<std::vector<TensorElement>> &outputs, const std::vector<std::vector<int64_t>> &shapes, const cv::Size &frame_size) override;
// Letterbox resize + BGR->RGB + float [0,1] normalization.
cv::Mat preprocess_image(const cv::Mat &image) override;
// Map a (cx, cy, w, h) box from network space back to image coordinates.
cv::Rect get_rect(const cv::Size &imgSz, const std::vector<float> &bbox);
// Decoder for v5/6/7-style outputs shaped (1, anchors, 5 + classes).
std::tuple<std::vector<cv::Rect>, std::vector<float>, std::vector<int>> postprocess_v567(const TensorElement *output, const std::vector<int64_t> &shape, const cv::Size &frame_size);
// Decoder for v8/ultralytics outputs shaped (1, 4 + classes, anchors).
std::tuple<std::vector<cv::Rect>, std::vector<float>, std::vector<int>> postprocess_ultralytics(const TensorElement *output, const std::vector<int64_t> &shape, const cv::Size &frame_size);
// Same decoding as postprocess_ultralytics, but on raw floats and
// returning an Output struct instead of a tuple.
Output postprocess_ultralytics2(const float *output, const std::vector<int64_t> &shape, const cv::Size &frame_size);
};
// Forwards the confidence threshold and the network input size straight
// to the Detector base; YoloVn adds no state of its own.
YoloVn::YoloVn(
    float confidenceThreshold,
    size_t network_width,
    size_t network_height)
    : Detector{confidenceThreshold, network_width, network_height}
{
}
// Letterbox-resize an image to (network_width_ x network_height_): scale
// preserving the aspect ratio, pad the remainder with gray (128), convert
// BGR->RGB and normalize to CV_32F in [0, 1].
cv::Mat YoloVn::preprocess_image(const cv::Mat &img)
{
    const int net_w = static_cast<int>(network_width_);
    const int net_h = static_cast<int>(network_height_);
    int w, h, x, y;
    const float r_w = net_w / (img.cols * 1.0f);
    const float r_h = net_h / (img.rows * 1.0f);
    if (r_h > r_w)
    {
        // Width is the limiting dimension: pad top and bottom.
        w = net_w;
        h = static_cast<int>(r_w * img.rows);
        x = 0;
        y = (net_h - h) / 2;
    }
    else
    {
        // Height is the limiting dimension: pad left and right.
        w = static_cast<int>(r_h * img.cols);
        h = net_h;
        x = (net_w - w) / 2;
        y = 0;
    }
    cv::Mat re(h, w, CV_8UC3);
    cv::resize(img, re, re.size(), 0, 0, cv::INTER_LINEAR);
    // cv::Mat's constructor takes (rows, cols) == (height, width). The
    // original passed (width, height), which only worked because the
    // network input is square (640x640).
    cv::Mat out(net_h, net_w, CV_8UC3, cv::Scalar(128, 128, 128));
    re.copyTo(out(cv::Rect(x, y, re.cols, re.rows)));
    cv::cvtColor(out, out, cv::COLOR_BGR2RGB);
    out.convertTo(out, CV_32F, 1.0 / 255.0);
    return out;
}
// Map a bbox given as (cx, cy, w, h) in network input coordinates back to
// the original image: undo the letterbox padding, divide out the scale, and
// clamp to the image bounds.
cv::Rect YoloVn::get_rect(const cv::Size &imgSz, const std::vector<float> &bbox)
{
    const float r_w = network_width_ / static_cast<float>(imgSz.width);
    const float r_h = network_height_ / static_cast<float>(imgSz.height);

    float l, r, t, b;
    if (r_h > r_w)
    {
        // Image was scaled by r_w and padded vertically: remove the vertical
        // pad before unscaling.
        const float pad = (network_height_ - r_w * imgSz.height) / 2.f;
        l = (bbox[0] - bbox[2] / 2.f) / r_w;
        r = (bbox[0] + bbox[2] / 2.f) / r_w;
        t = (bbox[1] - bbox[3] / 2.f - pad) / r_w;
        b = (bbox[1] + bbox[3] / 2.f - pad) / r_w;
    }
    else
    {
        // Image was scaled by r_h and padded horizontally.
        const float pad = (network_width_ - r_h * imgSz.width) / 2.f;
        l = (bbox[0] - bbox[2] / 2.f - pad) / r_h;
        r = (bbox[0] + bbox[2] / 2.f - pad) / r_h;
        t = (bbox[1] - bbox[3] / 2.f) / r_h;
        b = (bbox[1] + bbox[3] / 2.f) / r_h;
    }

    // Keep coordinates in float until the very end so truncation happens
    // once, after scaling (the original truncated to int before dividing,
    // losing sub-pixel precision). Then clamp within the image bounds.
    const int li = std::clamp(static_cast<int>(l), 0, imgSz.width - 1);
    const int ri = std::clamp(static_cast<int>(r), 0, imgSz.width - 1);
    const int ti = std::clamp(static_cast<int>(t), 0, imgSz.height - 1);
    const int bi = std::clamp(static_cast<int>(b), 0, imgSz.height - 1);
    return cv::Rect(li, ti, ri - li, bi - ti);
}
// Decode a YOLO v5/6/7-style output tensor shaped (1, anchors, 5 + classes):
// each row is [cx, cy, w, h, objectness, class scores...]. A row is kept
// when objectness * best-class-score exceeds the confidence threshold.
// The variants are expected to hold float (std::get<float> throws otherwise).
std::tuple<std::vector<cv::Rect>, std::vector<float>, std::vector<int>> YoloVn::postprocess_v567(const TensorElement *output, const std::vector<int64_t> &shape, const cv::Size &frame_size)
{
    std::vector<cv::Rect> boxes;
    std::vector<float> confs;
    std::vector<int> classIds;
    const auto offset = 5;                       // cx, cy, w, h, objectness
    const auto num_classes = shape[2] - offset;  // e.g. 1 x 25200 x 85 -> 80 classes
    for (int64_t i = 0; i < shape[1]; ++i)
    {
        const auto obj_conf = std::get<float>(output[4]);
        // Best class score for this anchor row (use offset consistently
        // instead of the hardcoded 5 the original mixed in).
        auto maxSPtr = std::max_element(output + offset, output + offset + num_classes,
            [](const TensorElement &a, const TensorElement &b)
            {
                return std::get<float>(a) < std::get<float>(b);
            });
        const float score = std::get<float>(*maxSPtr) * obj_conf;
        if (score > confidenceThreshold_)
        {
            std::vector<float> bbox;
            bbox.reserve(4);
            for (int j = 0; j < 4; ++j)
            {
                bbox.emplace_back(std::get<float>(output[j]));
            }
            boxes.emplace_back(get_rect(frame_size, bbox));
            const int label = static_cast<int>(maxSPtr - (output + offset));
            confs.emplace_back(score);
            classIds.emplace_back(label);
        }
        output += shape[2];  // advance to the next anchor row
    }
    return std::make_tuple(boxes, confs, classIds);
}
// Decode an ultralytics YOLOv8-style output tensor shaped
// (1, 4 + classes, anchors): the layout is feature-major, so each anchor is
// a COLUMN. Reads one column at a time instead of materializing the whole
// matrix plus a full transpose (the original made two complete copies of
// the output tensor before scanning it).
std::tuple<std::vector<cv::Rect>, std::vector<float>, std::vector<int>> YoloVn::postprocess_ultralytics(const TensorElement *output, const std::vector<int64_t> &shape, const cv::Size &frame_size)
{
    std::vector<cv::Rect> boxes;
    std::vector<float> confs;
    std::vector<int> classIds;
    const int64_t offset = 4;  // cx, cy, w, h precede the class scores
    const int64_t num_classes = shape[1] - offset;
    const int64_t num_anchors = shape[2];

    std::vector<float> column(shape[1]);  // one anchor: [bbox | class scores]
    for (int64_t i = 0; i < num_anchors; ++i)
    {
        // Gather the i-th column: element (f, i) lives at f * num_anchors + i.
        for (int64_t f = 0; f < shape[1]; ++f)
        {
            column[f] = std::get<float>(output[f * num_anchors + i]);
        }
        const float *bboxesPtr = column.data();
        const float *scoresPtr = bboxesPtr + offset;
        auto maxSPtr = std::max_element(scoresPtr, scoresPtr + num_classes);
        const float score = *maxSPtr;
        if (score > confidenceThreshold_)
        {
            boxes.emplace_back(get_rect(frame_size, std::vector<float>(bboxesPtr, bboxesPtr + offset)));
            confs.emplace_back(score);
            classIds.emplace_back(static_cast<int>(maxSPtr - scoresPtr));
        }
    }
    return std::make_tuple(boxes, confs, classIds);
}
// Decode an ultralytics YOLOv8-style output tensor shaped
// (1, 4 + classes, anchors) directly from raw floats. The layout is
// feature-major (each anchor is a column, stride num_anchors), so the
// scores can be scanned in place with zero intermediate copies — the
// original built the full matrix AND its transpose first.
Output YoloVn::postprocess_ultralytics2(const float *output, const std::vector<int64_t> &shape, const cv::Size &frame_size)
{
    std::vector<cv::Rect> boxes;
    std::vector<float> confs;
    std::vector<int> classIds;
    const int64_t offset = 4;  // cx, cy, w, h precede the class scores
    const int64_t num_classes = shape[1] - offset;
    const int64_t num_anchors = shape[2];

    for (int64_t i = 0; i < num_anchors; ++i)
    {
        // Best class score for anchor i; scores are strided by num_anchors.
        int label = 0;
        float score = output[offset * num_anchors + i];
        for (int64_t c = 1; c < num_classes; ++c)
        {
            const float s = output[(offset + c) * num_anchors + i];
            if (s > score)
            {
                score = s;
                label = static_cast<int>(c);
            }
        }
        if (score > confidenceThreshold_)
        {
            const std::vector<float> bbox = {
                output[0 * num_anchors + i],
                output[1 * num_anchors + i],
                output[2 * num_anchors + i],
                output[3 * num_anchors + i]
            };
            boxes.emplace_back(get_rect(frame_size, bbox));
            confs.emplace_back(score);
            classIds.emplace_back(label);
        }
    }
    return Output{boxes, confs, classIds};
}
// Decode raw tensor outputs into final detections: dispatch on the output
// layout, then apply Non-Maximum Suppression and collect the survivors.
std::vector<Detection> YoloVn::postprocess(const std::vector<std::vector<TensorElement>> &outputs, const std::vector<std::vector<int64_t>> &shapes, const cv::Size &frame_size)
{
    const TensorElement *output0 = outputs.front().data();
    const std::vector<int64_t> shape0 = shapes.front();
    // v5/6/7 exports are (1, anchors, 5+classes) with anchors >> features;
    // v8/ultralytics exports are (1, 4+classes, anchors).
    const auto [boxes, confs, classIds] = (shape0[1] > shape0[2])
        ? postprocess_v567(output0, shape0, frame_size)
        : postprocess_ultralytics(output0, shape0, frame_size);

    // Non-Maximum Suppression keeps the best box among overlapping candidates.
    std::vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confs, confidenceThreshold_, nms_threshold_, indices);

    std::vector<Detection> detections;
    detections.reserve(indices.size());
    for (const int idx : indices)  // range-for avoids the signed/unsigned loop mix
    {
        Detection det;
        det.label = classIds[idx];
        det.bbox = boxes[idx];
        det.score = confs[idx];
        detections.emplace_back(det);
    }
    return detections;
}
// Copy a 4-D NCHW float blob (as produced by cv::dnn::blobFromImage) into a
// flat std::vector<float> suitable for Ort::Value::CreateTensor<float>.
std::vector<float> blob2vec(const cv::Mat& input_blob)
{
// Blob dims are [batch, channels, rows, cols]; batch is assumed to be 1.
// NOTE(review): size[2] is rows (height) and size[3] is cols (width) in a
// blobFromImage blob — the variable names below appear swapped, which is
// harmless only while the network input is square; confirm before using
// non-square sizes.
const auto channels = input_blob.size[1];
const auto network_width = input_blob.size[2];
const auto network_height = input_blob.size[3];
size_t img_byte_size = network_width * network_height * channels * sizeof(float); // Allocate a buffer to hold all image elements.
std::vector<float> input_data = std::vector<float>(network_width * network_height * channels);
std::memcpy(input_data.data(), input_blob.data, img_byte_size);
// NOTE(review): the memcpy above already copies the planar NCHW data; the
// per-channel Mats and cv::split below look redundant (and split on a 4-D
// single-channel Mat is questionable) — presumably leftover from an HWC
// variant of this helper. Verify whether this block can be removed.
std::vector<cv::Mat> chw;
for (size_t i = 0; i < channels; ++i)
{
chw.emplace_back(cv::Mat(cv::Size(network_width, network_height), CV_32FC1, &(input_data[i * network_width * network_height])));
}
cv::split(input_blob, chw);
return input_data;
}
// Load a YOLOv8 ONNX model, run it on a single image with ONNX Runtime,
// and print the decoded detections.
int main(int argc, char **argv)
{
    const std::string model_path = "../../models/yolov8n.onnx";
    const std::string image_path = "../../img/dog.png";
    // const std::string image_path = "../../img/cat.jpeg";

    YoloVn yolo;
    cv::Mat img = cv::imread(image_path, cv::IMREAD_COLOR);
    // imread returns an empty Mat on failure; bail out instead of crashing
    // inside preprocess_image.
    if (img.empty())
    {
        std::cerr << "Failed to read image: " << image_path << std::endl;
        return EXIT_FAILURE;
    }
    cv::Mat input_image = yolo.preprocess_image(img);

    // HWC float image -> NCHW blob -> flat vector for the input tensor.
    cv::Mat blob;
    cv::dnn::blobFromImage(input_image, blob, 1.0, cv::Size(), cv::Scalar(), false, false);
    auto blob_vec = blob2vec(blob);

    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "YOLOv8");
    Ort::SessionOptions session_options;
    session_options.SetIntraOpNumThreads(1);
    Ort::Session session(env, model_path.c_str(), session_options);
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);

    auto input_shape = session.GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
    auto input_tensor = Ort::Value::CreateTensor<float>(
        memory_info,
        blob_vec.data(),
        blob_vec.size(),
        input_shape.data(),
        input_shape.size()
    );

    // Tensor names as baked into the ultralytics ONNX export.
    std::vector<const char*> input_names = {"images"};
    std::vector<const char*> output_names = {"output0"};

    std::vector<Ort::Value> infer_output = session.Run(
        Ort::RunOptions{nullptr},
        input_names.data(),
        &input_tensor,
        1, // input_count
        output_names.data(),
        1  // output_count
    );

    // Postprocess output. Take the shape from the actual output tensor
    // rather than the session metadata, so exports with dynamic axes
    // resolve to concrete dimensions.
    float* outputs_raw = infer_output[0].GetTensorMutableData<float>();
    std::vector<int64_t> output_shape = infer_output[0].GetTensorTypeAndShapeInfo().GetShape();
    const auto output_postprocess = yolo.postprocess_ultralytics2(outputs_raw, output_shape, img.size());

    // Print detections
    std::cout << "Detection results:\n";
    for (size_t i = 0; i < output_postprocess.classIds.size(); ++i)
    {
        std::cout
            << "Class: " << output_postprocess.classIds[i]
            << ", Confidence: " << output_postprocess.confs[i]
            << std::endl;
    }
    return EXIT_SUCCESS;
}
@olibartfast
Copy link

std::vector<float> blob2vec(const cv::Mat& input_blob)
{

    const auto channels = input_blob.size[1];
    const auto network_width = input_blob.size[2];
    const auto network_height = input_blob.size[3];
    size_t img_byte_size = network_width * network_height * channels * sizeof(float);  // Allocate a buffer to hold all image elements.
    std::vector<float> input_data = std::vector<float>(network_width * network_height * channels);
    std::memcpy(input_data.data(), input_blob.data, img_byte_size);

    std::vector<cv::Mat> chw;
    for (size_t i = 0; i < channels; ++i)
    {
        chw.emplace_back(cv::Mat(cv::Size(network_width, network_height), CV_32FC1, &(input_data[i * network_width * network_height])));
    }
    cv::split(input_blob, chw);

    return input_data;    
}

int main(int argc, char **argv)
{
    const std::string model_path = "/home/oli/yolov8n.onnx";
    const std::string image_path = "dog.jpg";

    YoloVn yolo;
    cv::Mat img = cv::imread(image_path, cv::IMREAD_COLOR);
    cv::Mat input_image = yolo.preprocess_image(img);

    cv::Mat blob;
    cv::dnn::blobFromImage(input_image, blob, 1.0, cv::Size(), cv::Scalar(), false, false);
    auto blob_vec = blob2vec(blob);

    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "YOLOv8");
    Ort::SessionOptions session_options;
    session_options.SetIntraOpNumThreads(1);
    Ort::Session session(env, model_path.c_str(), session_options);
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);

    auto input_shape = session.GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
    auto input_tensor = Ort::Value::CreateTensor<float>(
        memory_info,
        blob_vec.data(),
        blob_vec.size(),
        input_shape.data(),
        input_shape.size()
    );

    std::vector<const char*> input_names = {"images"};
    std::vector<const char*> output_names = {"output0"};

    std::vector<Ort::Value> infer_output = session.Run(
        Ort::RunOptions{nullptr},
        input_names.data(),
        &input_tensor,
        1, // input_count
        output_names.data(),
        1 // output_count
    );
    // Postprocess output
    float* outputs_raw = infer_output[0].GetTensorMutableData<float>();
    std::vector<int64_t> output_shape = infer_output[0].GetTensorTypeAndShapeInfo().GetShape();

    // Wrap the raw output in the format expected by postprocess
    std::vector<std::vector<TensorElement>> wrapped_output(1);
    wrapped_output[0].reserve(output_shape[1] * output_shape[2]);
    for (int64_t i = 0; i < output_shape[1] * output_shape[2]; ++i) {
        wrapped_output[0].push_back(outputs_raw[i]);
    }

    std::vector<std::vector<int64_t>> wrapped_shape = {output_shape};

    // Call postprocess
    std::vector<Detection> detections = yolo.postprocess(wrapped_output, wrapped_shape, img.size());

    // Print detections
    std::cout << "Detection results:\n";
    for (const auto& det : detections)
    {
        std::cout
            << "Class: " << det.label
            << ", Confidence: " << det.score
            << ", Bounding Box: (" << det.bbox.x << ", " << det.bbox.y << ", " 
            << det.bbox.width << ", " << det.bbox.height << ")"
            << std::endl;
    }

    return EXIT_SUCCESS;
}

@StefanoLusardi
Copy link
Author

@olibartfast , thanks for the update!
The missing step was the blob2vec function.
I will update the original gist code with the latest modifications.
Thank you.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment