1. Convert the ONNX model, following the steps described in the earlier section on inference.
2. Create the calibrator Int8EntropyCalibrator2. TensorRT's entropy calibrator runs batches of representative images through the network and collects activation statistics to choose INT8 scale factors; we subclass IInt8EntropyCalibrator2 and implement its four callbacks.
#ifndef CALIBRATOR_H
#define CALIBRATOR_H

#include <string>
#include <vector>
#include <NvInfer.h>

using namespace nvinfer1;

class Int8EntropyCalibrator2 : public IInt8EntropyCalibrator2
{
public:
    Int8EntropyCalibrator2(int batch_size, int input_w, int input_h, const char* img_dir,
                           const char* calib_table_name, bool read_cache = true);
    virtual ~Int8EntropyCalibrator2();
    // Called once by TensorRT to learn the calibration batch size.
    int getBatchSize() const noexcept override;
    // Called repeatedly; fills `bindings` with a device pointer to the next batch.
    bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override;
    // If a calibration table already exists on disk, return it so calibration can be skipped.
    const void* readCalibrationCache(size_t& length) noexcept override;
    // Persist the calibration table produced by a full calibration run.
    void writeCalibrationCache(const void* cache, size_t length) noexcept override;

private:
    int batch_size_;
    int input_w_;
    int input_h_;
    int img_idx_;                    // index of the next image to load
    std::string img_dir_;
    std::vector<std::string> img_files_;
    size_t input_count_;             // floats per batch: 3 * W * H * batch_size
    std::string calib_table_name_;
    bool read_cache_;
    float* batch_data;               // host-side staging buffer
    void* device_input_;             // device-side input buffer
    std::vector<char> calib_cache_;
};

#endif // CALIBRATOR_H
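The implementation below calls cudaMalloc/cudaMemcpy/cudaFree without checking return codes. A small error-check macro makes CUDA failures visible immediately; this is my own addition, not part of the original code, and you can wrap each CUDA runtime call in it if you adopt it:

#include <cstdio>
#include <cstdlib>
#include <cuda_runtime_api.h>

// Hypothetical helper, not in the original source: abort with a message
// if a CUDA runtime call fails.
#define CUDA_CHECK(call)                                                  \
    do {                                                                  \
        cudaError_t err__ = (call);                                       \
        if (err__ != cudaSuccess) {                                       \
            std::fprintf(stderr, "CUDA error %s at %s:%d\n",              \
                         cudaGetErrorString(err__), __FILE__, __LINE__);  \
            std::abort();                                                 \
        }                                                                 \
    } while (0)

// Usage: CUDA_CHECK(cudaMalloc(&device_input_, input_count_ * sizeof(float)));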
// Implementation
#include <iostream>
#include <fstream>
#include <iterator>
#include <filesystem>
#include <cstring>              // memcpy
#include <cuda_runtime_api.h>   // cudaMalloc / cudaMemcpy / cudaFree
#include <opencv2/opencv.hpp>
#include "calibrator.h"

using namespace nvinfer1;
namespace fs = std::filesystem;
static inline int read_files_in_dir(const char* p_dir_name, std::vector<std::string>& file_names)
{
    try {
        // std::filesystem::directory_iterator never yields "." or "..",
        // so no special-casing is needed; just collect regular files.
        for (const auto& entry : fs::directory_iterator(p_dir_name)) {
            if (entry.is_regular_file()) {
                file_names.push_back(entry.path().filename().string());
            }
        }
        return 0;  // success
    }
    catch (const fs::filesystem_error&) {
        // Directory missing, no permission, etc.
        return -1;
    }
    catch (...) {
        return -1;
    }
}
std::vector<float> preprocess(cv::Mat& img, int input_w, int input_h)
{
    int elements = 3 * input_h * input_w;

    // Letterbox: scale to fit while preserving aspect ratio, pad the rest with gray.
    int w, h, x, y;
    float r_w = input_w / (img.cols * 1.0);
    float r_h = input_h / (img.rows * 1.0);
    if (r_h > r_w) {
        w = input_w;
        h = r_w * img.rows;
        x = 0;
        y = (input_h - h) / 2;
    } else {
        w = r_h * img.cols;
        h = input_h;
        x = (input_w - w) / 2;
        y = 0;
    }
    cv::Mat re(h, w, CV_8UC3);
    cv::resize(img, re, re.size(), 0, 0, cv::INTER_LINEAR);
    cv::Mat out(input_h, input_w, CV_8UC3, cv::Scalar(128, 128, 128));
    re.copyTo(out(cv::Rect(x, y, re.cols, re.rows)));

    // HWC -> CHW, BGR -> RGB, scale to [0, 1].
    std::vector<float> result(elements);
    float* norm_data = result.data();
    uchar* uc_pixel = out.data;
    for (int i = 0; i < input_h * input_w; i++) {
        norm_data[i] = (float)uc_pixel[2] / 255.0;                          // R plane
        norm_data[i + input_h * input_w] = (float)uc_pixel[1] / 255.0;      // G plane
        norm_data[i + 2 * input_h * input_w] = (float)uc_pixel[0] / 255.0;  // B plane
        uc_pixel += 3;
    }
    return result;
}
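The resulting buffer is planar: channel c of pixel (row, col) lands at index c * H * W + row * W + col. A quick self-check with a solid-color image makes both the layout and the BGR-to-RGB swap easy to verify; this is my own sketch, not in the original, and it assumes the preprocess function above is in the same translation unit:

#include <cassert>
#include <opencv2/opencv.hpp>

int main() {
    // A 640x640 image that is pure blue in OpenCV's BGR order.
    cv::Mat img(640, 640, CV_8UC3, cv::Scalar(255, 0, 0));
    std::vector<float> chw = preprocess(img, 640, 640);
    int plane = 640 * 640;
    assert(chw[0] == 0.0f);          // R plane: no red
    assert(chw[plane] == 0.0f);      // G plane: no green
    assert(chw[2 * plane] == 1.0f);  // B plane: fully blue
    return 0;
}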
Int8EntropyCalibrator2::Int8EntropyCalibrator2(int batch_size, int input_w, int input_h, const char* img_dir, const char* calib_table_name, bool read_cache)
    : batch_size_(batch_size)
    , input_w_(input_w)
    , input_h_(input_h)
    , img_idx_(0)
    , img_dir_(img_dir)
    , calib_table_name_(calib_table_name)
    , read_cache_(read_cache)
{
    input_count_ = 3 * input_w * input_h * batch_size;
    // Allocate one batch worth of storage: batch_data on the host,
    // device_input_ on the GPU.
    batch_data = new float[input_count_];
    cudaMalloc(&device_input_, input_count_ * sizeof(float));
    read_files_in_dir(img_dir, img_files_);
}
Int8EntropyCalibrator2::~Int8EntropyCalibrator2()
{
    cudaFree(device_input_);
    delete[] batch_data;  // delete[] on nullptr is a no-op, so no guard is needed
}

int Int8EntropyCalibrator2::getBatchSize() const noexcept
{
    return batch_size_;
}
bool Int8EntropyCalibrator2::getBatch(void* bindings[], const char* names[], int nbBindings) noexcept
{
    // Stop calibration once there are not enough images left for a full batch.
    if (img_idx_ + batch_size_ > (int)img_files_.size()) { return false; }

    float* ptr = batch_data;
    for (int i = img_idx_; i < img_idx_ + batch_size_; i++) {
        std::cout << img_files_[i] << " " << i << std::endl;
        cv::Mat temp = cv::imread(img_dir_ + "/" + img_files_[i], cv::IMREAD_COLOR);
        if (temp.empty()) {
            std::cerr << "Fatal error: cannot open image " << img_files_[i] << std::endl;
            return false;
        }
        std::vector<float> input_data = preprocess(temp, input_w_, input_h_);
        memcpy(ptr, input_data.data(), input_data.size() * sizeof(float));
        ptr += input_data.size();
    }
    img_idx_ += batch_size_;

    // Copy the assembled batch to the GPU and hand the device pointer to TensorRT.
    cudaMemcpy(device_input_, batch_data, input_count_ * sizeof(float), cudaMemcpyHostToDevice);
    bindings[0] = device_input_;
    return true;
}
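During an INT8 engine build, TensorRT itself calls getBatch in a loop until it returns false. It can be worth exercising the calibrator standalone first, to catch bad image paths before a long build. A minimal smoke test of my own, assuming an "images" directory of calibration pictures exists (the names array content is ignored by this implementation):

#include <iostream>
#include "calibrator.h"

int main() {
    // Hypothetical paths; adjust to your calibration set.
    Int8EntropyCalibrator2 calib(1, 640, 640, "images", "calib.table", false);
    void* bindings[1];
    const char* names[] = { "images" };
    int batches = 0;
    // TensorRT drives this same loop internally during engine build.
    while (calib.getBatch(bindings, names, 1)) {
        ++batches;
    }
    std::cout << "calibrator produced " << batches << " batches" << std::endl;
    return 0;
}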
const void* Int8EntropyCalibrator2::readCalibrationCache(size_t& length) noexcept
{
    std::cout << "reading calib cache: " << calib_table_name_ << std::endl;
    calib_cache_.clear();
    std::ifstream input(calib_table_name_, std::ios::binary);
    input >> std::noskipws;
    if (read_cache_ && input.good()) {
        std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(),
                  std::back_inserter(calib_cache_));
    }
    length = calib_cache_.size();
    return length ? calib_cache_.data() : nullptr;
}
void Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, size_t length) noexcept
{
    std::cout << "writing calib cache: " << calib_table_name_ << " size: " << length << std::endl;
    std::ofstream output(calib_table_name_, std::ios::binary);
    output.write(reinterpret_cast<const char*>(cache), length);
}
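The cache makes calibration a one-time cost: on the first build readCalibrationCache finds nothing, TensorRT runs the full getBatch loop and then calls writeCalibrationCache; on later builds the stored table is returned from disk and the images are never touched. A small sketch of the flag semantics, with hypothetical paths:

#include "calibrator.h"

int main() {
    // read_cache=true: if "calib.table" exists, readCalibrationCache returns it
    // and TensorRT skips the getBatch loop entirely (images are never read).
    Int8EntropyCalibrator2 cached(1, 640, 640, "images", "calib.table", true);

    // read_cache=false: the cache is ignored, forcing a full recalibration;
    // use this after changing the model or the calibration image set.
    Int8EntropyCalibrator2 fresh(1, 640, 640, "images", "calib.table", false);
    return 0;
}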
3. Model conversion (ONNX to TensorRT engine)
#include <iostream>
#include <fstream>
#include <memory>          // std::unique_ptr / std::shared_ptr
#include <NvInfer.h>
#include <NvOnnxParser.h>
#include "calibrator.h"    // lowercase to match the header above (case-sensitive filesystems)

using namespace nvinfer1;
using namespace nvonnxparser;
// Conversion parameters
struct Args {
    std::string onnx_file_path;
    std::string engine_file_path;
    std::string mode = "int8";  // one of fp32, fp16, int8
    int batch_size = 1;
    int channel = 3;
    int height = 640;
    int width = 640;
    bool dynamic = false;
    // int8 mode additionally requires a calibrator (see below)
};
class Logger : public ILogger {
public:
    void log(Severity severity, const char* msg) noexcept override {
        // Print warnings and errors; suppress info/verbose messages.
        if (severity <= Severity::kWARNING) {
            std::cout << msg << std::endl;
        }
    }
};
// Main conversion function
bool ONNX2TRT(const Args& args, IInt8Calibrator* calib = nullptr) {
    // Validate the precision mode
    if (args.mode != "fp32" && args.mode != "fp16" && args.mode != "int8") {
        std::cerr << "Error: mode should be in ['fp32', 'fp16', 'int8']" << std::endl;
        return false;
    }
    Logger logger;

    // Create the builder and an explicit-batch network
    auto builder = std::unique_ptr<IBuilder>(createInferBuilder(logger));
    if (!builder) {
        std::cerr << "Failed to create builder" << std::endl;
        return false;
    }
    const auto explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    auto network = std::unique_ptr<INetworkDefinition>(builder->createNetworkV2(explicitBatch));
    if (!network) {
        std::cerr << "Failed to create network" << std::endl;
        return false;
    }

    // Create the ONNX parser
    auto parser = std::unique_ptr<IParser>(createParser(*network, logger));
    if (!parser) {
        std::cerr << "Failed to create ONNX parser" << std::endl;
        return false;
    }

    // Builder configuration. Note: with an explicit-batch network, setMaxBatchSize
    // has no effect (the batch size comes from the network or the optimization
    // profile); it is kept here as in the pre-TensorRT-10 API this code targets.
    builder->setMaxBatchSize(args.batch_size);
    auto config = std::unique_ptr<IBuilderConfig>(builder->createBuilderConfig());
    config->setMaxWorkspaceSize(1 << 30);  // 1 GB scratch memory budget
    // Dynamic batch: register an optimization profile for the input binding,
    // named "images" here as in a typical YOLO ONNX export. The ONNX model
    // must have been exported with a dynamic batch axis for this to work.
    if (args.dynamic) {
        auto profile = builder->createOptimizationProfile();
        profile->setDimensions("images", OptProfileSelector::kMIN,
                               Dims4{1, args.channel, args.height, args.width});
        profile->setDimensions("images", OptProfileSelector::kOPT,
                               Dims4{2, args.channel, args.height, args.width});
        profile->setDimensions("images", OptProfileSelector::kMAX,
                               Dims4{4, args.channel, args.height, args.width});
        config->addOptimizationProfile(profile);
    }
    // Precision flags
    if (args.mode == "int8") {
        if (!builder->platformHasFastInt8()) {
            std::cerr << "Platform does not support int8 inference" << std::endl;
            return false;
        }
        if (!calib) {
            std::cerr << "int8 mode requires a calibrator" << std::endl;
            return false;
        }
        config->setFlag(BuilderFlag::kINT8);
        config->setInt8Calibrator(calib);
    } else if (args.mode == "fp16") {
        if (!builder->platformHasFastFp16()) {
            std::cerr << "Platform does not support fp16 inference" << std::endl;
            return false;
        }
        config->setFlag(BuilderFlag::kFP16);
    }
    // Parse the ONNX model
    std::cout << "Loading ONNX file from path " << args.onnx_file_path << "..." << std::endl;
    if (!parser->parseFromFile(args.onnx_file_path.c_str(),
                               static_cast<int>(ILogger::Severity::kWARNING))) {
        std::cerr << "Failed to parse ONNX model" << std::endl;
        for (int i = 0; i < parser->getNbErrors(); ++i) {
            std::cerr << parser->getError(i)->desc() << std::endl;
        }
        return false;
    }
    std::cout << "Parsing ONNX file complete!" << std::endl;
    // Build the engine (in int8 mode, calibration runs inside this call, so it is slow)
    std::cout << "Building an engine from file " << args.onnx_file_path
              << "; this may take a while..." << std::endl;
    auto engine = std::shared_ptr<ICudaEngine>(
        builder->buildEngineWithConfig(*network, *config),
        // Guard against null: the custom deleter runs even when the build fails.
        [](ICudaEngine* e) { if (e) e->destroy(); }
    );
    if (!engine) {
        std::cerr << "ERROR: Failed to build engine" << std::endl;
        return false;
    }
    std::cout << "Engine created successfully!" << std::endl;
    // Serialize the engine and write it to disk
    auto serializedEngine = std::unique_ptr<IHostMemory>(engine->serialize());
    if (!serializedEngine) {
        std::cerr << "Failed to serialize engine" << std::endl;
        return false;
    }
    std::cout << "Saving TRT engine file to path " << args.engine_file_path << "..." << std::endl;
    std::ofstream engineFile(args.engine_file_path, std::ios::binary);
    if (!engineFile) {
        std::cerr << "Failed to open engine file for writing" << std::endl;
        return false;
    }
    engineFile.write(static_cast<const char*>(serializedEngine->data()),
                     serializedEngine->size());
    engineFile.close();
    std::cout << "Engine file saved to " << args.engine_file_path << std::endl;
    return true;
}
// Example usage
int main() {
    Args args;
    args.onnx_file_path = "model.onnx";
    args.engine_file_path = "engine.trt";
    args.mode = "int8";  // or "fp32", "fp16"
    args.batch_size = 1;
    args.dynamic = false;

    // int8 mode needs a calibrator. Its input size must match the network input
    // (args.width x args.height), and "image_dir" must hold representative images.
    IInt8Calibrator* calib = new Int8EntropyCalibrator2(args.batch_size, args.width, args.height,
                                                        "image_dir", "calib.data", false);
    bool ok = ONNX2TRT(args, calib);
    delete calib;  // safe: the calibrator base class has a virtual destructor
    if (ok) {
        std::cout << "Conversion successful!" << std::endl;
    } else {
        std::cerr << "Conversion failed!" << std::endl;
        return 1;
    }
    return 0;
}
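To sanity-check the generated file, the saved engine can be deserialized and its bindings inspected. A minimal sketch of my own, assuming the same pre-TensorRT-10 API used above and reusing the Logger class defined earlier:

#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <vector>
#include <NvInfer.h>

using namespace nvinfer1;

int main() {
    // Read the serialized engine back from disk.
    std::ifstream file("engine.trt", std::ios::binary);
    std::vector<char> blob((std::istreambuf_iterator<char>(file)),
                           std::istreambuf_iterator<char>());

    Logger logger;  // the Logger class defined in the conversion code above
    auto runtime = std::unique_ptr<IRuntime>(createInferRuntime(logger));
    auto engine = std::unique_ptr<ICudaEngine>(
        runtime->deserializeCudaEngine(blob.data(), blob.size()));
    if (!engine) { std::cerr << "deserialization failed" << std::endl; return 1; }

    // Print each binding's name and dimensions to confirm the export.
    for (int i = 0; i < engine->getNbBindings(); ++i) {
        Dims d = engine->getBindingDimensions(i);
        std::cout << engine->getBindingName(i) << ": [";
        for (int j = 0; j < d.nbDims; ++j)
            std::cout << d.d[j] << (j + 1 < d.nbDims ? ", " : "");
        std::cout << "]" << std::endl;
    }
    return 0;
}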