1. Convert the ONNX model, following the steps described in the earlier section on inference.
2. Create the calibrator Int8EntropyCalibrator2. TensorRT's entropy calibrator runs batches of representative images through the network and collects activation statistics to choose INT8 scale factors; we subclass IInt8EntropyCalibrator2 and implement its four callbacks.
#ifndef CALIBRATOR_H
#define CALIBRATOR_H

#include <string>
#include <vector>
#include <NvInfer.h>

using namespace nvinfer1;

class Int8EntropyCalibrator2 : public IInt8EntropyCalibrator2
{
public:
    Int8EntropyCalibrator2(int batch_size, int input_w, int input_h, const char* img_dir,
                           const char* calib_table_name, bool read_cache = true);
    virtual ~Int8EntropyCalibrator2();
    // Called once by TensorRT to learn the calibration batch size.
    int getBatchSize() const noexcept override;
    // Called repeatedly; fills `bindings` with a device pointer to the next batch.
    bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override;
    // If a calibration table already exists on disk, return it so calibration can be skipped.
    const void* readCalibrationCache(size_t& length) noexcept override;
    // Persist the calibration table produced by a full calibration run.
    void writeCalibrationCache(const void* cache, size_t length) noexcept override;

private:
    int batch_size_;
    int input_w_;
    int input_h_;
    int img_idx_;                    // index of the next image to load
    std::string img_dir_;
    std::vector<std::string> img_files_;
    size_t input_count_;             // floats per batch: 3 * W * H * batch_size
    std::string calib_table_name_;
    bool read_cache_;
    float* batch_data;               // host-side staging buffer
    void* device_input_;             // device-side input buffer
    std::vector<char> calib_cache_;
};

#endif // CALIBRATOR_H
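The implementation below calls cudaMalloc/cudaMemcpy/cudaFree without checking return codes. A small error-check macro makes CUDA failures visible immediately; this is my own addition, not part of the original code, and you can wrap each CUDA runtime call in it if you adopt it:

#include <cstdio>
#include <cstdlib>
#include <cuda_runtime_api.h>

// Hypothetical helper, not in the original source: abort with a message
// if a CUDA runtime call fails.
#define CUDA_CHECK(call)                                                  \
    do {                                                                  \
        cudaError_t err__ = (call);                                       \
        if (err__ != cudaSuccess) {                                       \
            std::fprintf(stderr, "CUDA error %s at %s:%d\n",              \
                         cudaGetErrorString(err__), __FILE__, __LINE__);  \
            std::abort();                                                 \
        }                                                                 \
    } while (0)

// Usage: CUDA_CHECK(cudaMalloc(&device_input_, input_count_ * sizeof(float)));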
// Implementation
#include <iostream>
#include <fstream>
#include <iterator>
#include <filesystem>
#include <cstring>              // memcpy
#include <cuda_runtime_api.h>   // cudaMalloc / cudaMemcpy / cudaFree
#include <opencv2/opencv.hpp>
#include "calibrator.h"

using namespace nvinfer1;
namespace fs = std::filesystem;
static inline int read_files_in_dir(const char* p_dir_name, std::vector<std::string>& file_names)
{
    try {
        // std::filesystem::directory_iterator never yields "." or "..",
        // so no special-casing is needed; just collect regular files.
        for (const auto& entry : fs::directory_iterator(p_dir_name)) {
            if (entry.is_regular_file()) {
                file_names.push_back(entry.path().filename().string());
            }
        }
        return 0;  // success
    }
    catch (const fs::filesystem_error&) {
        // Directory missing, no permission, etc.
        return -1;
    }
    catch (...) {
        return -1;
    }
}
std::vector<float> preprocess(cv::Mat& img, int input_w, int input_h)
{
    int elements = 3 * input_h * input_w;

    // Letterbox: scale to fit while preserving aspect ratio, pad the rest with gray.
    int w, h, x, y;
    float r_w = input_w / (img.cols * 1.0);
    float r_h = input_h / (img.rows * 1.0);
    if (r_h > r_w) {
        w = input_w;
        h = r_w * img.rows;
        x = 0;
        y = (input_h - h) / 2;
    } else {
        w = r_h * img.cols;
        h = input_h;
        x = (input_w - w) / 2;
        y = 0;
    }
    cv::Mat re(h, w, CV_8UC3);
    cv::resize(img, re, re.size(), 0, 0, cv::INTER_LINEAR);
    cv::Mat out(input_h, input_w, CV_8UC3, cv::Scalar(128, 128, 128));
    re.copyTo(out(cv::Rect(x, y, re.cols, re.rows)));

    // HWC -> CHW, BGR -> RGB, scale to [0, 1].
    std::vector<float> result(elements);
    float* norm_data = result.data();
    uchar* uc_pixel = out.data;
    for (int i = 0; i < input_h * input_w; i++) {
        norm_data[i] = (float)uc_pixel[2] / 255.0;                          // R plane
        norm_data[i + input_h * input_w] = (float)uc_pixel[1] / 255.0;      // G plane
        norm_data[i + 2 * input_h * input_w] = (float)uc_pixel[0] / 255.0;  // B plane
        uc_pixel += 3;
    }
    return result;
}
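The resulting buffer is planar: channel c of pixel (row, col) lands at index c * H * W + row * W + col. A quick self-check with a solid-color image makes both the layout and the BGR-to-RGB swap easy to verify; this is my own sketch, not in the original, and it assumes the preprocess function above is in the same translation unit:

#include <cassert>
#include <opencv2/opencv.hpp>

int main() {
    // A 640x640 image that is pure blue in OpenCV's BGR order.
    cv::Mat img(640, 640, CV_8UC3, cv::Scalar(255, 0, 0));
    std::vector<float> chw = preprocess(img, 640, 640);
    int plane = 640 * 640;
    assert(chw[0] == 0.0f);          // R plane: no red
    assert(chw[plane] == 0.0f);      // G plane: no green
    assert(chw[2 * plane] == 1.0f);  // B plane: fully blue
    return 0;
}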
Int8EntropyCalibrator2::Int8EntropyCalibrator2(int batch_size, int input_w, int input_h, const char* img_dir, const char* calib_table_name, bool read_cache)
    : batch_size_(batch_size)
    , input_w_(input_w)
    , input_h_(input_h)
    , img_idx_(0)
    , img_dir_(img_dir)
    , calib_table_name_(calib_table_name)
    , read_cache_(read_cache)
{
    input_count_ = 3 * input_w * input_h * batch_size;
    // Allocate one batch worth of storage: batch_data on the host,
    // device_input_ on the GPU.
    batch_data = new float[input_count_];
    cudaMalloc(&device_input_, input_count_ * sizeof(float));
    read_files_in_dir(img_dir, img_files_);
}
Int8EntropyCalibrator2::~Int8EntropyCalibrator2()
{
    cudaFree(device_input_);
    delete[] batch_data;  // delete[] on nullptr is a no-op, so no guard is needed
}

int Int8EntropyCalibrator2::getBatchSize() const noexcept
{
    return batch_size_;
}
bool Int8EntropyCalibrator2::getBatch(void* bindings[], const char* names[], int nbBindings) noexcept
{
    // Stop calibration once there are not enough images left for a full batch.
    if (img_idx_ + batch_size_ > (int)img_files_.size()) { return false; }

    float* ptr = batch_data;
    for (int i = img_idx_; i < img_idx_ + batch_size_; i++) {
        std::cout << img_files_[i] << " " << i << std::endl;
        cv::Mat temp = cv::imread(img_dir_ + "/" + img_files_[i], cv::IMREAD_COLOR);
        if (temp.empty()) {
            std::cerr << "Fatal error: cannot open image " << img_files_[i] << std::endl;
            return false;
        }
        std::vector<float> input_data = preprocess(temp, input_w_, input_h_);
        memcpy(ptr, input_data.data(), input_data.size() * sizeof(float));
        ptr += input_data.size();
    }
    img_idx_ += batch_size_;

    // Copy the assembled batch to the GPU and hand the device pointer to TensorRT.
    cudaMemcpy(device_input_, batch_data, input_count_ * sizeof(float), cudaMemcpyHostToDevice);
    bindings[0] = device_input_;
    return true;
}
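During an INT8 engine build, TensorRT itself calls getBatch in a loop until it returns false. It can be worth exercising the calibrator standalone first, to catch bad image paths before a long build. A minimal smoke test of my own, assuming an "images" directory of calibration pictures exists (the names array content is ignored by this implementation):

#include <iostream>
#include "calibrator.h"

int main() {
    // Hypothetical paths; adjust to your calibration set.
    Int8EntropyCalibrator2 calib(1, 640, 640, "images", "calib.table", false);
    void* bindings[1];
    const char* names[] = { "images" };
    int batches = 0;
    // TensorRT drives this same loop internally during engine build.
    while (calib.getBatch(bindings, names, 1)) {
        ++batches;
    }
    std::cout << "calibrator produced " << batches << " batches" << std::endl;
    return 0;
}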
const void* Int8EntropyCalibrator2::readCalibrationCache(size_t& length) noexcept
{
    std::cout << "reading calib cache: " << calib_table_name_ << std::endl;
    calib_cache_.clear();
    std::ifstream input(calib_table_name_, std::ios::binary);
    input >> std::noskipws;
    if (read_cache_ && input.good()) {
        std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(),
                  std::back_inserter(calib_cache_));
    }
    length = calib_cache_.size();
    return length ? calib_cache_.data() : nullptr;
}
void Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, size_t length) noexcept
{
    std::cout << "writing calib cache: " << calib_table_name_ << " size: " << length << std::endl;
    std::ofstream output(calib_table_name_, std::ios::binary);
    output.write(reinterpret_cast<const char*>(cache), length);
}
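The cache makes calibration a one-time cost: on the first build readCalibrationCache finds nothing, TensorRT runs the full getBatch loop and then calls writeCalibrationCache; on later builds the stored table is returned from disk and the images are never touched. A small sketch of the flag semantics, with hypothetical paths:

#include "calibrator.h"

int main() {
    // read_cache=true: if "calib.table" exists, readCalibrationCache returns it
    // and TensorRT skips the getBatch loop entirely (images are never read).
    Int8EntropyCalibrator2 cached(1, 640, 640, "images", "calib.table", true);

    // read_cache=false: the cache is ignored, forcing a full recalibration;
    // use this after changing the model or the calibration image set.
    Int8EntropyCalibrator2 fresh(1, 640, 640, "images", "calib.table", false);
    return 0;
}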
3. Model conversion (ONNX to TensorRT engine)
#include <iostream>
#include <fstream>
#include <memory>          // std::unique_ptr / std::shared_ptr
#include <NvInfer.h>
#include <NvOnnxParser.h>
#include "calibrator.h"    // lowercase to match the header above (case-sensitive filesystems)

using namespace nvinfer1;
using namespace nvonnxparser;
// Conversion parameters
struct Args {
    std::string onnx_file_path;
    std::string engine_file_path;
    std::string mode = "int8";  // one of fp32, fp16, int8
    int batch_size = 1;
    int channel = 3;
    int height = 640;
    int width = 640;
    bool dynamic = false;
    // int8 mode additionally requires a calibrator (see below)
};
class Logger : public ILogger {
public:
    void log(Severity severity, const char* msg) noexcept override {
        // Print warnings and errors; suppress info/verbose messages.
        if (severity <= Severity::kWARNING) {
            std::cout << msg << std::endl;
        }
    }
};
// Main conversion function
bool ONNX2TRT(const Args& args, IInt8Calibrator* calib = nullptr) {
    // Validate the precision mode
    if (args.mode != "fp32" && args.mode != "fp16" && args.mode != "int8") {
        std::cerr << "Error: mode should be in ['fp32', 'fp16', 'int8']" << std::endl;
        return false;
    }
    Logger logger;

    // Create the builder and an explicit-batch network
    auto builder = std::unique_ptr<IBuilder>(createInferBuilder(logger));
    if (!builder) {
        std::cerr << "Failed to create builder" << std::endl;
        return false;
    }
    const auto explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    auto network = std::unique_ptr<INetworkDefinition>(builder->createNetworkV2(explicitBatch));
    if (!network) {
        std::cerr << "Failed to create network" << std::endl;
        return false;
    }

    // Create the ONNX parser
    auto parser = std::unique_ptr<IParser>(createParser(*network, logger));
    if (!parser) {
        std::cerr << "Failed to create ONNX parser" << std::endl;
        return false;
    }

    // Builder configuration. Note: with an explicit-batch network, setMaxBatchSize
    // has no effect (the batch size comes from the network or the optimization
    // profile); it is kept here as in the pre-TensorRT-10 API this code targets.
    builder->setMaxBatchSize(args.batch_size);
    auto config = std::unique_ptr<IBuilderConfig>(builder->createBuilderConfig());
    config->setMaxWorkspaceSize(1 << 30);  // 1 GB scratch memory budget
    // Dynamic batch: register an optimization profile for the input binding,
    // named "images" here as in a typical YOLO ONNX export. The ONNX model
    // must have been exported with a dynamic batch axis for this to work.
    if (args.dynamic) {
        auto profile = builder->createOptimizationProfile();
        profile->setDimensions("images", OptProfileSelector::kMIN,
                               Dims4{1, args.channel, args.height, args.width});
        profile->setDimensions("images", OptProfileSelector::kOPT,
                               Dims4{2, args.channel, args.height, args.width});
        profile->setDimensions("images", OptProfileSelector::kMAX,
                               Dims4{4, args.channel, args.height, args.width});
        config->addOptimizationProfile(profile);
    }
    // Precision flags
    if (args.mode == "int8") {
        if (!builder->platformHasFastInt8()) {
            std::cerr << "Platform does not support int8 inference" << std::endl;
            return false;
        }
        if (!calib) {
            std::cerr << "int8 mode requires a calibrator" << std::endl;
            return false;
        }
        config->setFlag(BuilderFlag::kINT8);
        config->setInt8Calibrator(calib);
    } else if (args.mode == "fp16") {
        if (!builder->platformHasFastFp16()) {
            std::cerr << "Platform does not support fp16 inference" << std::endl;
            return false;
        }
        config->setFlag(BuilderFlag::kFP16);
    }
    // Parse the ONNX model
    std::cout << "Loading ONNX file from path " << args.onnx_file_path << "..." << std::endl;
    if (!parser->parseFromFile(args.onnx_file_path.c_str(),
                               static_cast<int>(ILogger::Severity::kWARNING))) {
        std::cerr << "Failed to parse ONNX model" << std::endl;
        for (int i = 0; i < parser->getNbErrors(); ++i) {
            std::cerr << parser->getError(i)->desc() << std::endl;
        }
        return false;
    }
    std::cout << "Parsing ONNX file complete!" << std::endl;
    // Build the engine (in int8 mode, calibration runs inside this call, so it is slow)
    std::cout << "Building an engine from file " << args.onnx_file_path
              << "; this may take a while..." << std::endl;
    auto engine = std::shared_ptr<ICudaEngine>(
        builder->buildEngineWithConfig(*network, *config),
        // Guard against null: the custom deleter runs even when the build fails.
        [](ICudaEngine* e) { if (e) e->destroy(); }
    );
    if (!engine) {
        std::cerr << "ERROR: Failed to build engine" << std::endl;
        return false;
    }
    std::cout << "Engine created successfully!" << std::endl;
    // Serialize the engine and write it to disk
    auto serializedEngine = std::unique_ptr<IHostMemory>(engine->serialize());
    if (!serializedEngine) {
        std::cerr << "Failed to serialize engine" << std::endl;
        return false;
    }
    std::cout << "Saving TRT engine file to path " << args.engine_file_path << "..." << std::endl;
    std::ofstream engineFile(args.engine_file_path, std::ios::binary);
    if (!engineFile) {
        std::cerr << "Failed to open engine file for writing" << std::endl;
        return false;
    }
    engineFile.write(static_cast<const char*>(serializedEngine->data()),
                     serializedEngine->size());
    engineFile.close();
    std::cout << "Engine file saved to " << args.engine_file_path << std::endl;
    return true;
}
// Example usage
int main() {
    Args args;
    args.onnx_file_path = "model.onnx";
    args.engine_file_path = "engine.trt";
    args.mode = "int8";  // or "fp32", "fp16"
    args.batch_size = 1;
    args.dynamic = false;

    // int8 mode needs a calibrator. Its input size must match the network input
    // (args.width x args.height), and "image_dir" must hold representative images.
    IInt8Calibrator* calib = new Int8EntropyCalibrator2(args.batch_size, args.width, args.height,
                                                        "image_dir", "calib.data", false);
    bool ok = ONNX2TRT(args, calib);
    delete calib;  // safe: the calibrator base class has a virtual destructor
    if (ok) {
        std::cout << "Conversion successful!" << std::endl;
    } else {
        std::cerr << "Conversion failed!" << std::endl;
        return 1;
    }
    return 0;
}
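To sanity-check the generated file, the saved engine can be deserialized and its bindings inspected. A minimal sketch of my own, assuming the same pre-TensorRT-10 API used above and reusing the Logger class defined earlier:

#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <vector>
#include <NvInfer.h>

using namespace nvinfer1;

int main() {
    // Read the serialized engine back from disk.
    std::ifstream file("engine.trt", std::ios::binary);
    std::vector<char> blob((std::istreambuf_iterator<char>(file)),
                           std::istreambuf_iterator<char>());

    Logger logger;  // the Logger class defined in the conversion code above
    auto runtime = std::unique_ptr<IRuntime>(createInferRuntime(logger));
    auto engine = std::unique_ptr<ICudaEngine>(
        runtime->deserializeCudaEngine(blob.data(), blob.size()));
    if (!engine) { std::cerr << "deserialization failed" << std::endl; return 1; }

    // Print each binding's name and dimensions to confirm the export.
    for (int i = 0; i < engine->getNbBindings(); ++i) {
        Dims d = engine->getBindingDimensions(i);
        std::cout << engine->getBindingName(i) << ": [";
        for (int j = 0; j < d.nbDims; ++j)
            std::cout << d.d[j] << (j + 1 < d.nbDims ? ", " : "");
        std::cout << "]" << std::endl;
    }
    return 0;
}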