```python
import os
import cv2
import sys
import time

import numpy as np

# Locate the rknn_model_zoo root and add it to the import path.
realpath = os.path.abspath(__file__)
_sep = os.path.sep
realpath = realpath.split(_sep)
try:
    zoo_root_index = next(i for i, part in enumerate(realpath) if 'rknn_model_zoo' in part)
    rknn_model_zoo_path = os.path.join(realpath[0] + _sep, *realpath[1:zoo_root_index + 1])
    sys.path.append(rknn_model_zoo_path)
except StopIteration:
    raise ValueError("Could not find 'rknn_model_zoo' directory in the path: {}".format(os.path.abspath(__file__)))

from py_utils.coco_utils import COCO_test_helper

OBJ_THRESH = 0.25
NMS_THRESH = 0.45

# The following two params are for mAP testing:
# OBJ_THRESH = 0.001
# NMS_THRESH = 0.65

IMG_SIZE = (640, 640)  # (width, height), such as (1280, 736)

CLASSES = ("car", "white")
coco_id_list = [1, 2]


def filter_boxes(boxes, box_confidences, box_class_probs):
    """Filter boxes with the object threshold."""
    box_confidences = box_confidences.reshape(-1)
    class_max_score = np.max(box_class_probs, axis=-1)
    classes = np.argmax(box_class_probs, axis=-1)

    _class_pos = np.where(class_max_score * box_confidences >= OBJ_THRESH)
    scores = (class_max_score * box_confidences)[_class_pos]
    boxes = boxes[_class_pos]
    classes = classes[_class_pos]
    return boxes, classes, scores


def nms_boxes(boxes, scores):
    """Suppress non-maximal boxes.

    # Returns
        keep: ndarray, indices of the boxes to keep.
    """
    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]

    areas = w * h
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)

        xx1 = np.maximum(x[i], x[order[1:]])
        yy1 = np.maximum(y[i], y[order[1:]])
        xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
        yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])

        w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
        h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
        inter = w1 * h1

        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        inds = np.where(ovr <= NMS_THRESH)[0]
        order = order[inds + 1]
    keep = np.array(keep)
    return keep


def dfl(position):
    # Distribution Focal Loss (DFL): decode each side's distance distribution
    # into an expected distance via a softmax-weighted sum.
    import torch
    x = torch.tensor(position)
    n, c, h, w = x.shape
    p_num = 4
    mc = c // p_num
    y = x.reshape(n, p_num, mc, h, w)
    y = y.softmax(2)
    acc_matrix = torch.tensor(range(mc)).float().reshape(1, 1, mc, 1, 1)
    y = (y * acc_matrix).sum(2)
    return y.numpy()


def box_process(position):
    grid_h, grid_w = position.shape[2:4]
    col, row = np.meshgrid(np.arange(0, grid_w), np.arange(0, grid_h))
    col = col.reshape(1, 1, grid_h, grid_w)
    row = row.reshape(1, 1, grid_h, grid_w)
    grid = np.concatenate((col, row), axis=1)
    stride = np.array([IMG_SIZE[1] // grid_h, IMG_SIZE[0] // grid_w]).reshape(1, 2, 1, 1)

    position = dfl(position)
    box_xy = grid + 0.5 - position[:, 0:2, :, :]
    box_xy2 = grid + 0.5 + position[:, 2:4, :, :]
    xyxy = np.concatenate((box_xy * stride, box_xy2 * stride), axis=1)
    return xyxy


def post_process(input_data):
    boxes, scores, classes_conf = [], [], []
    default_branch = 3
    pair_per_branch = len(input_data) // default_branch
    # Python ignores the score_sum output.
    for i in range(default_branch):
        boxes.append(box_process(input_data[pair_per_branch * i]))
        classes_conf.append(input_data[pair_per_branch * i + 1])
        scores.append(np.ones_like(input_data[pair_per_branch * i + 1][:, :1, :, :], dtype=np.float32))

    def sp_flatten(_in):
        ch = _in.shape[1]
        _in = _in.transpose(0, 2, 3, 1)
        return _in.reshape(-1, ch)

    boxes = [sp_flatten(_v) for _v in boxes]
    classes_conf = [sp_flatten(_v) for _v in classes_conf]
    scores = [sp_flatten(_v) for _v in scores]

    boxes = np.concatenate(boxes)
    classes_conf = np.concatenate(classes_conf)
    scores = np.concatenate(scores)

    # Filter according to threshold.
    boxes, classes, scores = filter_boxes(boxes, scores, classes_conf)

    # Per-class NMS.
    nboxes, nclasses, nscores = [], [], []
    for c in set(classes):
        inds = np.where(classes == c)
        b = boxes[inds]
        c = classes[inds]
        s = scores[inds]
        keep = nms_boxes(b, s)

        if len(keep) != 0:
            nboxes.append(b[keep])
            nclasses.append(c[keep])
            nscores.append(s[keep])

    if not nclasses and not nscores:
        return None, None, None

    boxes = np.concatenate(nboxes)
    classes = np.concatenate(nclasses)
    scores = np.concatenate(nscores)
    return boxes, classes, scores


def draw(image, boxes, scores, classes):
    for box, score, cl in zip(boxes, scores, classes):
        left, top, right, bottom = [int(_b) for _b in box]  # boxes are (x1, y1, x2, y2)
        print("%s @ (%d %d %d %d) %.3f" % (CLASSES[cl], left, top, right, bottom, score))
        cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 0), 2)
        cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
                    (left, top - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)


def setup_model(args):
    model_path = args.model_path
    if model_path.endswith('.pt') or model_path.endswith('.torchscript'):
        platform = 'pytorch'
        from py_utils.pytorch_executor import Torch_model_container
        model = Torch_model_container(args.model_path)
    elif model_path.endswith('.rknn'):
        platform = 'rknn'
        from py_utils.rknn_executor import RKNN_model_container
        model = RKNN_model_container(args.model_path, args.target, args.device_id)
    elif model_path.endswith('onnx'):
        platform = 'onnx'
        from py_utils.onnx_executor import ONNX_model_container
        model = ONNX_model_container(args.model_path)
    else:
        assert False, "{} is not an rknn/pytorch/onnx model".format(model_path)
    print('Model-{} is {} model, starting val'.format(model_path, platform))
    return model, platform


def img_check(path):
    img_type = ['.jpg', '.jpeg', '.png', '.bmp']
    for _type in img_type:
        if path.endswith(_type) or path.endswith(_type.upper()):
            return True
    return False


if __name__ == '__main__':
    # Create a dummy args object instead of using argparse.
    class Args:
        pass

    args = Args()
    args.model_path = '/home/cat/NPU/rknn_model_zoo-main/examples/yolov8/model/whitenu8.rknn'
    args.target = 'rk3576'
    args.device_id = None

    # Init model.
    model, platform = setup_model(args)

    co_helper = COCO_test_helper(enable_letter_box=True)

    # Init camera.
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not open camera.")
        exit()

    print("Press 'q' to quit.")

    # Detection loop.
    while True:
        start_time = time.time()
        ret, img_src = cap.read()
        if not ret:
            print("Error: Failed to capture frame.")
            break

        # Because RGA initializes buffers with (0,0,0), use pad_color (0,0,0)
        # instead of the usual (114, 114, 114).
        img = co_helper.letter_box(im=img_src.copy(), new_shape=(IMG_SIZE[1], IMG_SIZE[0]), pad_color=(0, 0, 0))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Preprocess if not an rknn model.
        if platform in ['pytorch', 'onnx']:
            input_data = img.transpose((2, 0, 1))
            input_data = input_data.reshape(1, *input_data.shape).astype(np.float32)
            input_data = input_data / 255.
        else:
            input_data = img

        outputs = model.run([input_data])
        boxes, classes, scores = post_process(outputs)

        img_p = img_src.copy()
        if boxes is not None:
            draw(img_p, co_helper.get_real_box(boxes), scores, classes)

        end_time = time.time()
        fps = 1 / (end_time - start_time)
        cv2.putText(img_p, f"FPS: {int(fps)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        cv2.imshow("yolov8 detection", img_p)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release resources.
    cap.release()
    cv2.destroyAllWindows()
    model.release()
```
<think>We are discussing how to debug and optimize YOLOv8 object-detection code that uses an RKNN model with OpenCV. Since the user hits problems while running a YOLOv8 model and wants to use an RKNN model (Rockchip Neural Network, Rockchip's inference framework for its NPUs) together with OpenCV, we can infer they are deploying YOLOv8 on an embedded device (such as a Rockchip board).
The references the user supplied contain nothing directly relevant: reference [1] is about using MMDetection, and references [2] and [3] are unrelated to this problem. We will therefore ignore them and focus on the user's question.
Common problems include:
1. Model conversion: errors while converting the YOLOv8 model to RKNN format.
2. Inference performance: the model runs slowly on the embedded device.
3. Detection accuracy: the converted model loses accuracy.
4. Implementation bugs: mistakes in the OpenCV pre- and post-processing code.
Since the user did not provide a specific error message, we give general debugging and optimization steps.
Step 1: Make sure the model converts correctly.
First, convert the YOLOv8 model (usually a .pt file) to an RKNN model. The conversion involves:
a) exporting the YOLOv8 model to ONNX format (a sketch follows below);
b) converting the ONNX model to RKNN with RKNN-Toolkit2.
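A minimal sketch for step (a), assuming the `ultralytics` package is installed; `yolov8n.pt` is a placeholder checkpoint name:

```python
# Export a YOLOv8 checkpoint to ONNX (step a above).
from ultralytics import YOLO

model = YOLO('yolov8n.pt')  # placeholder checkpoint
# A fixed 640x640 input and opset 12 are common choices for RKNN conversion.
model.export(format='onnx', imgsz=640, opset=12)
```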
Step 2: Check the preprocessing.
YOLOv8 preprocessing includes image resizing (letterbox) and normalization. Make sure the OpenCV preprocessing applied before RKNN inference matches what was used during training.
Step 3: Check the postprocessing.
YOLOv8 postprocessing includes bounding-box decoding and non-maximum suppression (NMS). Make sure the data layout coming out of the RKNN model matches what your postprocessing code expects.
Step 4: Performance optimization.
If inference is slow, try the following:
a) Lower the input resolution.
b) Quantize during RKNN conversion (e.g. int8), keeping in mind that quantization can cost accuracy.
c) Use RKNN-Toolkit2 optimization options, such as model pre-compilation.
Step 5: Accuracy problems.
If accuracy drops badly, check:
a) that preprocessing matches training (e.g. normalization mean and std);
b) that postprocessing is correct, especially the anchor-free box decoding YOLOv8 uses;
c) if quantization is enabled, whether float16 or float32 inference recovers the accuracy. A quick way to localize the problem is to compare ONNX and RKNN outputs on the same input, as in the sketch below.
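A minimal comparison sketch, assuming `onnxruntime` is installed and an `rknn` object that has already been loaded and had `init_runtime()` called (simulator or device); the model paths are placeholders. Cosine similarity close to 1.0 suggests the conversion itself is fine and the problem lies in pre/postprocessing:

```python
# Compare ONNX (PC) and RKNN outputs on the same input to localize accuracy loss.
import numpy as np
import onnxruntime as ort

x = np.random.rand(1, 3, 640, 640).astype(np.float32)  # or a real preprocessed image

sess = ort.InferenceSession('yolov8.onnx')  # placeholder path
onnx_out = sess.run(None, {sess.get_inputs()[0].name: x})[0]

# `rknn` must already be loaded and init_runtime() must have been called.
rknn_out = rknn.inference(inputs=[x])[0]

a, b = onnx_out.reshape(-1), rknn_out.reshape(-1)
cos = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
print(f'cosine similarity: {cos:.4f}')  # values well below 1.0 point at conversion/quantization
```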
Below is a code framework for YOLOv8 object detection with an RKNN model and OpenCV, with debugging and optimization advice baked in:
```python
import cv2
import numpy as np
from rknn.api import RKNN

# 1. Create the RKNN object.
rknn = RKNN()

# 2. Load the RKNN model.
ret = rknn.load_rknn('yolov8.rknn')

# 3. Initialize the runtime.
ret = rknn.init_runtime(target='rk3566', device_id='0')  # adjust target to your device


# 4. Image preprocessing.
def preprocess(image, input_size=(640, 640)):
    # Record the original image size.
    orig_shape = image.shape[:2]
    # Resize while keeping the aspect ratio (YOLOv8-style letterbox).
    h, w = orig_shape
    new_w, new_h = input_size
    r = min(new_w / w, new_h / h)
    pad_w = (new_w - w * r) / 2
    pad_h = (new_h - h * r) / 2
    resized_img = cv2.resize(image, (int(w * r), int(h * r)), interpolation=cv2.INTER_LINEAR)
    # Create the target canvas filled with gray (128, 128, 128).
    new_img = np.full((new_h, new_w, 3), 128, dtype=np.uint8)
    # Paste the resized image into the center of the canvas.
    new_img[int(pad_h):int(pad_h) + resized_img.shape[0],
            int(pad_w):int(pad_w) + resized_img.shape[1]] = resized_img
    # Scale pixel values to 0-1 (YOLOv8 divides by 255; no mean/std normalization).
    new_img = new_img.astype(np.float32) / 255.0
    # Convert to CHW layout.
    new_img = new_img.transpose((2, 0, 1))
    return new_img, (r, pad_w, pad_h)


# 5. Postprocessing (box decoding and NMS).
def postprocess(prediction, ratio, pad, orig_shape, conf_thres=0.25, iou_thres=0.45):
    # Adapt this to your YOLOv8 output structure.
    # YOLOv8 outputs a [1, 84, 8400] array (84 = 4 box coordinates + 80 class scores).
    # First, transpose the predictions to [8400, 84].
    prediction = prediction[0].transpose(1, 0)
    # Split boxes (cx, cy, w, h) and class scores.
    boxes = prediction[:, :4]    # [8400, 4]
    scores = prediction[:, 4:]   # [8400, 80]
    # Convert boxes from center format to corner format.
    boxes_xyxy = np.zeros_like(boxes)
    boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2  # x1
    boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2  # y1
    boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2  # x2
    boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2  # y2
    # Map boxes back to the original image (undo letterbox padding and scaling).
    # `ratio` is the single scale factor returned by preprocess.
    boxes_xyxy[:, [0, 2]] = (boxes_xyxy[:, [0, 2]] - pad[0]) / ratio
    boxes_xyxy[:, [1, 3]] = (boxes_xyxy[:, [1, 3]] - pad[1]) / ratio
    # Apply the confidence threshold.
    max_scores = np.max(scores, axis=1)
    mask = max_scores > conf_thres
    boxes_xyxy = boxes_xyxy[mask]
    scores = scores[mask]
    max_scores = max_scores[mask]
    # Apply NMS. cv2.dnn.NMSBoxes expects (x, y, w, h) rectangles, so convert first.
    boxes_xywh = boxes_xyxy.copy()
    boxes_xywh[:, 2:] -= boxes_xywh[:, :2]
    indices = cv2.dnn.NMSBoxes(boxes_xywh.tolist(), max_scores.tolist(), conf_thres, iou_thres)
    if len(indices) > 0:
        indices = np.array(indices).flatten()
        boxes_xyxy = boxes_xyxy[indices]
        scores = scores[indices]
        max_scores = max_scores[indices]
    return boxes_xyxy, scores, max_scores


# 6. Read the image.
img = cv2.imread('test.jpg')
orig_img = img.copy()
# Preprocess.
input_img, (ratio, pad_w, pad_h) = preprocess(img)

# 7. Inference.
outputs = rknn.inference(inputs=[input_img])

# 8. Postprocessing.
# Note: the output index may differ depending on how the model was converted;
# here we assume a single output at index 0.
prediction = outputs[0]
boxes, scores, max_scores = postprocess(prediction, ratio, (pad_w, pad_h), orig_img.shape)

# 9. Draw the results.
for i, box in enumerate(boxes):
    x1, y1, x2, y2 = box.astype(int)
    cv2.rectangle(orig_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
    label = f'{max_scores[i]:.2f}'
    cv2.putText(orig_img, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

# 10. Show the image.
cv2.imshow('Result', orig_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

# 11. Release RKNN resources.
rknn.release()
```
Debugging suggestions:
- If the conversion fails, check that the ONNX model exported correctly (inspect its structure with Netron).
- If inference results are wrong, run the same image through the original ONNX model on a PC and compare the outputs; this tells you whether the problem is the conversion or the pre/postprocessing.
- Use RKNN-Toolkit2's simulator mode on a PC for easier debugging (see the sketch after this list).
Optimization suggestions:
- Quantize during conversion with a calibration dataset to shrink the model and speed it up.
- Use RKNN-Toolkit2's hybrid quantization (if supported) to balance accuracy and speed.
- Pass `perf_debug=True` to `rknn.init_runtime()` to collect performance profiling data.
Note: this framework must be adapted to your specific model and requirements.
If you hit a specific error, please share the error message for more targeted debugging.
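A minimal simulator-mode sketch, assuming RKNN-Toolkit2 on a PC: after building from ONNX, calling `init_runtime()` with no target runs inference on the built-in simulator instead of a board. The ONNX path is a placeholder:

```python
# Run inference on the RKNN-Toolkit2 PC simulator (no board required).
from rknn.api import RKNN

rknn = RKNN()
rknn.config(target_platform='rk3588')         # platform you will deploy to
rknn.load_onnx(model='yolov8n.onnx')          # placeholder ONNX path
rknn.build(do_quantization=False)             # float build for debugging
rknn.init_runtime()                           # no target -> simulator
outputs = rknn.inference(inputs=[input_img])  # same preprocessed input as above
rknn.release()
```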
§§ Related Questions §§
1. How do I convert a YOLOv8 model to an RKNN model?
2. How do I do the pre- and post-processing correctly for RKNN inference?
3. How can I speed up RKNN model inference on an embedded device?
4. Quantization badly degrades RKNN model accuracy; how do I fix it?</think>

### 🚀 Debugging and Optimizing YOLOv8 Object Detection on the RKNN Platform
For the problems you hit running YOLOv8 object detection with an RKNN model and OpenCV, here is a systematic debugging and optimization plan:
---
#### 🔧 Core debugging steps
1. **Verify the model conversion** (make sure YOLOv8 converts to RKNN correctly; the calibration list format is sketched after this block):
```python
from rknn.api import RKNN

# Convert a YOLOv8 model to RKNN format.
rknn = RKNN()
ret = rknn.config(target_platform='rk3588')  # adjust to your chip
ret = rknn.load_onnx(model="yolov8n.onnx")
ret = rknn.build(do_quantization=True, dataset='./dataset.txt')  # quantization calibration
ret = rknn.export_rknn("yolov8.rknn")
```
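The `dataset.txt` passed to `build()` is a plain-text calibration list with one image path per line. A small sketch for generating it, assuming a hypothetical `./calib_images/` folder:

```python
# Write a quantization calibration list: one image path per line.
import glob

with open('dataset.txt', 'w') as f:
    for path in sorted(glob.glob('./calib_images/*.jpg'))[:100]:  # ~100 images is typical
        f.write(path + '\n')
```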
2. **Align the OpenCV preprocessing** (a critical step)[^1]:
```python
import cv2
import numpy as np

def preprocess(img_path, input_size=640):
    img = cv2.imread(img_path)
    # Standard YOLOv8 preprocessing: BGR->RGB, scale to 0-1, resize.
    # (Note: this is a plain resize; use letterbox if your model expects it.)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (input_size, input_size))
    img = img.astype(np.float32) / 255.0  # scale to 0-1
    img = np.transpose(img, (2, 0, 1))    # HWC -> CHW
    return np.expand_dims(img, 0)         # add batch dimension
```
3. **RKNN inference and postprocessing**:
```python
# Initialize the RKNN runtime.
ret = rknn.init_runtime(target='rk3588', device_id='0')

# Run inference.
input_data = preprocess("test.jpg")
outputs = rknn.inference(inputs=[input_data])

# YOLOv8 postprocessing (adapt to your output layer structure).
def postprocess(outputs, conf_thres=0.5):
    # outputs[0] shape: [1, 84, 8400] (4 box coordinates + 80 class scores)
    predictions = np.squeeze(outputs[0]).T
    # Drop low-confidence detections (filter scores and boxes together).
    scores = np.max(predictions[:, 4:], axis=1)
    mask = scores > conf_thres
    predictions = predictions[mask]
    scores = scores[mask]
    # Apply NMS. cv2.dnn.NMSBoxes expects (x, y, w, h) rectangles,
    # so convert from the (cx, cy, w, h) center format first.
    boxes_xywh = predictions[:, :4].copy()
    boxes_xywh[:, :2] -= boxes_xywh[:, 2:] / 2  # (cx, cy) -> top-left (x, y)
    indices = cv2.dnn.NMSBoxes(
        bboxes=boxes_xywh.tolist(),
        scores=scores.tolist(),
        score_threshold=conf_thres,
        nms_threshold=0.6
    )
    if len(indices) > 0:
        return predictions[np.array(indices).flatten()]
    return predictions[:0]  # empty result
```
---
#### ⚡ Performance optimization tips
1. **RKNN quantization**:
```python
# Enable INT8 quantization at build time (requires a calibration dataset).
rknn.build(do_quantization=True, dataset='./calib_data.txt')
```
2. **Memory and config optimizations**:
```python
rknn.config(
    target_platform='rk3588',
    optimization_level=3,       # highest optimization level
    quantize_input_node=True,   # quantize the input node
    float_dtype='float16'       # FP16 inference
)
```
3. **OpenCV hardware acceleration** (a capability check follows the block):
```python
# Enable OpenCV OpenCL acceleration (if supported).
cv2.ocl.setUseOpenCL(True)
# Or use the CUDA backend (requires OpenCV built with CUDA).
cv2.cuda.setDevice(0)
```
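Before enabling either path, it helps to check what the local OpenCV build actually supports; a small hedged check:

```python
import cv2

# Report which acceleration paths this OpenCV build supports.
print('OpenCL available:', cv2.ocl.haveOpenCL())
try:
    print('CUDA devices:', cv2.cuda.getCudaEnabledDeviceCount())
except cv2.error:
    print('CUDA devices: 0 (no CUDA support in this build)')
```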
---
#### 🐞 Troubleshooting common problems
1. **Output mismatch errors**:
   - Symptom: `Shape mismatch between model and RKNN`
   - Fix: check the YOLOv8 output layer structure, and pass the correct normalization at conversion time:
     ```python
     rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]])
     ```
2. **Accuracy loss**:
   - Caused by quantization: use hybrid quantization or FP16 mode.
   - Caused by inconsistent preprocessing: verify it matches the original YOLOv8 pipeline exactly.
3. **Low FPS**:
   - Enable the NPU's multi-core mode:
     ```python
     rknn.init_runtime(core_mask=RKNN.NPU_CORE_0_1_2)  # run on all three NPU cores
     ```
   - Cut postprocessing time: optimize the NMS implementation, or move it to a C++ extension.
---
#### 📊 Performance validation code
```python
import time

# Warm up.
for _ in range(5):
    rknn.inference(inputs=[input_data])

# Benchmark.
start = time.perf_counter()
for _ in range(100):
    outputs = rknn.inference(inputs=[input_data])
fps = 100 / (time.perf_counter() - start)
print(f"Inference FPS: {fps:.2f}")
```
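For a per-layer breakdown rather than end-to-end FPS, RKNN-Toolkit2 also offers `eval_perf`; a hedged sketch, assuming a connected board and a runtime initialized with `perf_debug=True`:

```python
# Per-layer timing via RKNN-Toolkit2 (assumes a connected board).
rknn.init_runtime(target='rk3588', perf_debug=True)
perf = rknn.eval_perf()  # prints a per-layer performance table
```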
These steps resolve the vast majority of RKNN + YOLOv8 integration problems. If the issue persists, please provide:
1. The complete error log
2. The chip model (e.g. RK3588/RK3399)
3. The YOLOv8 model version