paddleOCR中示例draw_ocr出错问题解决

from PIL import Image

# Original example: every element of `result` is treated as one detection,
# indexed as entry[0] -> box, entry[1][0] -> text, entry[1][1] -> score.
# NOTE: with newer paddleocr releases this snippet fails inside draw_ocr with
# the TypeError shown in the traceback below, because scores[i] ends up being
# a tuple instead of a float.
image = Image.open(img_path).convert('RGB')
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
scores = [line[1][1] for line in result]
# draw_ocr compares each score with its drop_score threshold (see traceback).
im_show = draw_ocr(image, boxes, txts, scores)
im_show = Image.fromarray(im_show)
im_show.save('result.jpg')

上面代码是原始示例代码,运行新版本的paddleocr会报错:

Traceback (most recent call last):
    im_show = draw_ocr(image, boxes, txts, scores)
  File "\lib\site-packages\paddleocr\tools\infer\utility.py", line 382, in draw_ocr
    if scores is not None and (scores[i] < drop_score or
TypeError: '<' not supported between instances of 'tuple' and 'float'

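原因:新版本返回的 result 多了一层嵌套(按图片分组),result 中的每个元素是一张图片的全部检测结果,因此原代码中的 line[1][1] 取到的是 (文本, 置信度) 元组,而不是置信度数值。修改如下(增加一层循环):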

 

from PIL import Image

# Newer paddleocr releases nest the result one level deeper: `result` holds
# one entry per input image, and each entry is that image's list of
# detections shaped [box, (text, score)]. Flatten that extra level once
# instead of repeating the nested loop for every column.
# An image without any detected text may be reported as None (confirm against
# the installed paddleocr version), so skip such entries rather than crash.
detections = [
    detection
    for page in result
    if page is not None
    for detection in page
]

image = Image.open(img_path).convert('RGB')
boxes = [detection[0] for detection in detections]
txts = [detection[1][0] for detection in detections]
scores = [detection[1][1] for detection in detections]
im_show = draw_ocr(image, boxes, txts, scores)
im_show = Image.fromarray(im_show)
im_show.save('test.jpg')

ERROR [ 5%] test setup failed name = '__init__', package = None def import_module(name, package=None): """Import a module. The 'package' argument is required when performing a relative import. It specifies the package to use as the anchor point from which to resolve the relative import to an absolute import. """ level = 0 if name.startswith('.'): if not package: msg = ("the 'package' argument is required to perform a relative " "import for {!r}") raise TypeError(msg.format(name)) for character in name: if character != '.': break level += 1 > return _bootstrap._gcd_import(name[level:], package, level) C:\Users\ADMIN\Anaconda3\envs\paddle_env\lib\importlib\__init__.py:127: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ > from .paddleocr import ( PaddleOCR, PPStructure, draw_ocr, draw_structure_result, save_structure_res, download_with_progressbar, sorted_layout_boxes, convert_info_docx, to_excel, ) E ImportError: attempted relative import with no known parent package ..\__init__.py:14: ImportError The above exception was the direct cause of the following exception: cls = <class '_pytest.runner.CallInfo'> func = <function call_and_report.<locals>.<lambda> at 0x000001800A722430> when = 'setup' reraise = (<class '_pytest.outcomes.Exit'>, <class 'KeyboardInterrupt'>) @classmethod def from_call( cls, func: Callable[[], TResult], when: Literal["collect", "setup", "call", "teardown"], reraise: type[BaseException] | tuple[type[BaseException], ...] | None = None, ) -> CallInfo[TResult]: """Call func, wrapping the result in a CallInfo. :param func: The function to call. Called wi
03-27
import cv2
import numpy as np
from paddleocr import PaddleOCR
import re
import traceback
from PIL import Image, ImageDraw, ImageFont

# Initialise PaddleOCR.
# NOTE(review): text_det_thresh=0 and text_det_unclip_ratio=0.5 are far from
# the library defaults and are the likely reason the drawn boxes do not fit
# the text; confirm the meaning of both parameters in the PaddleOCR docs
# before tuning them. Values are left exactly as in the original script.
ocr = PaddleOCR(
    use_textline_orientation=True,
    lang="ch",
    text_det_thresh=0,  # detection pixel threshold
    text_det_unclip_ratio=0.5,  # expansion ratio applied to detected boxes
    text_det_box_thresh=0.5,  # box score threshold
)

# Detections at or below this confidence are neither drawn nor reported.
_MIN_CONFIDENCE = 0.3

# BGR colours used for each text category.
_COLOR_MAP = {
    'title': (0, 0, 255),
    'body': (0, 255, 0),
    'footer': (255, 0, 0),
    'number': (255, 255, 0),
    'default': (0, 255, 255),
}

# Compiled once; matches strings made only of digits, dots, yen/percent signs
# and commas.
_NUMBER_PATTERN = re.compile(r'^[\d\.¥¥%,]+$')

_FOOTER_KEYWORDS = ['合计', '日期', '谢谢', '总计', '欢迎', '下次光临']


def preprocess_image(image):
    """Return a contrast-enhanced, lightly blurred BGR copy of *image*.

    The image is converted to grayscale, equalised with CLAHE, smoothed with
    a 3x3 Gaussian blur and converted back to three channels.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    gray = clahe.apply(gray)
    gray = cv2.GaussianBlur(gray, (3, 3), 0)
    return cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)


def shrink_box(pts, shrink_ratio=0.03):
    """Return the axis-aligned bounding box of *pts*, shrunk on every side.

    *pts* is indexed as ``pts[:, 0, 0]`` (x) and ``pts[:, 0, 1]`` (y). Each
    side moves inwards by ``shrink_ratio`` times the box width or height.
    The result is an int32 array of four corner points with shape (4, 1, 2).
    """
    x_min = np.min(pts[:, 0, 0])
    y_min = np.min(pts[:, 0, 1])
    x_max = np.max(pts[:, 0, 0])
    y_max = np.max(pts[:, 0, 1])

    width = x_max - x_min
    height = y_max - y_min

    x_min += width * shrink_ratio
    x_max -= width * shrink_ratio
    y_min += height * shrink_ratio
    y_max -= height * shrink_ratio

    return np.array(
        [[[x_min, y_min]], [[x_max, y_min]], [[x_max, y_max]], [[x_min, y_max]]],
        dtype=np.int32,
    )


def draw_text_with_pil(image, text, position, color, font_size=14):
    """Draw *text* (which may contain Chinese) on a BGR image using PIL.

    *color* is given in BGR order and reversed for PIL. Returns a new BGR
    array; the input image is not modified.
    """
    pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(pil_image)

    # Try a Chinese-capable font first; adjust the path for the host system.
    try:
        font = ImageFont.truetype("simhei.ttf", font_size, encoding="utf-8")
    except IOError:
        # Font file not found: fall back to PIL's built-in font.
        font = ImageFont.load_default()

    draw.text(position, text, font=font, fill=tuple(reversed(color)))

    return cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)


def classify_text(text, idx):
    """Classify *text* as 'title', 'number', 'footer' or 'body'.

    The first three entries (``idx < 3``) longer than two characters are
    titles; purely numeric/currency strings are numbers; strings containing
    one of the footer keywords are footers; everything else is body text.
    """
    if idx < 3 and len(text) > 2:
        return 'title'
    if _NUMBER_PATTERN.match(text):
        return 'number'
    if any(keyword in text for keyword in _FOOTER_KEYWORDS):
        return 'footer'
    return 'body'


def _annotate_detection(image, text, coords, confidence, index, recognized_text):
    """Draw one detection on *image*, record it, and return the updated image.

    Empty texts and detections whose confidence is not above
    ``_MIN_CONFIDENCE`` are skipped and *image* is returned unchanged.
    """
    text = text.strip()
    if not text or not confidence > _MIN_CONFIDENCE:
        return image

    pts = np.array(coords, np.int32).reshape((-1, 1, 2))
    category = classify_text(text, index)
    color = _COLOR_MAP.get(category, _COLOR_MAP['default'])
    cv2.polylines(image, [pts], True, color, 2)

    # Place the label above the first polygon point, clamped so that it
    # stays inside the image.
    x, y = pts[0][0][0], pts[0][0][1]
    y = max(y - 15, 15)
    image = draw_text_with_pil(image, text, (x, y - 15), color)

    recognized_text.append({
        'text': text,
        'category': category,
        'confidence': confidence,
        'coordinates': coords,
    })
    return image


def detect_text_with_colored_boxes(image_path, output_path=None):
    """Run OCR on an image file and draw category-coloured boxes and labels.

    Args:
        image_path: path of the image to read.
        output_path: when truthy, the annotated image is written there.

    Returns:
        A ``(recognized_text, image)`` tuple: a list of dicts with the keys
        'text', 'category', 'confidence' and 'coordinates', and the
        annotated BGR image.

    Raises:
        FileNotFoundError: if the image cannot be read.
        Exception: any OCR/drawing error is printed with its traceback and
            re-raised.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"无法读取图像: {image_path}")

    if len(image.shape) == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

    try:
        processed_image = preprocess_image(image)
        result = ocr.predict(processed_image)

        recognized_text = []

        if isinstance(result, list):
            if len(result) > 0 and isinstance(result[0], dict):
                # Dict-style results: parallel lists of texts, polygons, scores.
                for item in result:
                    if 'rec_texts' in item and 'dt_polys' in item and 'rec_scores' in item:
                        detections = zip(
                            item['rec_texts'], item['dt_polys'], item['rec_scores']
                        )
                        for i, (text, coords, confidence) in enumerate(detections):
                            image = _annotate_detection(
                                image, text, coords, confidence, i, recognized_text
                            )
                    else:
                        print(f"无法解析的结果格式: {list(item.keys())[:5]}...")
            else:
                # List-style results: each item is [coords, (text, confidence)].
                for i, item in enumerate(result):
                    if isinstance(item, list) and len(item) >= 2:
                        coords = item[0]
                        text_info = item[1]
                        if isinstance(text_info, (list, tuple)) and len(text_info) >= 2:
                            image = _annotate_detection(
                                image, text_info[0], coords, text_info[1], i,
                                recognized_text,
                            )
                    else:
                        # str() first: the item may be None or another
                        # non-sliceable object.
                        print(f"跳过格式异常的结果项: {str(item)[:50]}...")
        else:
            print(f"OCR返回非预期格式: {type(result)}")

        if output_path:
            cv2.imwrite(output_path, image)

        return recognized_text, image

    except Exception as e:
        print(f"OCR处理过程中出错: {str(e)}")
        traceback.print_exc()
        raise


if __name__ == "__main__":
    input_image = 'small.jpg'
    output_image = 'document_ocr2.jpg'

    try:
        print("开始OCR识别...")
        results, processed_image = detect_text_with_colored_boxes(input_image, output_image)

        print(f"识别完成,共识别出 {len(results)} 个文本区域")
        for item in results:
            print(f"[{item['category']}] {item['text']} (置信度: {item['confidence']:.2f})")

        cv2.imshow('OCR Result', processed_image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    except FileNotFoundError as e:
        print(f"文件错误: {e}")
    except Exception as e:
        print(f"处理过程中出错: {e}")

# Question from the original post: when this code recognises text in an
# image, the drawn boxes do not fit the text closely; improve the code.
最新发布
07-19
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值