import cv2
import numpy as np
from paddleocr import PaddleOCR
import re
import traceback
from PIL import Image, ImageDraw, ImageFont
# Initialise the shared PaddleOCR pipeline used by the functions in this file.
ocr = PaddleOCR(
    # Turn off the optional document pre-processing stages. When they run, the
    # returned polygons refer to the rotated/unwarped image rather than to the
    # image passed to predict(), so boxes drawn on the input image are offset.
    # NOTE(review): confirm these stages are enabled by default in the
    # installed PaddleOCR version.
    use_doc_orientation_classify=False,
    use_doc_unwarping=False,
    use_textline_orientation=True,
    lang="ch",
    # det_algorithm="DB",  # pin the DB detection algorithm (more stable)
    # Pixel-level threshold on the detector's probability map. The previous
    # value of 0 marks practically every pixel as text, which destroys the
    # box geometry; 0.3 keeps only pixels that are likely to be text.
    text_det_thresh=0.3,
    # Boxes whose mean score is below this value are discarded.
    text_det_box_thresh=0.6,
    # Expansion applied to each detected text kernel. The previous value of
    # 0.5 produced boxes smaller than the glyphs; 1.5 restores the full text
    # extent while staying fairly tight. Lower it slightly for tighter boxes.
    text_det_unclip_ratio=1.5,
    # det_model_dir='D:\DaiMaGongJu\PaddleOCR\models\ch_PP-OCRv4_det_server_infer',
)
def preprocess_image(image):
    """Enhance a BGR image before OCR to improve the recognition rate.

    The image is converted to grayscale, contrast-equalised with CLAHE
    (clip limit 3.0, 8x8 tiles), smoothed with a 3x3 Gaussian blur and then
    converted back to a 3-channel BGR image. The image size is not changed.

    Args:
        image: BGR image array as accepted by ``cv2.cvtColor``.

    Returns:
        The enhanced image as a 3-channel BGR array.
    """
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    luminance = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Local contrast boost first, then a light blur to suppress the noise
    # that the equalisation amplifies.
    smoothed = cv2.GaussianBlur(equalizer.apply(luminance), (3, 3), 0)
    return cv2.cvtColor(smoothed, cv2.COLOR_GRAY2BGR)
def shrink_box(pts, shrink_ratio=0.03):
    """Return the axis-aligned bounding box of ``pts``, shrunk proportionally.

    Args:
        pts: Array indexed as ``pts[:, 0, 0]`` (x) and ``pts[:, 0, 1]`` (y),
            i.e. the ``(N, 1, 2)`` layout used for OpenCV polylines.
        shrink_ratio: Fraction of the width/height removed from each side.

    Returns:
        A ``(4, 1, 2)`` ``int32`` array with the corners in the order
        top-left, top-right, bottom-right, bottom-left. Fractional
        coordinates are truncated by the integer conversion.
    """
    xs = pts[:, 0, 0]
    ys = pts[:, 0, 1]
    left, right = np.min(xs), np.max(xs)
    top, bottom = np.min(ys), np.max(ys)

    # Margin taken off each side, proportional to the box extent.
    dx = (right - left) * shrink_ratio
    dy = (bottom - top) * shrink_ratio
    left, right = left + dx, right - dx
    top, bottom = top + dy, bottom - dy

    corners = [(left, top), (right, top), (right, bottom), (left, bottom)]
    return np.array([[list(corner)] for corner in corners], dtype=np.int32)
def draw_text_with_pil(image, text, position, color, font_size=14):
    """Draw (possibly Chinese) text onto a BGR image using Pillow.

    Args:
        image: BGR image array.
        text: String to render.
        position: ``(x, y)`` position passed to ``ImageDraw.text``.
        color: Text colour in BGR order; it is reversed to RGB for Pillow.
        font_size: Size used when loading the TrueType font.

    Returns:
        A new BGR image array with the text rendered on it.
    """
    rgb_canvas = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Try the SimHei font first; adjust the font path for the target system.
    try:
        font = ImageFont.truetype("simhei.ttf", font_size, encoding="utf-8")
    except IOError:
        # Font file not found: fall back to Pillow's built-in default font.
        font = ImageFont.load_default()

    rgb_fill = tuple(color[::-1])
    ImageDraw.Draw(rgb_canvas).text(position, text, font=font, fill=rgb_fill)

    # Back to an OpenCV-style BGR array.
    return cv2.cvtColor(np.array(rgb_canvas), cv2.COLOR_RGB2BGR)
# Vertical gap (pixels) between a box's first vertex and the top of its label;
# slightly more than the default 14 px font used by draw_text_with_pil.
_LABEL_OFFSET = 18


def _annotate_region(image, text, confidence, coords, index, color_map):
    """Draw one recognised text region and build its result record.

    Returns:
        ``(image, record)`` where ``image`` is the annotated image and
        ``record`` is the dict appended to the recognition results.
    """
    pts = np.array(coords, np.int32).reshape((-1, 1, 2))
    category = classify_text(text, index)
    color = color_map.get(category, color_map['default'])
    cv2.polylines(image, [pts], True, color, 2)

    # Place the label directly above the first vertex of the polygon and
    # clamp it so it never starts above the top edge of the image.
    label_x = int(pts[0][0][0])
    label_y = max(int(pts[0][0][1]) - _LABEL_OFFSET, 0)
    image = draw_text_with_pil(image, text, (label_x, label_y), color)

    record = {
        'text': text,
        'category': category,
        'confidence': confidence,
        'coordinates': coords
    }
    return image, record


def detect_text_with_colored_boxes(image_path, output_path=None):
    """Run PaddleOCR on an image and draw category-coloured boxes on it.

    Args:
        image_path: Path of the image to read with ``cv2.imread``.
        output_path: Optional path; when truthy, the annotated image is
            written there with ``cv2.imwrite``.

    Returns:
        ``(recognized_text, image)``: a list of dicts with the keys
        ``text``, ``category``, ``confidence`` and ``coordinates``, and the
        annotated BGR image.

    Raises:
        FileNotFoundError: If the image cannot be read.
        Exception: Any error raised during OCR or drawing is printed with
            its traceback and re-raised.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"无法读取图像: {image_path}")
    if len(image.shape) == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    try:
        # Pre-processing keeps the image size, so the polygons returned for
        # the processed image can be drawn on the original one.
        processed_image = preprocess_image(image)
        result = ocr.predict(processed_image)
        color_map = {
            'title': (0, 0, 255),
            'body': (0, 255, 0),
            'footer': (255, 0, 0),
            'number': (255, 255, 0),
            'default': (0, 255, 255)
        }
        recognized_text = []
        if not isinstance(result, list):
            print(f"OCR返回非预期格式: {type(result)}")
        elif len(result) > 0 and isinstance(result[0], dict):
            # Dict-style results (one mapping per image).
            for item in result:
                # Prefer 'rec_polys': per the PaddleOCR docs it is the polygon
                # list that matches 'rec_texts'/'rec_scores' one-to-one, while
                # 'dt_polys' holds every detected box. Fall back to
                # 'dt_polys' when 'rec_polys' is absent.
                polys_key = 'rec_polys' if 'rec_polys' in item else 'dt_polys'
                if not ('rec_texts' in item and polys_key in item and 'rec_scores' in item):
                    print(f"无法解析的结果格式: {list(item.keys())[:5]}...")
                    continue
                triples = zip(item['rec_texts'], item[polys_key], item['rec_scores'])
                for i, (raw_text, coords, confidence) in enumerate(triples):
                    text = raw_text.strip()
                    if len(text) > 0 and confidence > 0.3:
                        image, record = _annotate_region(
                            image, text, confidence, coords, i, color_map)
                        recognized_text.append(record)
        else:
            # List-style results: each item is [coords, (text, confidence)].
            for i, item in enumerate(result):
                if not (isinstance(item, list) and len(item) >= 2):
                    # str() keeps this diagnostic safe for items that cannot
                    # be sliced (e.g. None).
                    print(f"跳过格式异常的结果项: {str(item)[:50]}...")
                    continue
                coords = item[0]
                text_info = item[1]
                if isinstance(text_info, (list, tuple)) and len(text_info) >= 2:
                    text = text_info[0].strip()
                    confidence = text_info[1]
                    if len(text) > 0 and confidence > 0.3:
                        image, record = _annotate_region(
                            image, text, confidence, coords, i, color_map)
                        recognized_text.append(record)
        if output_path:
            cv2.imwrite(output_path, image)
        return recognized_text, image
    except Exception as e:
        print(f"OCR处理过程中出错: {str(e)}")
        traceback.print_exc()
        raise
def classify_text(text, idx):
    """Classify a recognised string by its content and position index.

    Args:
        text: Recognised text.
        idx: Index of the text within the OCR result.

    Returns:
        ``'title'`` for one of the first three entries longer than two
        characters, ``'number'`` for strings made only of digits, dots,
        currency/percent signs and commas, ``'footer'`` when a footer keyword
        occurs in the text, otherwise ``'body'``. The checks run in that order.
    """
    footer_keywords = ('合计', '日期', '谢谢', '总计', '欢迎', '下次光临')

    looks_like_title = idx < 3 and len(text) > 2
    if looks_like_title:
        return 'title'
    if re.match(r'^[\d\.¥¥%,]+$', text) is not None:
        return 'number'
    for keyword in footer_keywords:
        if keyword in text:
            return 'footer'
    return 'body'
# Script entry point: run OCR on a sample image, print every recognised
# region and show the annotated result in a window until a key is pressed.
if __name__ == "__main__":
    input_image = 'small.jpg'
    output_image = 'document_ocr2.jpg'
    try:
        print("开始OCR识别...")
        results, processed_image = detect_text_with_colored_boxes(input_image, output_image)
        print(f"识别完成,共识别出 {len(results)} 个文本区域")
        for item in results:
            print(f"[{item['category']}] {item['text']} (置信度: {item['confidence']:.2f})")
        cv2.imshow('OCR Result', processed_image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    except FileNotFoundError as e:
        print(f"文件错误: {e}")
    except Exception as e:
        print(f"处理过程中出错: {e}")
# Note carried over from the original post (it was fused onto the last print
# line and made the file unparsable): the author reported that the detected
# boxes did not fit the text tightly and asked for the code to be improved.
# Page residue from the original post: "最新发布" ("latest release").