一、腾讯云OCR简介与应用场景
腾讯云OCR(光学字符识别)服务提供高精度的文字识别能力,支持身份证、银行卡、营业执照、通用印刷体等多种识别场景。相比百度OCR,腾讯云OCR在金融票据识别和证件识别方面具有独特优势。
主要应用场景:
- 金融行业:支票、汇票识别
- 政务领域:身份证、驾驶证识别
- 企业办公:合同、发票数字化
- 教育行业:试卷、文档电子化
二、项目配置与依赖设置
1. 腾讯云账号准备
- 访问腾讯云控制台
- 创建子账号并分配OCR权限
- 获取 SecretId 和 SecretKey
2. Maven依赖 (pom.xml)
<dependencies>
    <!-- Spring Boot web starter -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <!-- Generates metadata for @ConfigurationProperties classes -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-configuration-processor</artifactId>
        <optional>true</optional>
    </dependency>
    <!-- Lombok: required by the @Data/@Builder annotations on TencentOcrProperties.
         No version is given here on the assumption that the build inherits Spring Boot's
         dependency management; add an explicit version otherwise. -->
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <optional>true</optional>
    </dependency>
    <!-- Tencent Cloud Java SDK -->
    <dependency>
        <groupId>com.tencentcloudapi</groupId>
        <artifactId>tencentcloud-sdk-java</artifactId>
        <version>3.1.270</version>
    </dependency>
</dependencies>
3. YAML配置 (application.yml)
# Tencent Cloud OCR settings, bound to TencentOcrProperties (prefix "tencent.ocr").
tencent:
  ocr:
    secret-id: "your-secret-id"      # Tencent Cloud SecretId (placeholder)
    secret-key: "your-secret-key"    # Tencent Cloud SecretKey (placeholder)
    region: "ap-guangzhou"           # service region
    general-ocr-endpoint: "ocr.tencentcloudapi.com"
    id-card-ocr-endpoint: "ocr.tencentcloudapi.com"
    engine-type: "GeneralBasicOCR"   # default engine
    # Connection pool settings
    max-idle-conns: 100              # maximum idle connections
    keep-alive-seconds: 120          # keep-alive time in seconds
    # Retry policy
    max-retries: 3                   # maximum number of retries
    # Image limits
    max-image-size: 3145728          # 3MB (3 * 1024 * 1024)

# A YAML mapping may contain each key only once, so the async-executor and
# multipart settings share a single top-level "spring" key.
spring:
  # Async task executor settings
  task:
    execution:
      pool:
        core-size: 10
        max-size: 50
        queue-capacity: 100
      thread-name-prefix: OCR-Async-
  # File upload limits
  servlet:
    multipart:
      max-file-size: 3MB
      max-request-size: 10MB
三、配置参数封装
1. 配置类 (TencentOcrProperties.java)
@Data
@Builder
@Component
@AllArgsConstructor
@NoArgsConstructor
@ConfigurationProperties(prefix = "tencent.ocr")
public class TencentOcrProperties {
private String secretId;
private String secretKey;
private String region;
private String generalOcrEndpoint;
private String idCardOcrEndpoint;
private String engineType;
// 新增连接池配置
private int maxIdleConns = 50;
private int keepAliveSeconds = 60;
private int maxRetries = 3;
private int maxImageSize = 3 * 1024 * 1024; // 3MB
}
2.配置类OcrClientConfig (客户端Bean管理)
import com.tencentcloudapi.common.Credential;
import com.tencentcloudapi.common.profile.ClientProfile;
import com.tencentcloudapi.common.profile.HttpProfile;
import com.tencentcloudapi.ocr.v20181119.OcrClient;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Declares the two {@link OcrClient} beans used by the application: one for
 * general text recognition and one for ID card recognition. Both are built from
 * the same {@link TencentOcrProperties} and differ only in their endpoint.
 */
@Configuration
public class OcrClientConfig {

    @Autowired
    private TencentOcrProperties ocrProperties;

    /** Client bound to the general OCR endpoint. */
    @Bean(name = "generalOcrClient", destroyMethod = "shutdown")
    public OcrClient generalOcrClient() {
        String endpoint = ocrProperties.getGeneralOcrEndpoint();
        return createOcrClient(endpoint);
    }

    /** Client bound to the ID card OCR endpoint. */
    @Bean(name = "idCardOcrClient", destroyMethod = "shutdown")
    public OcrClient idCardOcrClient() {
        String endpoint = ocrProperties.getIdCardOcrEndpoint();
        return createOcrClient(endpoint);
    }

    /**
     * Builds a client for the given endpoint using the configured credential,
     * region and connection settings.
     *
     * @param endpoint API host the client should talk to
     * @return a newly created client
     */
    private OcrClient createOcrClient(String endpoint) {
        HttpProfile http = new HttpProfile();
        http.setEndpoint(endpoint);
        http.setMaxIdleConns(ocrProperties.getMaxIdleConns());
        // The property is expressed in seconds; it is multiplied by 1000 before being passed on.
        http.setKeepAliveDuration(ocrProperties.getKeepAliveSeconds() * 1000);

        ClientProfile profile = new ClientProfile();
        profile.setHttpProfile(http);

        Credential credential = new Credential(
                ocrProperties.getSecretId(),
                ocrProperties.getSecretKey());
        return new OcrClient(credential, ocrProperties.getRegion(), profile);
    }
}
3. 异步配置支持 AsyncConfig
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.AsyncConfigurer;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import java.util.concurrent.Executor;
/**
 * Enables {@code @Async} processing and supplies the executor used for it.
 */
@Configuration
@EnableAsync
public class AsyncConfig implements AsyncConfigurer {

    /**
     * Creates and initializes the thread pool used for asynchronous method
     * execution: 10 core threads, at most 50 threads, a queue of 100 tasks,
     * and thread names starting with {@code OCR-Async-}.
     *
     * @return the initialized executor
     */
    @Override
    public Executor getAsyncExecutor() {
        ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
        pool.setThreadNamePrefix("OCR-Async-");
        pool.setQueueCapacity(100);
        pool.setMaxPoolSize(50);
        pool.setCorePoolSize(10);
        pool.initialize();
        return pool;
    }
}
四、OCR服务实现
1. OCR服务类 (TencentOcrService.java)
import com.tencentcloudapi.common.AbstractModel;
import com.tencentcloudapi.common.Credential;
import com.tencentcloudapi.common.exception.TencentCloudSDKException;
import com.tencentcloudapi.common.profile.ClientProfile;
import com.tencentcloudapi.common.profile.HttpProfile;
import com.tencentcloudapi.ocr.v20181119.OcrClient;
import com.tencentcloudapi.ocr.v20181119.models.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Base64;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Service that validates and pre-processes uploaded images and sends them to
 * the Tencent Cloud OCR API, retrying selected failures with exponential backoff.
 *
 * <p>Two clients are created at start-up (general text and ID card) and released
 * together with the private async executor when the bean is destroyed.
 */
@Service
public class TencentOcrService {

    private final TencentOcrProperties ocrProperties;
    private OcrClient generalOcrClient;
    private OcrClient idCardOcrClient;
    private final ExecutorService asyncExecutor = Executors.newFixedThreadPool(10);

    @Autowired
    public TencentOcrService(TencentOcrProperties ocrProperties) {
        this.ocrProperties = ocrProperties;
    }

    /** Creates the OCR clients once the configuration has been injected. */
    @PostConstruct
    public void init() {
        this.generalOcrClient = createOcrClient(ocrProperties.getGeneralOcrEndpoint());
        this.idCardOcrClient = createOcrClient(ocrProperties.getIdCardOcrEndpoint());
    }

    /** Releases the OCR clients and stops the async executor. */
    @PreDestroy
    public void shutdown() {
        if (generalOcrClient != null) {
            generalOcrClient.shutdown();
        }
        if (idCardOcrClient != null) {
            idCardOcrClient.shutdown();
        }
        asyncExecutor.shutdown();
    }

    /**
     * Builds a client for the given endpoint from the configured credential,
     * region and connection settings.
     *
     * <p>NOTE(review): confirm that the SDK version in use exposes the
     * {@code HttpProfile} connection setters called below.
     *
     * @param endpoint API host the client should talk to
     * @return a newly created client
     */
    private OcrClient createOcrClient(String endpoint) {
        Credential cred = new Credential(ocrProperties.getSecretId(), ocrProperties.getSecretKey());
        HttpProfile httpProfile = new HttpProfile();
        httpProfile.setEndpoint(endpoint);
        httpProfile.setMaxIdleConns(ocrProperties.getMaxIdleConns());
        // The property is expressed in seconds; it is multiplied by 1000 before being passed on.
        httpProfile.setKeepAliveDuration(ocrProperties.getKeepAliveSeconds() * 1000);
        ClientProfile clientProfile = new ClientProfile();
        clientProfile.setHttpProfile(httpProfile);
        return new OcrClient(cred, ocrProperties.getRegion(), clientProfile);
    }

    /**
     * General text recognition (synchronous).
     *
     * @param file uploaded image
     * @return the API response serialized as JSON
     * @throws IllegalArgumentException if the upload is missing, too large, of an
     *         unsupported type, or cannot be decoded as an image
     * @throws Exception if the OCR call fails after the allowed retries
     */
    public String recognizeGeneralText(MultipartFile file) throws Exception {
        validateImage(file);
        BufferedImage processedImage = preprocessImage(file);
        String imageBase64 = convertToBase64(processedImage);
        GeneralBasicOCRRequest request = new GeneralBasicOCRRequest();
        request.setImageBase64(imageBase64);
        return callOcrApiWithRetry(
                request,
                "GeneralBasicOCR",
                generalOcrClient,
                ocrProperties.getMaxRetries()
        );
    }

    /**
     * General text recognition (asynchronous).
     *
     * <p>The work is submitted to this service's own executor. The method is
     * deliberately not annotated with {@code @Async}: combining the annotation
     * with {@code supplyAsync} would dispatch the task twice and leave one pool
     * thread doing nothing but waiting for the other.
     *
     * @param file uploaded image
     * @return a future holding the JSON response; it completes exceptionally
     *         with a {@link RuntimeException} wrapping the original failure
     */
    public CompletableFuture<String> recognizeGeneralTextAsync(MultipartFile file) {
        return CompletableFuture.supplyAsync(() -> {
            try {
                return recognizeGeneralText(file);
            } catch (Exception e) {
                throw new RuntimeException("异步OCR识别失败", e);
            }
        }, asyncExecutor);
    }

    /**
     * ID card recognition.
     *
     * @param file    uploaded image
     * @param isFront {@code true} for the front side, {@code false} for the back
     * @return the API response serialized as JSON
     * @throws IllegalArgumentException if the upload is missing, too large, of an
     *         unsupported type, or cannot be decoded as an image
     * @throws Exception if the OCR call fails after the allowed retries
     */
    public String recognizeIdCard(MultipartFile file, boolean isFront) throws Exception {
        validateImage(file);
        BufferedImage processedImage = preprocessImage(file);
        String imageBase64 = convertToBase64(processedImage);
        IdCardOCRRequest request = new IdCardOCRRequest();
        request.setImageBase64(imageBase64);
        request.setCardSide(isFront ? "FRONT" : "BACK");
        return callOcrApiWithRetry(
                request,
                "IDCardOCR",
                idCardOcrClient,
                ocrProperties.getMaxRetries()
        );
    }

    /**
     * Calls the OCR API, retrying while {@link #shouldRetry} allows it and
     * sleeping for {@link #calculateBackoffTime} between attempts.
     *
     * @throws InterruptedException if the thread is interrupted while backing off;
     *         the interrupt flag is restored before the exception propagates
     * @throws Exception wrapping the SDK exception once no retry is allowed
     */
    private String callOcrApiWithRetry(AbstractModel request, String engineType,
                                       OcrClient client, int maxRetries)
            throws Exception {
        int retryCount = 0;
        while (true) {
            try {
                return executeOcrApiCall(request, engineType, client);
            } catch (TencentCloudSDKException e) {
                if (!shouldRetry(e, retryCount, maxRetries)) {
                    throw new Exception("OCR识别失败: " + e.getMessage(), e);
                }
                retryCount++;
                try {
                    Thread.sleep(calculateBackoffTime(retryCount));
                } catch (InterruptedException interrupted) {
                    // Keep the interrupt visible to callers that only see a generic Exception.
                    Thread.currentThread().interrupt();
                    throw interrupted;
                }
            }
        }
    }

    /**
     * Dispatches the request to the client method matching {@code engineType}
     * and serializes the response to JSON.
     *
     * @throws IllegalArgumentException if {@code engineType} is not supported
     */
    private String executeOcrApiCall(AbstractModel request, String engineType, OcrClient client)
            throws TencentCloudSDKException {
        switch (engineType) {
            case "GeneralBasicOCR":
                return GeneralBasicOCRResponse.toJsonString(
                        client.GeneralBasicOCR((GeneralBasicOCRRequest) request));
            case "GeneralAccurateOCR":
                return GeneralAccurateOCRResponse.toJsonString(
                        client.GeneralAccurateOCR((GeneralAccurateOCRRequest) request));
            case "IDCardOCR":
                return IdCardOCRResponse.toJsonString(
                        client.IdCardOCR((IdCardOCRRequest) request));
            default:
                throw new IllegalArgumentException("不支持的OCR引擎类型: " + engineType);
        }
    }

    /**
     * Decodes the upload and converts it to a grayscale image.
     *
     * @throws IllegalArgumentException if the content cannot be decoded as an image
     * @throws IOException if reading the upload fails
     */
    private BufferedImage preprocessImage(MultipartFile file) throws IOException {
        BufferedImage originalImage;
        // ImageIO.read does not close the stream it is given, so close it here.
        try (InputStream in = file.getInputStream()) {
            originalImage = ImageIO.read(in);
        }
        // ImageIO.read returns null when no registered reader can decode the data,
        // e.g. when the declared content type does not match the actual bytes.
        if (originalImage == null) {
            throw new IllegalArgumentException("无法解析图片内容,请确认文件为有效的JPG/PNG/BMP图片");
        }
        BufferedImage grayImage = new BufferedImage(
                originalImage.getWidth(),
                originalImage.getHeight(),
                BufferedImage.TYPE_BYTE_GRAY
        );
        Graphics2D g2d = grayImage.createGraphics();
        try {
            g2d.drawImage(originalImage, 0, 0, null);
        } finally {
            g2d.dispose();
        }
        return grayImage;
    }

    /**
     * Encodes the image as JPEG and returns the bytes as a Base64 string.
     *
     * @throws IOException if encoding fails or no JPEG writer is available
     */
    private String convertToBase64(BufferedImage image) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // ImageIO.write reports "no suitable writer" through its return value, not an exception.
        if (!ImageIO.write(image, "jpg", baos)) {
            throw new IOException("No JPEG writer available for the processed image");
        }
        return Base64.getEncoder().encodeToString(baos.toByteArray());
    }

    /**
     * Checks that an upload is present, within the configured size limit and
     * declared as JPEG, PNG or BMP.
     *
     * @throws IllegalArgumentException if any check fails
     */
    private void validateImage(MultipartFile file) throws IOException {
        if (file == null || file.isEmpty()) {
            throw new IllegalArgumentException("请上传图片文件");
        }
        if (file.getSize() > ocrProperties.getMaxImageSize()) {
            throw new IllegalArgumentException(
                    "图片大小不能超过" + (ocrProperties.getMaxImageSize() / 1024) + "KB");
        }
        String contentType = file.getContentType();
        if (contentType == null ||
                !(contentType.equals("image/jpeg") ||
                        contentType.equals("image/png") ||
                        contentType.equals("image/bmp"))) {
            throw new IllegalArgumentException("仅支持JPG/PNG/BMP格式图片");
        }
    }

    /**
     * Decides whether a failed call should be retried: only while retries remain
     * and only for the {@code RequestLimitExceeded} and {@code InternalError} codes.
     * An exception without an error code is never retried.
     */
    private boolean shouldRetry(TencentCloudSDKException e, int retryCount, int maxRetries) {
        String errorCode = e.getErrorCode();
        // Constant-first equals so that a missing error code cannot cause an NPE.
        return retryCount < maxRetries &&
                ("RequestLimitExceeded".equals(errorCode) ||
                        "InternalError".equals(errorCode));
    }

    /**
     * Exponential backoff: {@code 500ms * 2^retryCount}.
     */
    private long calculateBackoffTime(int retryCount) {
        return (long) (500 * Math.pow(2, retryCount));
    }
}
五、控制器实现
OCR控制器 (OcrController.java)
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import java.util.concurrent.CompletableFuture;
@RestController
@RequestMapping("/api/ocr")
public class OcrController {
private final TencentOcrService ocrService;
@Autowired
public OcrController(TencentOcrService ocrService) {
this.ocrService = ocrService;
}
// 同步通用文字识别
@PostMapping("/general")
public ResponseEntity<?> generalOcr(@RequestParam("image") MultipartFile file) {
try {
String result = ocrService.recognizeGeneralText(file);
return ResponseEntity.ok(result);
} catch (Exception e) {
return ResponseEntity.badRequest().body(
new ErrorResponse("OCR_FAILED", e.getMessage()));
}
}
// 异步通用文字识别
@PostMapping("/general/async")
public CompletableFuture<ResponseEntity<?>> generalOcrAsync(
@RequestParam("image") MultipartFile file) {
return ocrService.recognizeGeneralTextAsync(file)
.thenApply(ResponseEntity::ok)
.exceptionally(ex -> ResponseEntity.badRequest().body(
new ErrorResponse("ASYNC_OCR_FAILED", ex.getCause().getMessage())));
}
// 身份证识别(正面)
@PostMapping("/idcard/front")
public ResponseEntity<?> idCardFront(@RequestParam("image") MultipartFile file) {
try {
String result = ocrService.recognizeIdCard(file, true);
return ResponseEntity.ok(result);
} catch (Exception e) {
return ResponseEntity.badRequest().body(
new ErrorResponse("IDCARD_OCR_FAILED", e.getMessage()));
}
}
// 身份证识别(反面)
@PostMapping("/idcard/back")
public ResponseEntity<?> idCardBack(@RequestParam("image") MultipartFile file) {
try {
String result = ocrService.recognizeIdCard(file, false);
return ResponseEntity.ok(result);
} catch (Exception e) {
return ResponseEntity.badRequest().body(
new ErrorResponse("IDCARD_OCR_FAILED", e.getMessage()));
}
}
// 错误响应封装
private static class ErrorResponse {
private String errorCode;
private String message;
public ErrorResponse(String errorCode, String message) {
this.errorCode = errorCode;
this.message = message;
}
// Getters
public String getErrorCode() { return errorCode; }
public String getMessage() { return message; }
}
}
六、高级功能解析
1. 连接池优化
- 通过 HttpProfile.setMaxIdleConns() 配置最大空闲连接数
- 使用 keep-alive 减少TCP连接建立开销
- 客户端单例模式避免重复创建
2. 智能重试机制
- 指数退避算法:重试间隔 = 500ms * 2^retryCount
- 条件重试:仅对限流错误(RequestLimitExceeded)和系统错误(InternalError)重试
- 最大重试次数:通过配置灵活控制
3. 异步处理流程
控制器接收上传文件 → 服务层将识别任务提交到线程池 → 线程池中依次完成图片校验、预处理和OCR接口调用(含重试)→ 通过 CompletableFuture 将识别结果或错误信息返回给客户端
4. 图像预处理优化
- 灰度转换:减少颜色干扰,提高识别准确率
- 格式统一:统一转换为JPG格式
- 大小校验:防止过大文件导致API失败
5. 防御性编程
- 文件类型校验:仅允许JPG/PNG/BMP格式
- 大小限制:避免超大文件耗尽内存和带宽,降低被恶意上传拖垮服务的风险
- 错误封装:统一错误响应格式