Spring Boot开发者指南:从零构建腾讯云OCR服务(附完整源码)

一、腾讯云OCR简介与应用场景

腾讯云OCR(光学字符识别)服务提供高精度的文字识别能力,支持身份证、银行卡、营业执照、通用印刷体等多种识别场景。相比百度OCR,腾讯云OCR在金融票据识别和证件识别方面具有独特优势。

主要应用场景:

  • 金融行业:支票、汇票识别

  • 政务领域:身份证、驾驶证识别

  • 企业办公:合同、发票数字化

  • 教育行业:试卷、文档电子化

二、项目配置与依赖设置

1. 腾讯云账号准备

  1. 访问腾讯云控制台

  2. 创建子账号并分配OCR权限

  3. 获取 SecretId 和 SecretKey

2. Maven依赖 (pom.xml)

<dependencies>
    <!-- Spring Boot web starter (Spring MVC, used by the REST controller) -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>

    <!-- Annotation processor for @ConfigurationProperties classes -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-configuration-processor</artifactId>
        <optional>true</optional>
    </dependency>

    <!-- Lombok: required by the @Data/@Builder/@AllArgsConstructor/@NoArgsConstructor
         annotations on TencentOcrProperties. No version is given here, like the
         Spring Boot starters above; it is expected to come from the parent's
         dependency management. -->
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <optional>true</optional>
    </dependency>

    <!-- Tencent Cloud Java SDK -->
    <dependency>
        <groupId>com.tencentcloudapi</groupId>
        <artifactId>tencentcloud-sdk-java</artifactId>
        <version>3.1.270</version>
    </dependency>
</dependencies>

3. YAML配置 (application.yml)

tencent:
  ocr:
    secret-id: "your-secret-id"          # Tencent Cloud SecretId
    secret-key: "your-secret-key"        # Tencent Cloud SecretKey
    region: "ap-guangzhou"               # Service region
    general-ocr-endpoint: "ocr.tencentcloudapi.com"
    id-card-ocr-endpoint: "ocr.tencentcloudapi.com"
    engine-type: "GeneralBasicOCR"       # Default engine

    # Connection pool settings
    max-idle-conns: 100                  # Maximum number of idle connections
    keep-alive-seconds: 120              # Keep-alive time (seconds)

    # Retry policy
    max-retries: 3                       # Maximum number of retries

    # Image limits
    max-image-size: 3145728              # 3MB (3 * 1024 * 1024)

# All Spring settings live under ONE top-level `spring:` key.
# Repeating the key in the same document is a duplicate-key error.
spring:
  # Async task executor
  task:
    execution:
      pool:
        core-size: 10
        max-size: 50
        queue-capacity: 100
      # thread-name-prefix belongs to `execution`, not to `execution.pool`
      thread-name-prefix: OCR-Async-

  # File upload limits
  servlet:
    multipart:
      max-file-size: 3MB
      max-request-size: 10MB

三、配置参数封装

1.配置类 (TencentOcrProperties.java)

@Data
@Builder
@Component
@AllArgsConstructor
@NoArgsConstructor
@ConfigurationProperties(prefix = "tencent.ocr")
public class TencentOcrProperties {
    private String secretId;
    private String secretKey;
    private String region;
    private String generalOcrEndpoint;
    private String idCardOcrEndpoint;
    private String engineType;
    
    // 新增连接池配置
    private int maxIdleConns = 50;
    private int keepAliveSeconds = 60;
    private int maxRetries = 3;
    private int maxImageSize = 3 * 1024 * 1024; // 3MB
}

2.配置类OcrClientConfig (客户端Bean管理) 

import com.tencentcloudapi.common.Credential;
import com.tencentcloudapi.common.profile.ClientProfile;
import com.tencentcloudapi.common.profile.HttpProfile;
import com.tencentcloudapi.ocr.v20181119.OcrClient;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Exposes two {@link OcrClient} beans, one per configured endpoint
 * (general text and ID card), both built from {@link TencentOcrProperties}.
 *
 * <p>NOTE(review): {@code destroyMethod = "shutdown"} as well as
 * {@code HttpProfile.setMaxIdleConns} / {@code setKeepAliveDuration} are taken
 * as-is from the original snippet; confirm that the SDK version in use
 * actually provides these methods.
 */
@Configuration
public class OcrClientConfig {

    @Autowired
    private TencentOcrProperties ocrProperties;

    /** Client bound to the general-text OCR endpoint. */
    @Bean(name = "generalOcrClient", destroyMethod = "shutdown")
    public OcrClient generalOcrClient() {
        String endpoint = ocrProperties.getGeneralOcrEndpoint();
        return createOcrClient(endpoint);
    }

    /** Client bound to the ID-card OCR endpoint. */
    @Bean(name = "idCardOcrClient", destroyMethod = "shutdown")
    public OcrClient idCardOcrClient() {
        String endpoint = ocrProperties.getIdCardOcrEndpoint();
        return createOcrClient(endpoint);
    }

    /**
     * Builds a client for the given endpoint using the configured credentials,
     * region and connection settings.
     *
     * @param endpoint API endpoint host to send requests to
     * @return a new client instance
     */
    private OcrClient createOcrClient(String endpoint) {
        // HTTP layer: endpoint plus connection reuse settings
        HttpProfile http = new HttpProfile();
        http.setEndpoint(endpoint);
        http.setMaxIdleConns(ocrProperties.getMaxIdleConns());
        // The property is expressed in seconds; the value handed over is seconds * 1000
        http.setKeepAliveDuration(ocrProperties.getKeepAliveSeconds() * 1000);

        ClientProfile profile = new ClientProfile();
        profile.setHttpProfile(http);

        Credential credential = new Credential(
            ocrProperties.getSecretId(),
            ocrProperties.getSecretKey()
        );
        String region = ocrProperties.getRegion();

        return new OcrClient(credential, region, profile);
    }
}

3. 异步配置支持 AsyncConfig

import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.AsyncConfigurer;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;

import java.util.concurrent.Executor;

/**
 * Enables {@code @Async} processing and supplies the executor used for it.
 */
@Configuration
@EnableAsync
public class AsyncConfig implements AsyncConfigurer {

    private static final int CORE_POOL_SIZE = 10;
    private static final int MAX_POOL_SIZE = 50;
    private static final int QUEUE_CAPACITY = 100;
    private static final String THREAD_NAME_PREFIX = "OCR-Async-";

    /**
     * Creates and initializes a fresh thread pool executor with the fixed
     * sizing defined by the constants of this class.
     *
     * @return the initialized executor
     */
    @Override
    public Executor getAsyncExecutor() {
        ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
        pool.setThreadNamePrefix(THREAD_NAME_PREFIX);
        pool.setCorePoolSize(CORE_POOL_SIZE);
        pool.setMaxPoolSize(MAX_POOL_SIZE);
        pool.setQueueCapacity(QUEUE_CAPACITY);
        // Not registered as a bean here, so it has to be initialized by hand
        pool.initialize();
        return pool;
    }
}

四、OCR服务实现

1. OCR服务类 (TencentOcrService.java)

import com.tencentcloudapi.common.Credential;
import com.tencentcloudapi.common.exception.TencentCloudSDKException;
import com.tencentcloudapi.common.profile.ClientProfile;
import com.tencentcloudapi.common.profile.HttpProfile;
import com.tencentcloudapi.ocr.v20181119.OcrClient;
import com.tencentcloudapi.ocr.v20181119.models.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;

import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Base64;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * OCR service that validates an uploaded image, converts it to a grayscale
 * JPEG, and sends it Base64-encoded to the Tencent Cloud OCR API with a
 * bounded, exponentially backed-off retry.
 *
 * <p>The service owns two SDK clients (general text and ID card) that are
 * created in {@link #init()} and released in {@link #shutdown()}, plus a
 * private fixed thread pool used by the asynchronous entry point.
 *
 * <p>NOTE(review): {@code HttpProfile.setMaxIdleConns},
 * {@code HttpProfile.setKeepAliveDuration} and {@code OcrClient.shutdown()}
 * are kept exactly as in the original snippet; confirm that the SDK version
 * in use provides them.
 */
@Service
public class TencentOcrService {

    private final TencentOcrProperties ocrProperties;
    private OcrClient generalOcrClient;
    private OcrClient idCardOcrClient;
    private final ExecutorService asyncExecutor = Executors.newFixedThreadPool(10);

    @Autowired
    public TencentOcrService(TencentOcrProperties ocrProperties) {
        this.ocrProperties = ocrProperties;
    }

    /** Creates the OCR clients once the properties have been injected. */
    @PostConstruct
    public void init() {
        this.generalOcrClient = createOcrClient(ocrProperties.getGeneralOcrEndpoint());
        this.idCardOcrClient = createOcrClient(ocrProperties.getIdCardOcrEndpoint());
    }

    /** Releases the clients and stops accepting new asynchronous tasks. */
    @PreDestroy
    public void shutdown() {
        if (generalOcrClient != null) {
            generalOcrClient.shutdown();
        }
        if (idCardOcrClient != null) {
            idCardOcrClient.shutdown();
        }
        asyncExecutor.shutdown();
    }

    /**
     * Builds a client for the given endpoint from the configured credentials,
     * region and connection settings.
     *
     * @param endpoint API endpoint host
     * @return a new client instance
     */
    private OcrClient createOcrClient(String endpoint) {
        Credential cred = new Credential(ocrProperties.getSecretId(), ocrProperties.getSecretKey());

        HttpProfile httpProfile = new HttpProfile();
        httpProfile.setEndpoint(endpoint);
        httpProfile.setMaxIdleConns(ocrProperties.getMaxIdleConns());
        // Property is in seconds; the value handed over is seconds * 1000
        httpProfile.setKeepAliveDuration(ocrProperties.getKeepAliveSeconds() * 1000);

        ClientProfile clientProfile = new ClientProfile();
        clientProfile.setHttpProfile(httpProfile);

        return new OcrClient(cred, ocrProperties.getRegion(), clientProfile);
    }

    /**
     * General text recognition (synchronous).
     *
     * @param file uploaded image
     * @return the API response serialized as JSON
     * @throws IllegalArgumentException if the upload is missing, too large,
     *         of an unsupported type, or cannot be decoded as an image
     * @throws Exception if the OCR call fails after all permitted retries
     */
    public String recognizeGeneralText(MultipartFile file) throws Exception {
        validateImage(file);
        BufferedImage processedImage = preprocessImage(file);
        String imageBase64 = convertToBase64(processedImage);

        GeneralBasicOCRRequest request = new GeneralBasicOCRRequest();
        request.setImageBase64(imageBase64);

        return callOcrApiWithRetry(
            request,
            "GeneralBasicOCR",
            generalOcrClient,
            ocrProperties.getMaxRetries()
        );
    }

    /**
     * General text recognition (asynchronous).
     *
     * <p>The work is submitted exactly once, to this service's own executor.
     * The method is deliberately not annotated with {@code @Async}: combining
     * the annotation with {@code supplyAsync} would dispatch the call twice and
     * keep one thread parked while another does the actual work.
     *
     * @param file uploaded image
     * @return a future completing with the JSON response, or exceptionally
     *         with a {@link RuntimeException} wrapping the failure
     */
    public CompletableFuture<String> recognizeGeneralTextAsync(MultipartFile file) {
        return CompletableFuture.supplyAsync(() -> {
            try {
                return recognizeGeneralText(file);
            } catch (Exception e) {
                throw new RuntimeException("异步OCR识别失败", e);
            }
        }, asyncExecutor);
    }

    /**
     * ID card recognition.
     *
     * @param file    uploaded image
     * @param isFront {@code true} to request the "FRONT" side, {@code false} for "BACK"
     * @return the API response serialized as JSON
     * @throws IllegalArgumentException if the upload is missing, too large,
     *         of an unsupported type, or cannot be decoded as an image
     * @throws Exception if the OCR call fails after all permitted retries
     */
    public String recognizeIdCard(MultipartFile file, boolean isFront) throws Exception {
        validateImage(file);
        BufferedImage processedImage = preprocessImage(file);
        String imageBase64 = convertToBase64(processedImage);

        IdCardOCRRequest request = new IdCardOCRRequest();
        request.setImageBase64(imageBase64);
        request.setCardSide(isFront ? "FRONT" : "BACK");

        return callOcrApiWithRetry(
            request,
            "IDCardOCR",
            idCardOcrClient,
            ocrProperties.getMaxRetries()
        );
    }

    /**
     * Calls the API and retries retryable SDK failures with exponential back-off.
     *
     * @param request    request model matching {@code engineType}
     * @param engineType name of the API action to invoke
     * @param client     client to send the request with
     * @param maxRetries maximum number of retries after the first attempt
     * @return the API response serialized as JSON
     * @throws InterruptedException if the thread is interrupted while backing off
     * @throws Exception if the call fails with a non-retryable error or the
     *         retries are exhausted
     */
    private String callOcrApiWithRetry(AbstractModel request, String engineType,
                                       OcrClient client, int maxRetries)
        throws Exception {

        int retryCount = 0;
        while (true) {
            try {
                return executeOcrApiCall(request, engineType, client);
            } catch (TencentCloudSDKException e) {
                if (!shouldRetry(e, retryCount, maxRetries)) {
                    throw new Exception("OCR识别失败: " + e.getMessage(), e);
                }
                retryCount++;
                try {
                    Thread.sleep(calculateBackoffTime(retryCount));
                } catch (InterruptedException interrupted) {
                    // Callers catch a generic Exception, so re-assert the flag
                    // to keep the interruption visible further up the stack.
                    Thread.currentThread().interrupt();
                    throw interrupted;
                }
            }
        }
    }

    /**
     * Dispatches the request to the client method named by {@code engineType}.
     *
     * @throws IllegalArgumentException for an unknown engine type
     * @throws TencentCloudSDKException if the SDK call fails
     */
    private String executeOcrApiCall(AbstractModel request, String engineType, OcrClient client)
        throws TencentCloudSDKException {

        switch (engineType) {
            case "GeneralBasicOCR":
                return GeneralBasicOCRResponse.toJsonString(
                    client.GeneralBasicOCR((GeneralBasicOCRRequest) request));

            case "GeneralAccurateOCR":
                return GeneralAccurateOCRResponse.toJsonString(
                    client.GeneralAccurateOCR((GeneralAccurateOCRRequest) request));

            case "IDCardOCR":
                return IdCardOCRResponse.toJsonString(
                    client.IdCardOCR((IdCardOCRRequest) request));

            default:
                throw new IllegalArgumentException("不支持的OCR引擎类型: " + engineType);
        }
    }

    /**
     * Image pre-processing: decodes the upload and redraws it as a grayscale image.
     *
     * @throws IllegalArgumentException if the content cannot be decoded as an image
     * @throws IOException if reading the upload fails
     */
    private BufferedImage preprocessImage(MultipartFile file) throws IOException {
        BufferedImage originalImage;
        // Close the upload stream as soon as decoding is done
        try (java.io.InputStream in = file.getInputStream()) {
            originalImage = ImageIO.read(in);
        }
        // ImageIO.read yields null when no reader understands the data
        // (e.g. a non-image file sent with an image content type)
        if (originalImage == null) {
            throw new IllegalArgumentException("无法解析图片内容,请确认文件为有效的JPG/PNG/BMP图片");
        }

        // Convert to a grayscale image
        BufferedImage grayImage = new BufferedImage(
            originalImage.getWidth(),
            originalImage.getHeight(),
            BufferedImage.TYPE_BYTE_GRAY
        );

        Graphics2D g2d = grayImage.createGraphics();
        try {
            g2d.drawImage(originalImage, 0, 0, null);
        } finally {
            g2d.dispose();
        }

        return grayImage;
    }

    /**
     * Encodes the image as JPEG and returns the bytes as Base64.
     *
     * @throws IOException if encoding fails or no JPEG writer is available
     */
    private String convertToBase64(BufferedImage image) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // write() reports "no suitable writer" through its return value
        if (!ImageIO.write(image, "jpg", baos)) {
            throw new IOException("图片编码为JPG失败");
        }
        return Base64.getEncoder().encodeToString(baos.toByteArray());
    }

    /**
     * Validates presence, size and declared content type of the upload.
     *
     * @throws IllegalArgumentException if any check fails
     */
    private void validateImage(MultipartFile file) throws IOException {
        if (file == null || file.isEmpty()) {
            throw new IllegalArgumentException("请上传图片文件");
        }

        if (file.getSize() > ocrProperties.getMaxImageSize()) {
            throw new IllegalArgumentException(
                "图片大小不能超过" + (ocrProperties.getMaxImageSize() / 1024) + "KB");
        }

        // Check the declared image format
        String contentType = file.getContentType();
        boolean supported = "image/jpeg".equals(contentType)
            || "image/png".equals(contentType)
            || "image/bmp".equals(contentType);
        if (!supported) {
            throw new IllegalArgumentException("仅支持JPG/PNG/BMP格式图片");
        }
    }

    /**
     * Decides whether a failed call should be retried: only while retries
     * remain and only for the rate-limit and internal-error codes.
     */
    private boolean shouldRetry(TencentCloudSDKException e, int retryCount, int maxRetries) {
        if (retryCount >= maxRetries) {
            return false;
        }
        // Constant-first comparison: tolerate exceptions that carry no error code
        String errorCode = e.getErrorCode();
        return "RequestLimitExceeded".equals(errorCode)
            || "InternalError".equals(errorCode);
    }

    /**
     * Computes the back-off delay in milliseconds: 500 * 2^retryCount.
     */
    private long calculateBackoffTime(int retryCount) {
        return (long) (500 * Math.pow(2, retryCount));
    }
}

五、控制器实现

OCR控制器 (OcrController.java)

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;

import java.util.concurrent.CompletableFuture;

/**
 * REST endpoints under {@code /api/ocr} that forward uploaded images to
 * {@link TencentOcrService}. Every failure is answered with HTTP 400 and an
 * {@link ErrorResponse} body.
 */
@RestController
@RequestMapping("/api/ocr")
public class OcrController {

    private final TencentOcrService ocrService;

    @Autowired
    public OcrController(TencentOcrService ocrService) {
        this.ocrService = ocrService;
    }

    /**
     * Synchronous general text recognition.
     *
     * @param file uploaded image (form field {@code image})
     * @return 200 with the JSON result, or 400 with error code {@code OCR_FAILED}
     */
    @PostMapping("/general")
    public ResponseEntity<?> generalOcr(@RequestParam("image") MultipartFile file) {
        try {
            String result = ocrService.recognizeGeneralText(file);
            return ResponseEntity.ok(result);
        } catch (Exception e) {
            return ResponseEntity.badRequest().body(
                new ErrorResponse("OCR_FAILED", e.getMessage()));
        }
    }

    /**
     * Asynchronous general text recognition.
     *
     * @param file uploaded image (form field {@code image})
     * @return a future yielding 200 with the JSON result, or 400 with error
     *         code {@code ASYNC_OCR_FAILED}
     */
    @PostMapping("/general/async")
    public CompletableFuture<ResponseEntity<?>> generalOcrAsync(
            @RequestParam("image") MultipartFile file) {
        return ocrService.recognizeGeneralTextAsync(file)
            // The explicit type argument widens the stage to ResponseEntity<?>;
            // otherwise it is inferred as ResponseEntity<String>, which neither
            // accepts the ErrorResponse fallback nor matches the return type.
            .<ResponseEntity<?>>thenApply(text -> ResponseEntity.ok(text))
            .exceptionally(ex -> ResponseEntity.badRequest().body(
                new ErrorResponse("ASYNC_OCR_FAILED", failureMessage(ex))));
    }

    /**
     * ID card recognition (front side).
     *
     * @param file uploaded image (form field {@code image})
     * @return 200 with the JSON result, or 400 with error code {@code IDCARD_OCR_FAILED}
     */
    @PostMapping("/idcard/front")
    public ResponseEntity<?> idCardFront(@RequestParam("image") MultipartFile file) {
        return recognizeIdCard(file, true);
    }

    /**
     * ID card recognition (back side).
     *
     * @param file uploaded image (form field {@code image})
     * @return 200 with the JSON result, or 400 with error code {@code IDCARD_OCR_FAILED}
     */
    @PostMapping("/idcard/back")
    public ResponseEntity<?> idCardBack(@RequestParam("image") MultipartFile file) {
        return recognizeIdCard(file, false);
    }

    /** Shared implementation of the two ID card endpoints. */
    private ResponseEntity<?> recognizeIdCard(MultipartFile file, boolean isFront) {
        try {
            String result = ocrService.recognizeIdCard(file, isFront);
            return ResponseEntity.ok(result);
        } catch (Exception e) {
            return ResponseEntity.badRequest().body(
                new ErrorResponse("IDCARD_OCR_FAILED", e.getMessage()));
        }
    }

    /**
     * Returns the message of the wrapped cause when there is one, otherwise
     * the message of the throwable itself, so a cause-less failure cannot
     * trigger a NullPointerException inside the error handler.
     */
    private static String failureMessage(Throwable ex) {
        Throwable cause = ex.getCause();
        return (cause != null ? cause : ex).getMessage();
    }

    /** Error payload returned to the client. */
    private static class ErrorResponse {
        private final String errorCode;
        private final String message;

        public ErrorResponse(String errorCode, String message) {
            this.errorCode = errorCode;
            this.message = message;
        }

        // Getters
        public String getErrorCode() { return errorCode; }
        public String getMessage() { return message; }
    }
}

六、高级功能解析

1. 连接池优化

  • 通过 HttpProfile.setMaxIdleConns() 配置最大空闲连接数

  • 使用 keep-alive 减少TCP连接建立开销

  • 客户端单例模式避免重复创建

2. 智能重试机制

  • 指数退避算法:重试间隔 = 500ms * 2^retryCount

  • 条件重试:仅对限流错误(RequestLimitExceeded)和系统错误(InternalError)重试

  • 最大重试次数:通过配置灵活控制

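3. 异步处理流程

  • 控制器接收上传的图片,调用服务层的异步识别方法

  • 识别任务被提交到线程池执行,方法立即返回 CompletableFuture

  • 任务完成后,识别结果(或错误信息)经 CompletableFuture 转换为HTTP响应返回给客户端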

4. 图像预处理优化

  • 灰度转换:减少颜色干扰,提高识别准确率

  • 格式统一:统一转换为JPG格式

  • 大小校验:防止过大文件导致API失败

5. 防御性编程

  • 文件类型校验:仅允许JPG/PNG/BMP格式

  • 大小限制:拒绝超大文件,避免其占用过多内存和带宽(可降低被恶意上传滥用的风险,但不能替代专门的DDoS防护)

  • 错误封装:统一错误响应格式

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值