对接阿里云文本转语音（TTS）服务

最新推荐文章于 2025-08-22 13:45:39 发布

qq_31683775

最新推荐文章于 2025-08-22 13:45:39 发布

阅读量123

点赞数 2

CC 4.0 BY-SA版权

文章标签： java

本文链接：https://blog.csdn.net/qq_31683775/article/details/149859289

1.引入依赖

<!-- https://mvnrepository.com/artifact/com.alibaba.nls/nls-sdk-tts -->
        <!-- 阿里云 语音合成tts -->
        <dependency>
            <groupId>com.alibaba.nls</groupId>
            <artifactId>nls-sdk-tts</artifactId>
            <version>2.2.18</version>
        </dependency>
        <!-- 阿里云 语音识别 -->
        <!-- https://mvnrepository.com/artifact/com.alibaba.nls/nls-sdk-recognizer -->
        <dependency>
            <groupId>com.alibaba.nls</groupId>
            <artifactId>nls-sdk-recognizer</artifactId>
            <version>2.2.18</version>
        </dependency>

2.工具类

package com.ruoyi.ai.utils;

import com.ruoyi.ai.utils.alitts.AliSpeechUtil;
import com.ruoyi.ai.utils.alitts.AliTtsReq;
import com.ruoyi.ai.utils.alitts.AliTtsRes;
import com.ruoyi.common.core.constant.Constants;
import com.ruoyi.common.core.exception.ServiceException;
import com.ruoyi.common.core.utils.StringUtils;
import com.ruoyi.common.core.utils.file.FileUtils;
import com.ruoyi.common.core.utils.hwcloud.obs.ObsUtil;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

@Slf4j
@Component
public class AliTtsUtil {

    /** Maximum number of characters of copy placed into a single TTS request. */
    private static final int MAX_SEGMENT_LENGTH = 300;

    /**
     * Characters after which a long text may be cut: CJK and ASCII sentence
     * punctuation, commas and the space character.
     */
    private static final String BREAK_CHARS = "。！？；.!?;，, ";

    /**
     * Converts text to speech and returns the merged local audio file.
     *
     * <p>The text is split into segments of at most {@value #MAX_SEGMENT_LENGTH}
     * characters, each segment is synthesized separately and the resulting
     * files are merged with {@code FfmpegUtil.mergeAudioFiles}.
     *
     * @param copywriting text to synthesize; must not be blank
     * @param voice       speaker name, written to the SSML {@code voice} attribute
     * @param emotion     emotion category, written to the SSML {@code emotion} tag
     * @param speechRate  value of the SSML {@code prosody rate} attribute
     * @param volume      value of the SSML {@code prosody volume} attribute
     * @param pitchRate   value of the SSML {@code prosody pitch} attribute
     * @return a two-element array: {@code [0]} the merged audio {@link File},
     *         {@code [1]} the value returned by {@code FfmpegUtil.getAudioDuration}
     *         (unit not visible here — TODO confirm)
     * @throws ServiceException if the text is blank or synthesis/merging fails
     */
    public Object[] textToSpeech(String copywriting,String voice,String emotion,String speechRate,String volume,String pitchRate){
        File audioFile = this.synthesizeMergedAudio(copywriting, voice, emotion, speechRate, volume, pitchRate);

        int audioDuration = FfmpegUtil.getAudioDuration(audioFile);

        return new Object[]{audioFile,audioDuration};
    }


    /**
     * Converts text to speech, uploads the merged audio through
     * {@code ObsUtil.upload} and returns the resulting URL.
     *
     * <p>The merged local file is deleted once the upload attempt has finished,
     * because only the URL is handed back to the caller.
     *
     * @param copywriting text to synthesize; must not be blank
     * @param voice       speaker name
     * @param emotion     emotion category
     * @param speechRate  SSML prosody rate
     * @param volume      SSML prosody volume
     * @param pitchRate   SSML prosody pitch
     * @return a two-element array: {@code [0]} the uploaded file URL (String),
     *         {@code [1]} the value returned by {@code FfmpegUtil.getAudioDuration}
     * @throws ServiceException if the text is blank, synthesis fails or the
     *                          upload result contains no {@code url} entry
     **/
    public Object[] textToSpeechUploadObs(String copywriting,String voice,String emotion,String speechRate,String volume,String pitchRate){
        File audioFile = this.synthesizeMergedAudio(copywriting, voice, emotion, speechRate, volume, pitchRate);
        try {
            int audioDuration = FfmpegUtil.getAudioDuration(audioFile);

            // Wrap the local file so that it can be handed to the upload helper.
            MultipartFile multipartFile;
            try {
                multipartFile = FileUtils.convertToMultipartFile(audioFile);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }

            Map<String, String> uploadResult;
            try {
                uploadResult = ObsUtil.upload(multipartFile);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            // A null result is treated like a missing URL instead of causing an NPE.
            if (uploadResult == null || !uploadResult.containsKey("url")) {
                throw new ServiceException("语音上传失败", Constants.FAIL);
            }

            return new Object[]{uploadResult.get("url"),audioDuration};
        } finally {
            // The caller only receives the URL, so the local copy is no longer reachable.
            if (audioFile.exists() && !audioFile.delete()) {
                log.warn("Could not delete temporary audio file {}", audioFile.getAbsolutePath());
            }
        }
    }

    /**
     * Validates the text, synthesizes every segment and merges the segment
     * files into one audio file. Segment files are removed afterwards, whether
     * or not the operation succeeded.
     *
     * @return the merged audio file; never {@code null} and always existing
     */
    private File synthesizeMergedAudio(String copywriting, String voice, String emotion,
                                       String speechRate, String volume, String pitchRate) {
        if (StringUtils.isBlank(copywriting)) {
            throw new ServiceException("参数 copywriting 不能为空", Constants.FAIL);
        }

        List<String> textList = this.split300(copywriting);
        List<AliTtsRes> fragments = new ArrayList<>();
        File merged = null;
        try {
            for (String text : textList) {
                AliTtsRes fragment = this.textToSpeechFragmentation(text, voice, emotion,
                        speechRate, volume, pitchRate);
                if (fragment == null || fragment.getAudioFile() == null || !fragment.getAudioFile().exists()) {
                    throw new ServiceException("语音生成失败", Constants.FAIL);
                }
                fragments.add(fragment);
            }

            List<File> fragmentFiles = fragments.stream()
                    .map(AliTtsRes::getAudioFile)
                    .collect(Collectors.toList());
            try {
                merged = FfmpegUtil.mergeAudioFiles(fragmentFiles);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }

            if (merged == null || !merged.exists()) {
                throw new ServiceException("语音生成失败", Constants.FAIL);
            }
            return merged;
        } finally {
            // Remove the per-segment files on success and on failure alike.
            // A fragment is kept only if the merge helper returned that very file
            // (e.g. for a single-segment input — not visible here, so guard for it).
            for (AliTtsRes fragment : fragments) {
                File fragmentFile = fragment.getAudioFile();
                boolean isMergedResult = merged != null && fragmentFile != null
                        && merged.getAbsoluteFile().equals(fragmentFile.getAbsoluteFile());
                if (!isMergedResult) {
                    fragment.deleteFile();
                }
            }
        }
    }

    /**
     * Calls the Alibaba Cloud TTS service for one text segment.
     *
     * <p>The segment is wrapped in SSML so that emotion, volume, rate and pitch
     * can be applied. The text and all attribute values are XML-escaped first,
     * otherwise characters such as {@code &} or {@code <} in the copy would
     * produce malformed SSML.
     *
     * @param text       segment text
     * @param voice      speaker name
     * @param emotion    emotion category
     * @param speechRate speech rate
     * @param volume     volume
     * @param pitchRate  pitch
     * @return the synthesis result returned by {@code AliSpeechUtil.tts}
     **/
    private AliTtsRes textToSpeechFragmentation(String text, String voice, String emotion, String speechRate, String volume, String pitchRate) {
        String ssml = "<speak volume=\"75\" voice=\"" +
                escapeXml(voice) +
                "\"><emotion category=\"" +
                escapeXml(emotion) +
                "\" intensity=\"1\"><s>" +
                "<prosody volume=\"" +
                escapeXml(volume) +
                "\" rate=\"" +
                escapeXml(speechRate) +
                "\" pitch=\"" +
                escapeXml(pitchRate) +
                "\">" +
                escapeXml(text) +
                "</prosody>" +
                "</s></emotion></speak>";

        AliTtsReq aliTtsReq = new AliTtsReq();
        aliTtsReq.setText(ssml);
        // AliSpeechUtil.tts reads the voice from the request, so keep it in sync with the SSML.
        aliTtsReq.setVoice(voice);

        return AliSpeechUtil.tts(aliTtsReq);
    }

    /**
     * Escapes the five XML special characters. A {@code null} value is rendered
     * as the string {@code "null"}, which is what plain string concatenation
     * would produce.
     */
    private static String escapeXml(String value) {
        String raw = String.valueOf(value);
        StringBuilder escaped = new StringBuilder(raw.length() + 16);
        for (int i = 0; i < raw.length(); i++) {
            char c = raw.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&apos;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }

    /**
     * Splits a text into segments of at most {@value #MAX_SEGMENT_LENGTH}
     * characters, as required for the TTS requests.
     *
     * <p>Each cut is placed directly after the last break character found
     * within the window; if the window contains none, the text is cut at the
     * window limit. Concatenating the segments yields the original text.
     *
     * @param text non-null text
     * @return the segments in order; a single-element list for short texts
     **/
    private List<String> split300(String text) {
        List<String> textList = new ArrayList<>();
        if (text.length() < MAX_SEGMENT_LENGTH) {
            textList.add(text);
            return textList;
        }

        int length = text.length();
        int startIndex = 0;
        while (startIndex < length) {
            int endIndex = Math.min(startIndex + MAX_SEGMENT_LENGTH, length);
            // Only a non-final window needs a cut position.
            if (endIndex < length) {
                endIndex = findCutPosition(text, startIndex, endIndex);
            }
            textList.add(text.substring(startIndex, endIndex));
            startIndex = endIndex;
        }

        // Log the segmentation result to ease debugging.
        log.info("文案分段结果：");
        for (int i = 0; i < textList.size(); i++) {
            log.info("第{}段：{}", i + 1, textList.get(i));
        }
        return textList;
    }

    /**
     * Finds the exclusive end index of the segment starting at {@code start}.
     *
     * @param text  the full text
     * @param start inclusive start of the window
     * @param limit exclusive upper bound of the window; must be smaller than
     *              {@code text.length()} and greater than {@code start + 1}
     * @return an index in {@code (start, limit]}
     */
    private static int findCutPosition(String text, int start, int limit) {
        // Search backwards for the last break character inside the window.
        for (int i = limit; i > start; i--) {
            if (BREAK_CHARS.indexOf(text.charAt(i - 1)) >= 0) {
                return i;
            }
        }
        // No break character: hard cut, but never between the two halves of a
        // surrogate pair (e.g. an emoji), which would corrupt both segments.
        if (Character.isHighSurrogate(text.charAt(limit - 1))
                && Character.isLowSurrogate(text.charAt(limit))) {
            return limit - 1;
        }
        return limit;
    }
}

package com.ruoyi.ai.utils.alitts;

/**
 * @author Administrator
 * @since 2025/5/23 15:33
 */

import com.alibaba.nls.client.AccessToken;
import com.alibaba.nls.client.protocol.NlsClient;
import com.alibaba.nls.client.protocol.OutputFormatEnum;
import com.alibaba.nls.client.protocol.SampleRateEnum;
import com.alibaba.nls.client.protocol.tts.SpeechSynthesizer;
import com.alibaba.nls.client.protocol.tts.SpeechSynthesizerListener;
import com.alibaba.nls.client.protocol.tts.SpeechSynthesizerResponse;
import com.ruoyi.common.core.constant.Constants;
import com.ruoyi.common.core.exception.ServiceException;
import com.ruoyi.common.core.utils.uuid.UUID;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

/**
 * Static helper around the Alibaba Cloud NLS speech-synthesis (TTS) SDK.
 *
 * <p>For every request an access token is obtained, a client and a synthesizer
 * are created, the audio is written to a local MP3 file and all SDK resources
 * are released again. How to obtain a token is described at
 * https://help.aliyun.com/document_detail/450514.html
 */
public class AliSpeechUtil {
    private static final Logger logger = LoggerFactory.getLogger(AliSpeechUtil.class);

    // NOTE(review): credentials are hard-coded in source. They should be loaded
    // from configuration or the environment; the constants are kept public and
    // unchanged here because other code may reference them.
    public static final String appKey = "HmW";
    public static final String url = "wss://nls-gateway-cn-shanghai.aliyuncs.com/ws/v1";
    public static final String accessKeyId = "LTA";
    public static final String accessKeySecret = "WjIo";

    /**
     * Synthesizes the request text to an MP3 file (16 kHz sample rate).
     *
     * <p>If the request carries no file path, a uniquely named file in the
     * {@code java.io.tmpdir} directory is used. When synthesis fails, the
     * (possibly partial) output file is deleted before the exception is thrown.
     *
     * @param ttsReq request; its text must not be blank
     * @return the synthesis result, including the written audio file
     * @throws ServiceException if the text is blank or the service reports a
     *                          non-success status
     * @throws RuntimeException if the SDK call itself fails (token, connection, ...)
     **/
    public static AliTtsRes tts(AliTtsReq ttsReq) {

        if (ttsReq == null || StringUtils.isBlank(ttsReq.getText())) {
            throw new ServiceException("文本内容不能为空", Constants.FAIL);
        }

        String filePath = ttsReq.getFilePath();
        if (StringUtils.isBlank(filePath)) {
            String tmpdir = System.getProperty("java.io.tmpdir");
            if (tmpdir != null && !tmpdir.endsWith(File.separator)) {
                tmpdir += File.separator;
            }
            filePath = tmpdir + UUID.randomUUID().toString().replace("-", "") + ".mp3";
        }

        // The listener receives the audio frames and the final status; it opens
        // the output file immediately, so every failure path below must clean it up.
        AliSpeechSynthesizerListener listener = new AliSpeechSynthesizerListener(filePath);

        NlsClient client = null;
        SpeechSynthesizer synthesizer = null;
        try {
            // A fresh token and client are created for every request.
            AccessToken accessToken = new AccessToken(accessKeyId, accessKeySecret);
            accessToken.apply();
            client = new NlsClient(url, accessToken.getToken());

            synthesizer = new SpeechSynthesizer(client, listener);
            synthesizer.setAppKey(appKey);
            synthesizer.setFormat(OutputFormatEnum.MP3);
            synthesizer.setSampleRate(SampleRateEnum.SAMPLE_RATE_16K);
            synthesizer.setVoice(ttsReq.getVoice());
            // Pitch and speech rate are deliberately not set on the synthesizer:
            // per the original author's tests they have no effect for
            // multi-emotion voices, so they are expressed through SSML instead.
            synthesizer.setText(ttsReq.getText());
            // Ask the service to return subtitle (timestamp) information.
            synthesizer.addCustomedParam("enable_subtitle", true);
            // Sends the parameters to the service and waits for its confirmation.
            synthesizer.start();
            // Blocks until synthesis has finished or failed.
            synthesizer.waitForComplete();
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Preserve the interrupt for callers further up the stack.
                Thread.currentThread().interrupt();
            }
            logger.error("tts request failed", e);
            deleteOutput(listener.getTtsRes());
            throw new RuntimeException(e);
        } finally {
            if (null != synthesizer) {
                synthesizer.close();
            }
            // The client is created per call, so it must be shut down per call;
            // otherwise its underlying resources accumulate with every request.
            if (null != client) {
                client.shutdown();
            }
        }

        AliTtsRes ttsRes = listener.getTtsRes();
        if (ttsRes == null || !AliSpeechSynthesizerListener.SUCCESS.equals(ttsRes.getStatus())) {
            logger.error("tts fail。AliTtsRes：{}", ttsRes);
            deleteOutput(ttsRes);

            if (ttsRes != null && "41020001".equals(ttsRes.getStatus())) {
                // Status 41020001 is reported to the user as "text longer than 300 characters".
                throw new ServiceException("请输入300字以内的文本！", Constants.FAIL);
            }
            throw new ServiceException("生成语音失败，请稍后重试", Constants.FAIL);
        }

        return ttsRes;
    }

    /** Deletes the output file of a failed synthesis, if there is one. */
    private static void deleteOutput(AliTtsRes ttsRes) {
        if (ttsRes != null) {
            ttsRes.deleteFile();
        }
    }

    /** Prints the directory in which temporary audio files are created. */
    public static void main(String[] args) {
        System.out.println(System.getProperty("java.io.tmpdir"));
    }
}

package com.ruoyi.ai.utils.alitts;

import com.alibaba.nls.client.protocol.tts.SpeechSynthesizerListener;
import com.alibaba.nls.client.protocol.tts.SpeechSynthesizerResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

/**
 * Speech-synthesis listener that writes the received audio data to a file and
 * records the final status of the request. Not thread-safe.
 *
 * <p>The output stream is opened in the constructor and closed as soon as the
 * service reports completion or failure.
 *
 * <p>Example of a subtitle message returned by the service:
 * <pre>
 * {
 *     "header": {
 *         "message_id": "05450bf69c53413f8d88aed1ee60****",
 *         "task_id": "640bc797bb684bd6960185651307****",
 *         "namespace": "SpeechSynthesizer",
 *         "name": "MetaInfo",
 *         "status": 20000000,
 *         "status_message": "GATEWAY|SUCCESS|Success."
 *     },
 *     "payload": {
 *         "subtitles": [
 *             {
 *                 "begin_index": 0,
 *                 "phoneme": "null",
 *                 "end_time": 180,
 *                 "end_index": 1,
 *                 "begin_time": 0,
 *                 "text": "你"
 *             },
 *             {
 *                 "begin_index": 1,
 *                 "phoneme": "null",
 *                 "end_time": 299,
 *                 "end_index": 2,
 *                 "begin_time": 180,
 *                 "text": "好"
 *             }
 *         ]
 *     }
 * }
 * </pre>
 *
 * @author Administrator
 * @since 2025/5/26 10:22
 */
public class AliSpeechSynthesizerListener extends SpeechSynthesizerListener {

    public static final Logger logger = LoggerFactory.getLogger(AliSpeechSynthesizerListener.class);

    /** Status code of a successful response. */
    public static final String SUCCESS = "20000000";
    /** Payload key under which subtitle entries are returned. */
    public static final String SUBTITLES = "subtitles";

    private final AliTtsRes ttsRes;

    private final File file;
    private final FileOutputStream fout;

    /**
     * @return the result object; its status fields stay unset until
     *         {@link #onComplete} or {@link #onFail} has been called
     */
    public AliTtsRes getTtsRes() {
        return this.ttsRes;
    }

    /**
     * Creates the listener and opens the output file for writing.
     *
     * @param filePath full path of the audio file to write
     * @throws RuntimeException if the file cannot be opened
     */
    public AliSpeechSynthesizerListener(String filePath) {
        ttsRes = new AliTtsRes();
        file = new File(filePath);
        // NOTE(review): deleteOnExit registers the path for the lifetime of the
        // JVM; in a long-running service explicit deletion by the caller is preferable.
        file.deleteOnExit();

        ttsRes.setAudioFile(file);
        ttsRes.setAudioFilePath(filePath);
        try {
            fout = new FileOutputStream(file);
        } catch (FileNotFoundException e) {
            logger.error(e.getMessage(), e);
            throw new RuntimeException(e);
        }
    }

    /** Called when all TTS data has been received; closes the file and records the status. */
    @Override
    public void onComplete(SpeechSynthesizerResponse response) {
        logger.info("所有TTS数据已接收完成");
        logger.info("name: {}, status: {}", response.getName(), response.getStatus());

        closeOutput();
        recordStatus(response);
    }

    /** Called when the request failed; closes the file and records the status. */
    @Override
    public void onFail(SpeechSynthesizerResponse response) {
        // task_id identifies the request towards the service and is needed for troubleshooting.
        logger.error("task_id: {}, status: {}, status_text: {}", response.getTaskId(), response.getStatus(), response.getStatusText());

        closeOutput();
        recordStatus(response);
    }

    /** Appends one chunk of synthesized audio to the output file. */
    @Override
    public void onMessage(ByteBuffer byteBuffer) {
        byte[] bytesArray = new byte[byteBuffer.remaining()];
        byteBuffer.get(bytesArray, 0, bytesArray.length);
        try {
            fout.write(bytesArray);
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
            throw new RuntimeException(e);
        }
    }

    /** Copies status, status text and task id from the response into the result. */
    private void recordStatus(SpeechSynthesizerResponse response) {
        ttsRes.setStatus(String.valueOf(response.getStatus()));
        ttsRes.setStatusText(response.getStatusText());
        ttsRes.setTaskId(response.getTaskId());
    }

    /**
     * Closes the output stream so the file handle is released and the file can
     * be read, moved or deleted by the caller. Closing twice is harmless.
     */
    private void closeOutput() {
        try {
            fout.close();
        } catch (IOException e) {
            logger.warn("Could not close audio file {}", file.getAbsolutePath(), e);
        }
    }

}

package com.ruoyi.ai.utils.alitts;

import lombok.Getter;
import lombok.Setter;
import lombok.ToString;

/**
 * Request object for an Aliyun TTS (text-to-speech) call.
 *
 * <p>Getters, setters and {@code toString} are generated by Lombok.
 *
 * @author Administrator
 * @since 2025/5/26 11:45
 */
@Getter
@Setter
@ToString
public class AliTtsReq {
    // Text to convert to speech.
    String text;
    // Speaker (voice) name, e.g. zhimiao_emo.
    String voice;
    // Volume, e.g. 100%.
    String volume;
    // Speech rate, e.g. 100%.
    String speechRate;
    // Pitch, e.g. 100.
    String pitchRate;
    // Full path of the output file: directory plus file name.
    String filePath;
}

package com.ruoyi.ai.utils.alitts;

import lombok.Getter;
import lombok.Setter;
import lombok.ToString;

import java.io.File;

/**
 * Result of an Aliyun TTS (text-to-speech) call.
 *
 * <p>Getters, setters and {@code toString} are generated by Lombok.
 *
 * @author Administrator
 * @since 2025/5/26 11:47
 */
@Getter
@Setter
@ToString
public class AliTtsRes {

    /* Service status code, see https://help.aliyun.com/zh/isi/developer-reference/overview-of-speech-synthesis?spm=a2c4g.11186623.0.0.cfbc727ctrDNba#section-so5-m5u-kw2 */
    String status;
    /* Status text that accompanies the status code. */
    String statusText;
    /* task_id identifies the request towards the service; provide it when troubleshooting. */
    String taskId;
    /* Path of the synthesized audio file. */
    String audioFilePath;
    /* The synthesized audio file. */
    File audioFile;

    /**
     * Deletes the audio file if one is set and it exists on disk; otherwise
     * does nothing. The outcome of the deletion is not reported.
     */
    public void deleteFile() {
        File target = this.audioFile;
        if (target == null || !target.exists()) {
            return;
        }
        target.delete();
    }
}

package com.ruoyi.ai.utils.alitts;

import lombok.Data;

/**
 * One subtitle entry of an Aliyun TTS response. The field names mirror the
 * JSON keys of the service payload, for example:
 * <pre>
 * {
 *     "begin_index": 0,
 *     "phoneme": "null",
 *     "end_time": 180,
 *     "end_index": 1,
 *     "begin_time": 0,
 *     "text": "你"
 * }
 * </pre>
 *
 * @author Administrator
 * @since 2025/6/11 11:18
 */
@Data
public class AliTtsSubtitle {

    private String text;        // text of this entry
    private int begin_index;    // start position of the text within the whole sentence, counted from 0
    private int begin_time;     // start timestamp of the text in the synthesized audio, in ms
    private int end_index;      // end position of the text within the whole sentence, counted from 0
    private int end_time;       // end timestamp of the text in the synthesized audio, in ms

}

3.调用示例

// Arguments in order: text, voice, emotion, speech rate, volume, pitch.
Object[] objects = aliTtsUtil.textToSpeech(chat1, split[1], split[2], "80%", "+4dB", "0");
            // Element 0 of the result is the generated audio file; element 1 is its duration.
            File tts = (File)objects[0];