sherpa 语音唤醒

最新推荐文章于 2025-06-12 23:12:33 发布

原创最新推荐文章于 2025-06-12 23:12:33 发布 · 323 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#语音唤醒 #sherpa #大模型

可以参考其他sherpa的语音唤醒文章。

主要代码：

import android.Manifest;
import android.content.Context;
import android.content.pm.PackageManager;
import android.content.res.AssetManager;
import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.MediaRecorder;
import android.text.TextUtils;
import android.util.Log;

import com.k2fsa.sherpa.onnx.FeatureConfig;
import com.k2fsa.sherpa.onnx.KeywordSpotter;
import com.k2fsa.sherpa.onnx.KeywordSpotterConfig;
import com.k2fsa.sherpa.onnx.OnlineModelConfig;
import com.k2fsa.sherpa.onnx.OnlineStream;
import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.concurrent.Executors;

public class KwsTools {
    private static AudioRecord audioRecord;
    private static final int sampleRateInHz = 16000;
    private static final int channelConfig = AudioFormat.CHANNEL_IN_MONO;
    private static final int audioFormat = AudioFormat.ENCODING_PCM_16BIT;
    private static KeywordSpotter spotter;
    private static OnlineStream stream;
    private static boolean run = true;
    private static KeywordSpotterConfig keyConfig = null;
    public static void init(Context context,Result result) {
        Executors.newSingleThreadExecutor().submit(() -> {
            if (context.checkSelfPermission(Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
                return;
            }

            if (keyConfig == null)
            {
                String path = context.getExternalFilesDir(null) + "";
                String sherpa = "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01";
                copyAsset(context,sherpa, path);
                copyAsset(context,"dexopt", path);
                String decoder = path + "/" + sherpa + "/decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
                String encoder = path + "/" + sherpa + "/encoder-epoch-12-avg-2-chunk-16-left-64.onnx";
                String joiner = path + "/" + sherpa + "/joiner-epoch-12-avg-2-chunk-16-left-64.onnx";
                String tokens = path + "/" + sherpa + "/tokens.txt";
                String keywords = path + "/" + sherpa + "/keywords.txt";

                OnlineTransducerModelConfig ctc = OnlineTransducerModelConfig.builder()
                        .setDecoder(decoder).setEncoder(encoder).setJoiner(joiner).build();
                OnlineModelConfig modelConfig = OnlineModelConfig.builder()
                        .setTransducer(ctc).setTokens(tokens).setModelType("zipformer2").build();
                keyConfig = KeywordSpotterConfig.builder()
                        .setFeatureConfig(new FeatureConfig.Builder().setSampleRate(sampleRateInHz).setFeatureDim(80).build())
                        .setOnlineModelConfig(modelConfig)
                        .setKeywordsFile(keywords).build();
                spotter = new KeywordSpotter(keyConfig);
            }

            stream = spotter.createStream();
            try {
                int bufferSize = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
                audioRecord = new AudioRecord(
                        MediaRecorder.AudioSource.MIC, // 设置为麦克风录音
                        sampleRateInHz, // 采样率
                        channelConfig, // 单声道输入
                        audioFormat, // 16位PCM编码
                        bufferSize * 2 // 缓冲区大小
                );
                audioRecord.startRecording();
            } catch (IllegalStateException e)
            {
                Log.d("kws audio record error:", e.toString());
                return;
            }

            run = true;
            int i = (int) (sampleRateInHz * 0.1d);
            short[] sArr = new short[i];
            while (run) {
                Integer valueOf = audioRecord != null ? audioRecord.read(sArr, 0, i) : null;
                if (valueOf != null && valueOf > 0) {
                    float[] fArr = new float[valueOf];
                    for (int i2 = 0; i2 < valueOf; i2++) {
                        fArr[i2] = sArr[i2] / 32768.0f;
                    }
                    if (stream != null && spotter != null)
                    {
                        stream.acceptWaveform(fArr, sampleRateInHz);
                        while (spotter.isReady(stream)) {
                            spotter.decode(stream);
                            String text = spotter.getResult(stream).getKeyword();
                            if (!TextUtils.isEmpty(text)){
                                spotter.reset(stream);
                                result.result(text);
                                run = false;
                                break;
                            }
                        }
                    }
                }
            }
        });
    }

    public static void copyAsset(Context context,String assetPath, String root) {
        AssetManager assetManager = context.getAssets(); // 获取 AssetManager
        String[] files = null;
        try {
            // 获取指定目录下的所有文件和目录
            files = assetManager.list(assetPath);
        } catch (IOException e) {
            e.printStackTrace();
        }
        File file = new File(root + "/" + assetPath);
        if (!file.exists()) {
            file.mkdirs();
        }

        if (files != null) {
            for (String filename : files) {
                String assetFilePath = assetPath + "/" + filename; // 资源文件的完整路径
                String destFilePath = root + "/" + assetPath + "/" + filename; // 目标文件的完整路径
                try {
                    // 判断是否为目录
                    if (assetManager.list(assetFilePath) != null && assetManager.list(assetFilePath).length > 0) {
                        // 如果是目录，则创建目录并递归复制
                        boolean res = new File(destFilePath).mkdirs();
                        copyAsset(context,assetFilePath, root);
                    } else {
                        InputStream in = assetManager.open(assetFilePath);
                        OutputStream out = null;
                        if (android.os.Build.VERSION.SDK_INT >= android.os.Build.VERSION_CODES.O) {
                            out = Files.newOutputStream(Paths.get(destFilePath));
                        } else {
                            out = new FileOutputStream(destFilePath);
                        }
                        if (out == null) continue;

                        // 复制文件
                        byte[] buffer = new byte[1024];
                        int read;
                        while ((read = in.read(buffer)) != -1) {
                            out.write(buffer, 0, read);
                        }
                        // 关闭流
                        in.close();
                        out.flush();
                        out.close();
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
    public interface Result {
        void result(String result);
    }
    public static void Stop(){
        run = false;
        if (audioRecord != null && audioRecord.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING){
            audioRecord.stop();
        }
    }

    public static void destroy() {
        run = false;
        if (audioRecord != null){
            audioRecord.release();
            audioRecord = null;
        }
        if (stream != null){
            stream.release();
            stream = null;
        }
        if (spotter != null){
            spotter.release();
            spotter = null;
        }
    }
}

语音模型文件如下，实际使用时只需要下列相关模型文件。keywords 文件可以对照 tokens 文件手动找出对应的音标组合来编写，无需通过命令行工具生成：