可以参考其他sherpa的语音唤醒文章。
主要代码:
import android.Manifest;
import android.content.Context;
import android.content.pm.PackageManager;
import android.content.res.AssetManager;
import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.MediaRecorder;
import android.text.TextUtils;
import android.util.Log;
import com.k2fsa.sherpa.onnx.FeatureConfig;
import com.k2fsa.sherpa.onnx.KeywordSpotter;
import com.k2fsa.sherpa.onnx.KeywordSpotterConfig;
import com.k2fsa.sherpa.onnx.OnlineModelConfig;
import com.k2fsa.sherpa.onnx.OnlineStream;
import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
public class KwsTools {
private static AudioRecord audioRecord;
private static final int sampleRateInHz = 16000;
private static final int channelConfig = AudioFormat.CHANNEL_IN_MONO;
private static final int audioFormat = AudioFormat.ENCODING_PCM_16BIT;
private static KeywordSpotter spotter;
private static OnlineStream stream;
private static boolean run = true;
private static KeywordSpotterConfig keyConfig = null;
public static void init(Context context,Result result) {
Executors.newSingleThreadExecutor().submit(() -> {
if (context.checkSelfPermission(Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
return;
}
if (keyConfig == null)
{
String path = context.getExternalFilesDir(null) + "";
String sherpa = "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01";
copyAsset(context,sherpa, path);
copyAsset(context,"dexopt", path);
String decoder = path + "/" + sherpa + "/decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
String encoder = path + "/" + sherpa + "/encoder-epoch-12-avg-2-chunk-16-left-64.onnx";
String joiner = path + "/" + sherpa + "/joiner-epoch-12-avg-2-chunk-16-left-64.onnx";
String tokens = path + "/" + sherpa + "/tokens.txt";
String keywords = path + "/" + sherpa + "/keywords.txt";
OnlineTransducerModelConfig ctc = OnlineTransducerModelConfig.builder()
.setDecoder(decoder).setEncoder(encoder).setJoiner(joiner).build();
OnlineModelConfig modelConfig = OnlineModelConfig.builder()
.setTransducer(ctc).setTokens(tokens).setModelType("zipformer2").build();
keyConfig = KeywordSpotterConfig.builder()
.setFeatureConfig(new FeatureConfig.Builder().setSampleRate(sampleRateInHz).setFeatureDim(80).build())
.setOnlineModelConfig(modelConfig)
.setKeywordsFile(keywords).build();
spotter = new KeywordSpotter(keyConfig);
}
stream = spotter.createStream();
try {
int bufferSize = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
audioRecord = new AudioRecord(
MediaRecorder.AudioSource.MIC, // 设置为麦克风录音
sampleRateInHz, // 采样率
channelConfig, // 单声道输入
audioFormat, // 16位PCM编码
bufferSize * 2 // 缓冲区大小
);
audioRecord.startRecording();
} catch (IllegalStateException e)
{
Log.d("kws audio record error:", e.toString());
return;
}
run = true;
int i = (int) (sampleRateInHz * 0.1d);
short[] sArr = new short[i];
while (run) {
Integer valueOf = audioRecord != null ? audioRecord.read(sArr, 0, i) : null;
if (valueOf != null && valueOf > 0) {
float[] fArr = new float[valueOf];
for (int i2 = 0; i2 < valueOf; i2++) {
fArr[i2] = sArr[i2] / 32768.0f;
}
if (stream != null && spotter != null)
{
stream.acceptWaveform(fArr, sampleRateInHz);
while (spotter.isReady(stream)) {
spotter.decode(stream);
String text = spotter.getResult(stream).getKeyword();
if (!TextUtils.isEmpty(text)){
spotter.reset(stream);
result.result(text);
run = false;
break;
}
}
}
}
}
});
}
public static void copyAsset(Context context,String assetPath, String root) {
AssetManager assetManager = context.getAssets(); // 获取 AssetManager
String[] files = null;
try {
// 获取指定目录下的所有文件和目录
files = assetManager.list(assetPath);
} catch (IOException e) {
e.printStackTrace();
}
File file = new File(root + "/" + assetPath);
if (!file.exists()) {
file.mkdirs();
}
if (files != null) {
for (String filename : files) {
String assetFilePath = assetPath + "/" + filename; // 资源文件的完整路径
String destFilePath = root + "/" + assetPath + "/" + filename; // 目标文件的完整路径
try {
// 判断是否为目录
if (assetManager.list(assetFilePath) != null && assetManager.list(assetFilePath).length > 0) {
// 如果是目录,则创建目录并递归复制
boolean res = new File(destFilePath).mkdirs();
copyAsset(context,assetFilePath, root);
} else {
InputStream in = assetManager.open(assetFilePath);
OutputStream out = null;
if (android.os.Build.VERSION.SDK_INT >= android.os.Build.VERSION_CODES.O) {
out = Files.newOutputStream(Paths.get(destFilePath));
} else {
out = new FileOutputStream(destFilePath);
}
if (out == null) continue;
// 复制文件
byte[] buffer = new byte[1024];
int read;
while ((read = in.read(buffer)) != -1) {
out.write(buffer, 0, read);
}
// 关闭流
in.close();
out.flush();
out.close();
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
public interface Result {
void result(String result);
}
public static void Stop(){
run = false;
if (audioRecord != null && audioRecord.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING){
audioRecord.stop();
}
}
public static void destroy() {
run = false;
if (audioRecord != null){
audioRecord.release();
audioRecord = null;
}
if (stream != null){
stream.release();
stream = null;
}
if (spotter != null){
spotter.release();
spotter = null;
}
}
}
语音模型文件如下,实际使用只需以下相关模型文件,keywords可以手动通过tokens找到对应的音标组合,无需用命令行去跑:
我只用了arm64的,里面的so也只需要这两种,另两种是c++相关的。其他版本需要的话添加对应的文件夹和文件
同时build.gradle里的android {}里添加下面这段代码
libs里要添加这个最新版的Java库文件;Java版本请参考build.gradle里的compileOptions,我用的是Java 8。