commit ebc9c69f0a9e49116a9044c2c0fea029b8205d27 Author: Harden <1915702192@qq.com> Date: Thu Jul 25 17:34:40 2024 +0800 “首次提交” diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5ff6309 --- /dev/null +++ b/.gitignore @@ -0,0 +1,38 @@ +target/ +!.mvn/wrapper/maven-wrapper.jar +!**/src/main/**/target/ +!**/src/test/**/target/ + +### IntelliJ IDEA ### +.idea/modules.xml +.idea/jarRepositories.xml +.idea/compiler.xml +.idea/libraries/ +*.iws +*.iml +*.ipr + +### Eclipse ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ +build/ +!**/src/main/**/build/ +!**/src/test/**/build/ + +### VS Code ### +.vscode/ + +### Mac OS ### +.DS_Store \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..aa00ffa --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..fe1451c --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,14 @@ + + + + + + + + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..1061e97 --- /dev/null +++ b/pom.xml @@ -0,0 +1,45 @@ + + + 4.0.0 + + org.example + voice-to-text-demo + 1.0-SNAPSHOT + + + 8 + 8 + UTF-8 + + + + + + org + jaudiotagger + 2.0.3 + + + + + net.java.dev.jna + jna + 5.7.0 + + + com.alphacephei + vosk + 0.3.32 + + + + com.hynnet + jacob + 1.18 + + + + + diff --git a/src/main/java/org/example/Main.java b/src/main/java/org/example/Main.java new file mode 100644 index 0000000..a20ded2 --- /dev/null +++ b/src/main/java/org/example/Main.java @@ -0,0 +1,41 @@ +package org.example; + +import java.io.FileInputStream; +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import javax.sound.sampled.*; +import org.vosk.LogLevel; +import org.vosk.Recognizer; +import org.vosk.LibVosk; +import org.vosk.Model; + +// 识别传入的音频 +public class Main { + public static void main(String[] args) throws IOException, UnsupportedAudioFileException { + LibVosk.setLogLevel(LogLevel.DEBUG); + + long startTime = System.currentTimeMillis(); // 记录开始时间 + + try (Model model = new Model("src\\main\\resources\\model\\vosk-model-cn-0.22"); +// try (Model model = new Model("D:\\Chrome Downloads\\voiceToText\\model\\vosk-model-small-cn-0.22"); + InputStream ais = AudioSystem.getAudioInputStream(new BufferedInputStream(new FileInputStream("src\\main\\resources\\voice\\dhxy.wav"))); + Recognizer recognizer = new Recognizer(model, 16000)) { + + int bytes; + byte[] b = new byte[4096]; + while ((bytes = ais.read(b)) >= 0) { + recognizer.acceptWaveForm(b, bytes); + } + + System.out.println(recognizer.getFinalResult() + System.lineSeparator()); + }catch (Exception e){ + System.out.println("文件读取错误"+e); + } + + long endTime = System.currentTimeMillis(); // 记录结束时间 + long durationMillis = endTime - startTime; // 计算耗时(毫秒) + + System.out.println("处理总耗时: " + durationMillis + " 毫秒"); + } +} diff --git a/src/main/java/org/example/TxtToSoundUtils.java b/src/main/java/org/example/TxtToSoundUtils.java new file mode 100644 index 0000000..a8dee46 --- /dev/null +++ b/src/main/java/org/example/TxtToSoundUtils.java @@ -0,0 +1,71 @@ +package org.example; + + +import com.jacob.activeX.ActiveXComponent; +import com.jacob.com.Dispatch; +import com.jacob.com.Variant; + + +public class TxtToSoundUtils { + + public static void main(String[] args) { + textToSpeech("曾经有一份真挚的爱情摆在我的面前 但是我没有珍惜 等到了失去的时候才后悔莫及 尘世间最痛苦的事莫过于此 如果上天可以给我一个机会再来一次的话 我会跟那个女孩子说三个字: 我爱你"); + System.out.println("生成成功!"); + } + + /** + * 语音转文字并播放 + * + * @param text + */ + public static void textToSpeech(String text) { + ActiveXComponent ax; + try { + ax = new ActiveXComponent("Sapi.SpVoice"); + // 运行时输出语音内容 + Dispatch spVoice = ax.getObject(); + // 音量 0-100 + ax.setProperty("Volume", new Variant(100)); + // 语音朗读速度 -10 到 +10 + ax.setProperty("Rate", new Variant(-2)); + // 执行朗读 + Dispatch.call(spVoice, "Speak", new Variant(text)); + + // 下面是构建文件流把生成语音文件 + ax = new ActiveXComponent("Sapi.SpFileStream"); + Dispatch spFileStream = ax.getObject(); + + ax = new ActiveXComponent("Sapi.SpAudioFormat"); + Dispatch spAudioFormat = ax.getObject(); + + // 设置音频流格式 + Dispatch.put(spAudioFormat, "Type", new Variant(22)); + // 设置文件输出流格式 + Dispatch.putRef(spFileStream, "Format", spAudioFormat); + // 调用输出 文件流打开方法,创建一个.wav文件 + Dispatch.call(spFileStream, "Open", new Variant("D:\\Chrome Downloads\\TestFile.wav"), new Variant(3), new Variant(true)); + // 设置声音对象的音频输出流为输出文件对象 + Dispatch.putRef(spVoice, "AudioOutputStream", spFileStream); + // 设置音量 0到100 + Dispatch.put(spVoice, "Volume", new Variant(100)); + // 设置朗读速度 + Dispatch.put(spVoice, "Rate", new Variant(-2)); + // 开始朗读 + Dispatch.call(spVoice, "Speak", new Variant(text)); + + // 关闭输出文件 + Dispatch.call(spFileStream, "Close"); + Dispatch.putRef(spVoice, "AudioOutputStream", null); + + spAudioFormat.safeRelease(); + spFileStream.safeRelease(); + spVoice.safeRelease(); + ax.safeRelease(); + + } catch (Exception e) { + e.printStackTrace(); + } + } + +} + diff --git a/src/main/java/org/example/demo1.java b/src/main/java/org/example/demo1.java new file mode 100644 index 0000000..6661297 --- /dev/null +++ b/src/main/java/org/example/demo1.java @@ -0,0 +1,68 @@ +package org.example; + +import org.vosk.LibVosk; +import org.vosk.LogLevel; +import org.vosk.Model; +import org.vosk.Recognizer; +import javax.sound.sampled.*; +import java.io.IOException; + +// 持续监听 +public class demo1 { + + public static void main(String[] args) throws IOException { + LibVosk.setLogLevel(LogLevel.DEBUG); + + // 设置音频格式 + AudioFormat format = new AudioFormat(16000, 16, 1, true, false); + DataLine.Info info = new DataLine.Info(TargetDataLine.class, format); + + // 检查是否支持该音频格式 + if (!AudioSystem.isLineSupported(info)) { + System.out.println("不支持的音频格式"); + System.exit(0); + } + + // 获取并打开音频输入线 + TargetDataLine microphone; + try { + microphone = (TargetDataLine) AudioSystem.getLine(info); + microphone.open(format); + } catch (LineUnavailableException e) { + e.printStackTrace(); + return; + } + + // 创建Vosk识别器 + try (Model model = new Model("src\\main\\resources\\model\\vosk-model-cn-0.22"); + Recognizer recognizer = new Recognizer(model, 16000)) { + + // 开始录音 + microphone.start(); + System.out.println("语言识别模型载入完成..."); + System.out.println("开始录音..."); + + byte[] buffer = new byte[4096]; + int bytesRead; + + while (true) { // 持续读取麦克风输入 + bytesRead = microphone.read(buffer, 0, buffer.length); + if (bytesRead > 0) { + recognizer.acceptWaveForm(buffer, bytesRead); + String result = recognizer.getPartialResult(); // 获取部分识别结果 + System.out.println(result); + } + } + + } catch (Exception e) { + e.printStackTrace(); + } finally { + // 关闭麦克风 + if (microphone != null && microphone.isOpen()) { + microphone.stop(); + microphone.close(); + } + } + } + +} diff --git a/src/main/java/org/example/demo2.java b/src/main/java/org/example/demo2.java new file mode 100644 index 0000000..55f2430 --- /dev/null +++ b/src/main/java/org/example/demo2.java @@ -0,0 +1,102 @@ +package org.example; + +import org.vosk.LibVosk; +import org.vosk.LogLevel; +import org.vosk.Model; +import org.vosk.Recognizer; +import javax.sound.sampled.*; +import java.io.IOException; + +public class demo2 { + + private static volatile boolean recording = false; // 控制录音的标志 + private static final String START_KEYWORD = "开始"; // 启动录音的关键词 + private static final String STOP_KEYWORD = "关闭"; // 停止录音的关键词 + + public static void main(String[] args) throws IOException { + LibVosk.setLogLevel(LogLevel.DEBUG); + + // 设置音频格式 + AudioFormat format = new AudioFormat(16000, 16, 1, true, false); + DataLine.Info info = new DataLine.Info(TargetDataLine.class, format); + + // 检查是否支持该音频格式 + if (!AudioSystem.isLineSupported(info)) { + System.out.println("不支持的音频格式"); + System.exit(0); + } + + // 获取并打开音频输入线 + TargetDataLine microphone; + try { + microphone = (TargetDataLine) AudioSystem.getLine(info); + microphone.open(format); + } catch (LineUnavailableException e) { + e.printStackTrace(); + return; + } + + // 创建Vosk识别器用于检测关键词 + try (Model model = new Model("src\\main\\resources\\model\\vosk-model-cn-0.22"); + Recognizer recognizer = new Recognizer(model, 16000)) { + + microphone.start(); + System.out.println("语言识别模型载入完成..."); + System.out.println("开始监听..."); + + byte[] buffer = new byte[4096]; + int bytesRead; + + // 初步监听关键词“开始” + while (true) { + bytesRead = microphone.read(buffer, 0, buffer.length); + if (bytesRead > 0) { + recognizer.acceptWaveForm(buffer, bytesRead); + String result = recognizer.getPartialResult(); // 获取部分识别结果 + + if (result.contains(START_KEYWORD)) { + System.out.println("检测到开始字样,启动录音..."); + recording = true; + startRecording(microphone, model); + break; // 退出循环,开始录音 + } + } + } + + } catch (Exception e) { + e.printStackTrace(); + } finally { + // 关闭麦克风 + if (microphone != null && microphone.isOpen()) { + microphone.stop(); + microphone.close(); + } + } + } + + private static void startRecording(TargetDataLine microphone, Model model) { + try (Recognizer recognizer = new Recognizer(model, 16000)) { + System.out.println("开始录音..."); + + byte[] buffer = new byte[4096]; + int bytesRead; + + while (recording) { // 进行实际的语音识别 + bytesRead = microphone.read(buffer, 0, buffer.length); + if (bytesRead > 0) { + recognizer.acceptWaveForm(buffer, bytesRead); + String result = recognizer.getPartialResult(); // 获取部分识别结果 + System.out.println(result); + + if (result.contains(STOP_KEYWORD)) { + System.out.println("检测到关闭字样,停止录音..."); + recording = false; // 设置标志,停止录音 + } + } + } + + } catch (Exception e) { + e.printStackTrace(); + } + } +} diff --git a/src/main/java/org/example/demo3.java b/src/main/java/org/example/demo3.java new file mode 100644 index 0000000..65510cb --- /dev/null +++ b/src/main/java/org/example/demo3.java @@ -0,0 +1,139 @@ +package org.example; + +import javax.sound.sampled.*; +import org.vosk.LogLevel; +import org.vosk.Recognizer; +import org.vosk.LibVosk; +import org.vosk.Model; +import java.io.IOException; + +public class demo3 { + + private static volatile boolean recording = false; // 控制录音的标志 + private static final String START_KEYWORD = "开始"; // 启动录音的关键词 + private static final String STOP_KEYWORD = "停止"; // 停止录音的关键词 + private static final long PAUSE_DURATION_MS = 3000; // 停顿时间:3秒 + private static final float VOLUME_THRESHOLD = 0.01f; // 音量阈值 + + public static void main(String[] args) throws IOException { + LibVosk.setLogLevel(LogLevel.DEBUG); + + // 设置音频格式 + AudioFormat format = new AudioFormat(16000, 16, 1, true, false); + DataLine.Info info = new DataLine.Info(TargetDataLine.class, format); + + // 检查是否支持该音频格式 + if (!AudioSystem.isLineSupported(info)) { + System.out.println("不支持的音频格式"); + System.exit(0); + } + + // 获取并打开音频输入线 + TargetDataLine microphone; + try { + microphone = (TargetDataLine) AudioSystem.getLine(info); + microphone.open(format); + } catch (LineUnavailableException e) { + e.printStackTrace(); + return; + } + + // 创建Vosk识别器用于检测关键词 + try (Model model = new Model("src\\main\\resources\\model\\vosk-model-cn-0.22"); + Recognizer recognizer = new Recognizer(model, 16000)) { + + microphone.start(); + System.out.println("语言识别模型载入完成..."); + System.out.println("开始监听..."); + + byte[] buffer = new byte[4096]; + int bytesRead; + + while (true) { + bytesRead = microphone.read(buffer, 0, buffer.length); + if (bytesRead > 0) { + recognizer.acceptWaveForm(buffer, bytesRead); + String result = recognizer.getPartialResult(); // 获取部分识别结果 + + if (result.contains(START_KEYWORD)) { + System.out.println("检测到开始字样,启动录音..."); + recording = true; + startRecording(microphone, model); + break; // 退出循环,开始录音 + } + } + } + + } catch (Exception e) { + e.printStackTrace(); + } finally { + // 关闭麦克风 + if (microphone != null && microphone.isOpen()) { + microphone.stop(); + microphone.close(); + } + } + } + + private static void startRecording(TargetDataLine microphone, Model model) { + try (Recognizer recognizer = new Recognizer(model, 16000)) { + System.out.println("开始录音..."); + + byte[] buffer = new byte[4096]; + int bytesRead; + long lastActivityTime = System.currentTimeMillis(); + StringBuilder resultBuilder = new StringBuilder(); + + while (recording) { // 进行实际的语音识别 + bytesRead = microphone.read(buffer, 0, buffer.length); + if (bytesRead > 0) { + recognizer.acceptWaveForm(buffer, bytesRead); + String result = recognizer.getPartialResult(); // 获取部分识别结果 + System.out.println(result); + + // 计算音量 + float volume = calculateVolume(buffer, bytesRead); + + // 更新最后活动时间 + if (volume > VOLUME_THRESHOLD) { + lastActivityTime = System.currentTimeMillis(); + } + + // 检查是否有3秒钟的停顿 + if (System.currentTimeMillis() - lastActivityTime > PAUSE_DURATION_MS) { + System.out.println("检测到停顿超过3秒,清空识别内容..."); + recognizer.reset(); // 清空识别器内容 + resultBuilder.append("\n[停顿]"); + } + + // 检测停止关键词 + if (result.contains(STOP_KEYWORD)) { + System.out.println("检测到停止字样,停止录音..."); + recording = false; // 设置标志,停止录音 + resultBuilder.append("\n[停止]"); + } + + // 将当前识别结果添加到结果构建器中 + resultBuilder.append(result); + } + } + + // 输出最终识别结果 + System.out.println("最终识别结果: " + resultBuilder.toString()); + + } catch (Exception e) { + e.printStackTrace(); + } + } + + private static float calculateVolume(byte[] buffer, int bytesRead) { + long sum = 0; + for (int i = 0; i < bytesRead; i += 2) { + // 读取每个样本的短整数值 + int sample = (buffer[i] & 0xFF) | (buffer[i + 1] << 8); + sum += Math.abs(sample); + } + float average = (float) sum / (bytesRead / 2); + return average / Short.MAX_VALUE; // 归一化 + } +} diff --git a/src/main/java/org/example/demo4.java b/src/main/java/org/example/demo4.java new file mode 100644 index 0000000..f57fe04 --- /dev/null +++ b/src/main/java/org/example/demo4.java @@ -0,0 +1,66 @@ +package org.example; + +import org.vosk.LibVosk; +import org.vosk.LogLevel; +import org.vosk.Model; +import org.vosk.Recognizer; +import javax.sound.sampled.*; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +// 语音识别示例 停顿后不输出识别内容 +public class demo4 { + + public static void main(String[] args) throws IOException { + + LibVosk.setLogLevel(LogLevel.DEBUG); + + AudioFormat format = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, 60000, 16, 2, 4, 44100, false); + DataLine.Info info = new DataLine.Info(TargetDataLine.class, format); + TargetDataLine microphone; + SourceDataLine speakers; + + try (Model model = new Model("src\\main\\resources\\model\\vosk-model-cn-0.22"); + Recognizer recognizer = new Recognizer(model, 120000)) { + System.out.println("语言识别模型载入完成..."); + try { + + microphone = (TargetDataLine) AudioSystem.getLine(info); + microphone.open(format); + microphone.start(); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + int numBytesRead; + int CHUNK_SIZE = 1024; + int bytesRead = 0; + + DataLine.Info dataLineInfo = new DataLine.Info(SourceDataLine.class, format); + speakers = (SourceDataLine) AudioSystem.getLine(dataLineInfo); + speakers.open(format); + speakers.start(); + byte[] b = new byte[4096]; + + while (bytesRead <= 100000000) { + numBytesRead = microphone.read(b, 0, CHUNK_SIZE); + bytesRead += numBytesRead; + + out.write(b, 0, numBytesRead); + + speakers.write(b, 0, numBytesRead); + + if (recognizer.acceptWaveForm(b, numBytesRead)) { + System.out.println(recognizer.getResult()); + } else { + System.out.println(recognizer.getPartialResult()); + } + } + System.out.println(recognizer.getFinalResult()); + speakers.drain(); + speakers.close(); + microphone.close(); + } catch (Exception e) { + e.printStackTrace(); + } + } + } +} diff --git a/src/main/java/org/example/demo5.java b/src/main/java/org/example/demo5.java new file mode 100644 index 0000000..7342009 --- /dev/null +++ b/src/main/java/org/example/demo5.java @@ -0,0 +1,84 @@ +package org.example; + +import org.vosk.LibVosk; +import org.vosk.LogLevel; +import org.vosk.Model; +import org.vosk.Recognizer; +import javax.sound.sampled.*; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +// 语音识别示例 对话 +public class demo5 { + + private static final String WAKE_WORD = "小爱同学"; // 唤醒词 + + public static void main(String[] args) { + // 设置日志级别 + LibVosk.setLogLevel(LogLevel.DEBUG); + + // 配置音频格式 + AudioFormat format = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, 16000, 16, 1, 2, 16000, true); + DataLine.Info inputInfo = new DataLine.Info(TargetDataLine.class, format); + DataLine.Info outputInfo = new DataLine.Info(SourceDataLine.class, format); + + try (Model model = new Model("src/main/resources/model/vosk-model-cn-0.22"); + Recognizer recognizer = new Recognizer(model, 16000)) { + + // 打开并启动麦克风 + try (TargetDataLine microphone = (TargetDataLine) AudioSystem.getLine(inputInfo)) { + microphone.open(format); + microphone.start(); + + // 准备音频输出(用于测试) + try (SourceDataLine speakers = (SourceDataLine) AudioSystem.getLine(outputInfo)) { + speakers.open(format); + speakers.start(); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + byte[] buffer = new byte[4096]; + int bytesRead; + boolean wakeWordDetected = false; + + System.out.println("等待唤醒词..."); + + while (true) { + bytesRead = microphone.read(buffer, 0, buffer.length); + if (bytesRead == -1) break; // 检测到流结束 + + // 写入扬声器用于播放(可选) + speakers.write(buffer, 0, bytesRead); + + // 简单的唤醒词检测 + if (!wakeWordDetected) { + String partialResult = recognizer.getPartialResult(); + if (partialResult.replaceAll("\\s+", "").contains(WAKE_WORD)) { + System.out.println("唤醒词检测到,开始语音识别..."); + wakeWordDetected = true; + } + } + + // 启动语音识别 + if (wakeWordDetected) { + if (recognizer.acceptWaveForm(buffer, bytesRead)) { + System.out.println("识别结果: " + recognizer.getResult()); + } else { + System.out.println("部分结果: " + recognizer.getPartialResult()); + } + } + } + + System.out.println("最终结果: " + recognizer.getFinalResult()); + speakers.drain(); + } catch (Exception e) { + System.err.println("音频播放错误: " + e.getMessage()); + } + } catch (Exception e) { + System.err.println("音频输入错误: " + e.getMessage()); + } + + } catch (IOException e) { + System.err.println("模型加载错误: " + e.getMessage()); + } + } +} diff --git a/src/main/java/org/example/openAiDemo.java b/src/main/java/org/example/openAiDemo.java new file mode 100644 index 0000000..7f56d27 --- /dev/null +++ b/src/main/java/org/example/openAiDemo.java @@ -0,0 +1,4 @@ +package org.example; + +public class openAiDemo { +} diff --git a/src/main/resources/voice/dhxy.wav b/src/main/resources/voice/dhxy.wav new file mode 100644 index 0000000..ae6e152 Binary files /dev/null and b/src/main/resources/voice/dhxy.wav differ