"Initial commit"

master · 蒋尚宏 committed 1 year ago · commit ebc9c69f0a

.gitignore (vendored) · 38 additions

@@ -0,0 +1,38 @@
target/
!.mvn/wrapper/maven-wrapper.jar
!**/src/main/**/target/
!**/src/test/**/target/
### IntelliJ IDEA ###
.idea/modules.xml
.idea/jarRepositories.xml
.idea/compiler.xml
.idea/libraries/
*.iws
*.iml
*.ipr
### Eclipse ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
build/
!**/src/main/**/build/
!**/src/test/**/build/
### VS Code ###
.vscode/
### Mac OS ###
.DS_Store

.idea/.gitignore (vendored) · 8 additions

@@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="Encoding">
    <file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" />
    <file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" />
  </component>
</project>

@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ExternalStorageConfigurationManager" enabled="true" />
  <component name="MavenProjectsManager">
    <option name="originalFiles">
      <list>
        <option value="$PROJECT_DIR$/pom.xml" />
      </list>
    </option>
  </component>
  <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="true" project-jdk-name="1.8 (2)" project-jdk-type="JavaSDK">
    <output url="file://$PROJECT_DIR$/out" />
  </component>
</project>

@@ -0,0 +1,45 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>voice-to-text-demo</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- Read audio metadata -->
        <dependency>
            <groupId>org</groupId>
            <artifactId>jaudiotagger</artifactId>
            <version>2.0.3</version>
        </dependency>
        <!-- Speech recognition (Vosk + JNA) -->
        <dependency>
            <groupId>net.java.dev.jna</groupId>
            <artifactId>jna</artifactId>
            <version>5.7.0</version>
        </dependency>
        <dependency>
            <groupId>com.alphacephei</groupId>
            <artifactId>vosk</artifactId>
            <version>0.3.32</version>
        </dependency>
        <!-- Text-to-speech (Windows SAPI via JACOB) -->
        <dependency>
            <groupId>com.hynnet</groupId>
            <artifactId>jacob</artifactId>
            <version>1.18</version>
        </dependency>
        <!-- openai -->
    </dependencies>
</project>

@@ -0,0 +1,41 @@
package org.example;

import java.io.FileInputStream;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;

import javax.sound.sampled.*;

import org.vosk.LogLevel;
import org.vosk.Recognizer;
import org.vosk.LibVosk;
import org.vosk.Model;

// Recognizes speech from a pre-recorded audio file
public class Main {

    public static void main(String[] args) throws IOException, UnsupportedAudioFileException {
        LibVosk.setLogLevel(LogLevel.DEBUG);
        long startTime = System.currentTimeMillis(); // record the start time

        try (Model model = new Model("src\\main\\resources\\model\\vosk-model-cn-0.22");
             // try (Model model = new Model("D:\\Chrome Downloads\\voiceToText\\model\\vosk-model-small-cn-0.22");
             InputStream ais = AudioSystem.getAudioInputStream(new BufferedInputStream(new FileInputStream("src\\main\\resources\\voice\\dhxy.wav")));
             Recognizer recognizer = new Recognizer(model, 16000)) {

            int bytes;
            byte[] b = new byte[4096];
            while ((bytes = ais.read(b)) >= 0) {
                recognizer.acceptWaveForm(b, bytes);
            }
            System.out.println(recognizer.getFinalResult() + System.lineSeparator());
        } catch (Exception e) {
            System.out.println("文件读取错误" + e);
        }

        long endTime = System.currentTimeMillis();     // record the end time
        long durationMillis = endTime - startTime;     // elapsed time in milliseconds
        System.out.println("处理总耗时: " + durationMillis + " 毫秒");
    }
}

@@ -0,0 +1,71 @@
package org.example;

import com.jacob.activeX.ActiveXComponent;
import com.jacob.com.Dispatch;
import com.jacob.com.Variant;

public class TxtToSoundUtils {

    public static void main(String[] args) {
        textToSpeech("曾经有一份真挚的爱情摆在我的面前 但是我没有珍惜 等到了失去的时候才后悔莫及 尘世间最痛苦的事莫过于此 如果上天可以给我一个机会再来一次的话 我会跟那个女孩子说三个字: 我爱你");
        System.out.println("生成成功!");
    }

    /**
     * Speaks the given text aloud via Windows SAPI and also writes it to a .wav file.
     *
     * @param text the text to convert to speech
     */
    public static void textToSpeech(String text) {
        ActiveXComponent ax;
        try {
            ax = new ActiveXComponent("Sapi.SpVoice");
            // Speak the text through the default audio output
            Dispatch spVoice = ax.getObject();
            // Volume: 0-100
            ax.setProperty("Volume", new Variant(100));
            // Speaking rate: -10 to +10
            ax.setProperty("Rate", new Variant(-2));
            // Perform the speech
            Dispatch.call(spVoice, "Speak", new Variant(text));

            // Build a file stream so the speech can also be written out as an audio file
            ax = new ActiveXComponent("Sapi.SpFileStream");
            Dispatch spFileStream = ax.getObject();

            ax = new ActiveXComponent("Sapi.SpAudioFormat");
            Dispatch spAudioFormat = ax.getObject();

            // Set the audio stream format
            Dispatch.put(spAudioFormat, "Type", new Variant(22));
            // Apply the format to the output file stream
            Dispatch.putRef(spFileStream, "Format", spAudioFormat);
            // Open the output file stream, creating a .wav file
            Dispatch.call(spFileStream, "Open", new Variant("D:\\Chrome Downloads\\TestFile.wav"), new Variant(3), new Variant(true));
            // Route the voice's audio output to the file stream
            Dispatch.putRef(spVoice, "AudioOutputStream", spFileStream);
            // Volume: 0-100
            Dispatch.put(spVoice, "Volume", new Variant(100));
            // Speaking rate
            Dispatch.put(spVoice, "Rate", new Variant(-2));
            // Speak again, this time into the file
            Dispatch.call(spVoice, "Speak", new Variant(text));
            // Close the output file
            Dispatch.call(spFileStream, "Close");
            Dispatch.putRef(spVoice, "AudioOutputStream", null);

            spAudioFormat.safeRelease();
            spFileStream.safeRelease();
            spVoice.safeRelease();
            ax.safeRelease();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

@@ -0,0 +1,68 @@
package org.example;

import org.vosk.LibVosk;
import org.vosk.LogLevel;
import org.vosk.Model;
import org.vosk.Recognizer;

import javax.sound.sampled.*;
import java.io.IOException;

// Continuously listens to the microphone and prints partial recognition results
public class demo1 {

    public static void main(String[] args) throws IOException {
        LibVosk.setLogLevel(LogLevel.DEBUG);

        // Set up the audio format
        AudioFormat format = new AudioFormat(16000, 16, 1, true, false);
        DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);

        // Check whether this audio format is supported
        if (!AudioSystem.isLineSupported(info)) {
            System.out.println("不支持的音频格式");
            System.exit(0);
        }

        // Obtain and open the audio input line
        TargetDataLine microphone;
        try {
            microphone = (TargetDataLine) AudioSystem.getLine(info);
            microphone.open(format);
        } catch (LineUnavailableException e) {
            e.printStackTrace();
            return;
        }

        // Create the Vosk recognizer
        try (Model model = new Model("src\\main\\resources\\model\\vosk-model-cn-0.22");
             Recognizer recognizer = new Recognizer(model, 16000)) {
            // Start capturing audio
            microphone.start();
            System.out.println("语言识别模型载入完成...");
            System.out.println("开始录音...");

            byte[] buffer = new byte[4096];
            int bytesRead;
            while (true) { // keep reading from the microphone
                bytesRead = microphone.read(buffer, 0, buffer.length);
                if (bytesRead > 0) {
                    recognizer.acceptWaveForm(buffer, bytesRead);
                    String result = recognizer.getPartialResult(); // partial recognition result
                    System.out.println(result);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close the microphone
            if (microphone != null && microphone.isOpen()) {
                microphone.stop();
                microphone.close();
            }
        }
    }
}

@@ -0,0 +1,102 @@
package org.example;

import org.vosk.LibVosk;
import org.vosk.LogLevel;
import org.vosk.Model;
import org.vosk.Recognizer;

import javax.sound.sampled.*;
import java.io.IOException;

// Waits for a spoken start keyword, then records and recognizes speech until the stop keyword is heard
public class demo2 {

    private static volatile boolean recording = false;   // flag that controls recording
    private static final String START_KEYWORD = "开始";   // keyword that starts recording
    private static final String STOP_KEYWORD = "关闭";    // keyword that stops recording

    public static void main(String[] args) throws IOException {
        LibVosk.setLogLevel(LogLevel.DEBUG);

        // Set up the audio format
        AudioFormat format = new AudioFormat(16000, 16, 1, true, false);
        DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);

        // Check whether this audio format is supported
        if (!AudioSystem.isLineSupported(info)) {
            System.out.println("不支持的音频格式");
            System.exit(0);
        }

        // Obtain and open the audio input line
        TargetDataLine microphone;
        try {
            microphone = (TargetDataLine) AudioSystem.getLine(info);
            microphone.open(format);
        } catch (LineUnavailableException e) {
            e.printStackTrace();
            return;
        }

        // Create a Vosk recognizer used to detect the start keyword
        try (Model model = new Model("src\\main\\resources\\model\\vosk-model-cn-0.22");
             Recognizer recognizer = new Recognizer(model, 16000)) {
            microphone.start();
            System.out.println("语言识别模型载入完成...");
            System.out.println("开始监听...");

            byte[] buffer = new byte[4096];
            int bytesRead;

            // Listen for the start keyword
            while (true) {
                bytesRead = microphone.read(buffer, 0, buffer.length);
                if (bytesRead > 0) {
                    recognizer.acceptWaveForm(buffer, bytesRead);
                    String result = recognizer.getPartialResult(); // partial recognition result
                    if (result.contains(START_KEYWORD)) {
                        System.out.println("检测到开始字样,启动录音...");
                        recording = true;
                        startRecording(microphone, model);
                        break; // leave the keyword loop once recording has finished
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close the microphone
            if (microphone != null && microphone.isOpen()) {
                microphone.stop();
                microphone.close();
            }
        }
    }

    private static void startRecording(TargetDataLine microphone, Model model) {
        try (Recognizer recognizer = new Recognizer(model, 16000)) {
            System.out.println("开始录音...");
            byte[] buffer = new byte[4096];
            int bytesRead;

            while (recording) { // perform the actual speech recognition
                bytesRead = microphone.read(buffer, 0, buffer.length);
                if (bytesRead > 0) {
                    recognizer.acceptWaveForm(buffer, bytesRead);
                    String result = recognizer.getPartialResult(); // partial recognition result
                    System.out.println(result);
                    if (result.contains(STOP_KEYWORD)) {
                        System.out.println("检测到关闭字样,停止录音...");
                        recording = false; // clear the flag to stop recording
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

@@ -0,0 +1,139 @@
package org.example;

import javax.sound.sampled.*;

import org.vosk.LogLevel;
import org.vosk.Recognizer;
import org.vosk.LibVosk;
import org.vosk.Model;

import java.io.IOException;

// Keyword-triggered recording with pause detection based on a simple volume threshold
public class demo3 {

    private static volatile boolean recording = false;    // flag that controls recording
    private static final String START_KEYWORD = "开始";    // keyword that starts recording
    private static final String STOP_KEYWORD = "停止";     // keyword that stops recording
    private static final long PAUSE_DURATION_MS = 3000;   // pause length: 3 seconds
    private static final float VOLUME_THRESHOLD = 0.01f;  // volume threshold

    public static void main(String[] args) throws IOException {
        LibVosk.setLogLevel(LogLevel.DEBUG);

        // Set up the audio format
        AudioFormat format = new AudioFormat(16000, 16, 1, true, false);
        DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);

        // Check whether this audio format is supported
        if (!AudioSystem.isLineSupported(info)) {
            System.out.println("不支持的音频格式");
            System.exit(0);
        }

        // Obtain and open the audio input line
        TargetDataLine microphone;
        try {
            microphone = (TargetDataLine) AudioSystem.getLine(info);
            microphone.open(format);
        } catch (LineUnavailableException e) {
            e.printStackTrace();
            return;
        }

        // Create a Vosk recognizer used to detect the start keyword
        try (Model model = new Model("src\\main\\resources\\model\\vosk-model-cn-0.22");
             Recognizer recognizer = new Recognizer(model, 16000)) {
            microphone.start();
            System.out.println("语言识别模型载入完成...");
            System.out.println("开始监听...");

            byte[] buffer = new byte[4096];
            int bytesRead;
            while (true) {
                bytesRead = microphone.read(buffer, 0, buffer.length);
                if (bytesRead > 0) {
                    recognizer.acceptWaveForm(buffer, bytesRead);
                    String result = recognizer.getPartialResult(); // partial recognition result
                    if (result.contains(START_KEYWORD)) {
                        System.out.println("检测到开始字样,启动录音...");
                        recording = true;
                        startRecording(microphone, model);
                        break; // leave the keyword loop once recording has finished
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close the microphone
            if (microphone != null && microphone.isOpen()) {
                microphone.stop();
                microphone.close();
            }
        }
    }

    private static void startRecording(TargetDataLine microphone, Model model) {
        try (Recognizer recognizer = new Recognizer(model, 16000)) {
            System.out.println("开始录音...");
            byte[] buffer = new byte[4096];
            int bytesRead;
            long lastActivityTime = System.currentTimeMillis();
            StringBuilder resultBuilder = new StringBuilder();

            while (recording) { // perform the actual speech recognition
                bytesRead = microphone.read(buffer, 0, buffer.length);
                if (bytesRead > 0) {
                    recognizer.acceptWaveForm(buffer, bytesRead);
                    String result = recognizer.getPartialResult(); // partial recognition result
                    System.out.println(result);

                    // Estimate the input volume
                    float volume = calculateVolume(buffer, bytesRead);
                    // Update the last-activity timestamp whenever the volume is above the threshold
                    if (volume > VOLUME_THRESHOLD) {
                        lastActivityTime = System.currentTimeMillis();
                    }
                    // Check for a pause longer than 3 seconds
                    if (System.currentTimeMillis() - lastActivityTime > PAUSE_DURATION_MS) {
                        System.out.println("检测到停顿超过3秒清空识别内容...");
                        recognizer.reset();                            // clear the recognizer state
                        resultBuilder.append("\n[停顿]");
                        lastActivityTime = System.currentTimeMillis(); // restart the pause timer so the reset fires once per pause
                    }
                    // Check for the stop keyword
                    if (result.contains(STOP_KEYWORD)) {
                        System.out.println("检测到停止字样,停止录音...");
                        recording = false; // clear the flag to stop recording
                        resultBuilder.append("\n[停止]");
                    }
                    // Append the current recognition result
                    resultBuilder.append(result);
                }
            }

            // Print the accumulated recognition results
            System.out.println("最终识别结果: " + resultBuilder.toString());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    private static float calculateVolume(byte[] buffer, int bytesRead) {
        long sum = 0;
        for (int i = 0; i + 1 < bytesRead; i += 2) {
            // Assemble each little-endian 16-bit sample
            int sample = (buffer[i] & 0xFF) | (buffer[i + 1] << 8);
            sum += Math.abs(sample);
        }
        float average = (float) sum / (bytesRead / 2);
        return average / Short.MAX_VALUE; // normalize to the 0..1 range
    }
}

@@ -0,0 +1,66 @@
package org.example;

import org.vosk.LibVosk;
import org.vosk.LogLevel;
import org.vosk.Model;
import org.vosk.Recognizer;

import javax.sound.sampled.*;
import java.io.ByteArrayOutputStream;
import java.io.IOException;

// Speech recognition example: recognized content is not printed after a pause
public class demo4 {

    public static void main(String[] args) throws IOException {
        LibVosk.setLogLevel(LogLevel.DEBUG);

        // Note: these sample/frame rates disagree with each other and with the 16 kHz mono
        // format used by the other demos, which likely explains the unreliable recognition here
        AudioFormat format = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, 60000, 16, 2, 4, 44100, false);
        DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);

        TargetDataLine microphone;
        SourceDataLine speakers;

        try (Model model = new Model("src\\main\\resources\\model\\vosk-model-cn-0.22");
             Recognizer recognizer = new Recognizer(model, 120000)) {
            System.out.println("语言识别模型载入完成...");
            try {
                microphone = (TargetDataLine) AudioSystem.getLine(info);
                microphone.open(format);
                microphone.start();

                ByteArrayOutputStream out = new ByteArrayOutputStream();
                int numBytesRead;
                int CHUNK_SIZE = 1024;
                int bytesRead = 0;

                DataLine.Info dataLineInfo = new DataLine.Info(SourceDataLine.class, format);
                speakers = (SourceDataLine) AudioSystem.getLine(dataLineInfo);
                speakers.open(format);
                speakers.start();

                byte[] b = new byte[4096];
                while (bytesRead <= 100000000) {
                    numBytesRead = microphone.read(b, 0, CHUNK_SIZE);
                    bytesRead += numBytesRead;
                    out.write(b, 0, numBytesRead);
                    // Echo the captured audio to the speakers
                    speakers.write(b, 0, numBytesRead);
                    if (recognizer.acceptWaveForm(b, numBytesRead)) {
                        System.out.println(recognizer.getResult());
                    } else {
                        System.out.println(recognizer.getPartialResult());
                    }
                }
                System.out.println(recognizer.getFinalResult());

                speakers.drain();
                speakers.close();
                microphone.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}

@@ -0,0 +1,84 @@
package org.example;

import org.vosk.LibVosk;
import org.vosk.LogLevel;
import org.vosk.Model;
import org.vosk.Recognizer;

import javax.sound.sampled.*;
import java.io.ByteArrayOutputStream;
import java.io.IOException;

// Speech recognition example: simple wake-word driven dialogue
public class demo5 {

    private static final String WAKE_WORD = "小爱同学"; // wake word

    public static void main(String[] args) {
        // Set the log level
        LibVosk.setLogLevel(LogLevel.DEBUG);

        // Configure the audio format
        AudioFormat format = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, 16000, 16, 1, 2, 16000, true);
        DataLine.Info inputInfo = new DataLine.Info(TargetDataLine.class, format);
        DataLine.Info outputInfo = new DataLine.Info(SourceDataLine.class, format);

        try (Model model = new Model("src/main/resources/model/vosk-model-cn-0.22");
             Recognizer recognizer = new Recognizer(model, 16000)) {

            // Open and start the microphone
            try (TargetDataLine microphone = (TargetDataLine) AudioSystem.getLine(inputInfo)) {
                microphone.open(format);
                microphone.start();

                // Prepare audio output (used for monitoring/testing)
                try (SourceDataLine speakers = (SourceDataLine) AudioSystem.getLine(outputInfo)) {
                    speakers.open(format);
                    speakers.start();

                    ByteArrayOutputStream out = new ByteArrayOutputStream();
                    byte[] buffer = new byte[4096];
                    int bytesRead;
                    boolean wakeWordDetected = false;

                    System.out.println("等待唤醒词...");
                    while (true) {
                        bytesRead = microphone.read(buffer, 0, buffer.length);
                        if (bytesRead <= 0) break; // no more audio available

                        // Play the captured audio back through the speakers (optional)
                        speakers.write(buffer, 0, bytesRead);

                        // Always feed the audio into the recognizer so partial results can contain the wake word
                        boolean hasResult = recognizer.acceptWaveForm(buffer, bytesRead);

                        // Simple wake-word detection on the partial result
                        if (!wakeWordDetected) {
                            String partialResult = recognizer.getPartialResult();
                            if (partialResult.replaceAll("\\s+", "").contains(WAKE_WORD)) {
                                System.out.println("唤醒词检测到,开始语音识别...");
                                wakeWordDetected = true;
                            }
                        } else {
                            // After the wake word, print full or partial recognition results
                            if (hasResult) {
                                System.out.println("识别结果: " + recognizer.getResult());
                            } else {
                                System.out.println("部分结果: " + recognizer.getPartialResult());
                            }
                        }
                    }
                    System.out.println("最终结果: " + recognizer.getFinalResult());
                    speakers.drain();
                } catch (Exception e) {
                    System.err.println("音频播放错误: " + e.getMessage());
                }
            } catch (Exception e) {
                System.err.println("音频输入错误: " + e.getMessage());
            }
        } catch (IOException e) {
            System.err.println("模型加载错误: " + e.getMessage());
        }
    }
}

@@ -0,0 +1,4 @@
package org.example;

public class openAiDemo {
}

Binary file not shown.