“首次提交”

2 years ago · ebc9c69f0a
commit ebc9c69f0a
14 changed files with 687 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,38 @@
 target/
 !.mvn/wrapper/maven-wrapper.jar
 !**/src/main/**/target/
 !**/src/test/**/target/
 ### IntelliJ IDEA ###
 .idea/modules.xml
 .idea/jarRepositories.xml
 .idea/compiler.xml
 .idea/libraries/
 *.iws
 *.iml
 *.ipr
 ### Eclipse ###
 .apt_generated
 .classpath
 .factorypath
 .project
 .settings
 .springBeans
 .sts4-cache
 ### NetBeans ###
 /nbproject/private/
 /nbbuild/
 /dist/
 /nbdist/
 /.nb-gradle/
 build/
 !**/src/main/**/build/
 !**/src/test/**/build/
 ### VS Code ###
 .vscode/
 ### Mac OS ###
 .DS_Store
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@ -0,0 +1,8 @@
 # Default ignored files
 /shelf/
 /workspace.xml
 # Editor-based HTTP Client requests
 /httpRequests/
 # Datasource local storage ignored files
 /dataSources/
 /dataSources.local.xml
--- a/.idea/encodings.xml
+++ b/.idea/encodings.xml
@ -0,0 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="Encoding">
    <file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" />
    <file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" />
  </component>
 </project>
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -0,0 +1,14 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="ExternalStorageConfigurationManager" enabled="true" />
  <component name="MavenProjectsManager">
    <option name="originalFiles">
      <list>
        <option value="$PROJECT_DIR$/pom.xml" />
      </list>
    </option>
  </component>
  <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="true" project-jdk-name="1.8 (2)" project-jdk-type="JavaSDK">
    <output url="file://$PROJECT_DIR$/out" />
  </component>
 </project>
--- a/pom.xml
+++ b/pom.xml
@ -0,0 +1,45 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.example</groupId>
    <artifactId>voice-to-text-demo</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>
    <dependencies>
        <!-- 获取音频信息 -->
        <dependency>
            <groupId>org</groupId>
            <artifactId>jaudiotagger</artifactId>
            <version>2.0.3</version>
        </dependency>
        <!-- 语音识别 -->
        <dependency>
            <groupId>net.java.dev.jna</groupId>
            <artifactId>jna</artifactId>
            <version>5.7.0</version>
        </dependency>
        <dependency>
            <groupId>com.alphacephei</groupId>
            <artifactId>vosk</artifactId>
            <version>0.3.32</version>
        </dependency>
        <!-- 文字转语音 -->
        <dependency>
            <groupId>com.hynnet</groupId>
            <artifactId>jacob</artifactId>
            <version>1.18</version>
        </dependency>
        <!-- openai -->
    </dependencies>
 </project>
--- a/src/main/java/org/example/Main.java
+++ b/src/main/java/org/example/Main.java
@ -0,0 +1,41 @@
 package org.example;
 import java.io.FileInputStream;
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import javax.sound.sampled.*;
 import org.vosk.LogLevel;
 import org.vosk.Recognizer;
 import org.vosk.LibVosk;
 import org.vosk.Model;
 /**
  * Transcribes a bundled WAV file with Vosk and prints the final recognition
  * result followed by the total processing time.
  */
 public class Main {
    /** Sample rate handed to Vosk when the audio file does not declare one. */
    private static final float DEFAULT_SAMPLE_RATE = 16000f;
    /**
     * Loads the model, streams the WAV file through a recognizer in 4 KiB chunks
     * and prints the final result. Any failure is reported on standard output;
     * the elapsed time is printed in every case.
     *
     * @param args unused
     * @throws IOException                   declared for compatibility; failures are caught and printed
     * @throws UnsupportedAudioFileException declared for compatibility; failures are caught and printed
     */
    public static void main(String[] args) throws IOException, UnsupportedAudioFileException {
        LibVosk.setLogLevel(LogLevel.DEBUG);
        long startTime = System.currentTimeMillis();
        // The raw file stream is its own resource so it is closed even when
        // AudioSystem rejects the file before an AudioInputStream exists.
        try (Model model = new Model("src\\main\\resources\\model\\vosk-model-cn-0.22");
             InputStream fileIn = new BufferedInputStream(new FileInputStream("src\\main\\resources\\voice\\dhxy.wav"));
             AudioInputStream ais = AudioSystem.getAudioInputStream(fileIn);
             // Use the rate declared by the file instead of assuming 16 kHz.
             Recognizer recognizer = new Recognizer(model, resolveSampleRate(ais.getFormat()))) {
            int bytes;
            byte[] b = new byte[4096];
            while ((bytes = ais.read(b)) >= 0) {
                recognizer.acceptWaveForm(b, bytes);
            }
            System.out.println(recognizer.getFinalResult() + System.lineSeparator());
        } catch (Exception e) {
            System.out.println("文件读取错误"+e);
        }
        long endTime = System.currentTimeMillis();
        long durationMillis = endTime - startTime;
        System.out.println("处理总耗时: " + durationMillis + " 毫秒");
    }
    /**
     * Returns the sample rate declared by {@code format}, falling back to
     * {@link #DEFAULT_SAMPLE_RATE} when it is unspecified (non-positive).
     */
    private static float resolveSampleRate(AudioFormat format) {
        float declared = format.getSampleRate();
        return declared > 0 ? declared : DEFAULT_SAMPLE_RATE;
    }
 }
--- a/src/main/java/org/example/TxtToSoundUtils.java
+++ b/src/main/java/org/example/TxtToSoundUtils.java
@ -0,0 +1,71 @@
 package org.example;
 import com.jacob.activeX.ActiveXComponent;
 import com.jacob.com.Dispatch;
 import com.jacob.com.Variant;
 /**
  * Text-to-speech helper that drives the {@code Sapi.*} COM objects through JACOB.
  */
 public class TxtToSoundUtils {
    /** WAV file written by {@link #textToSpeech(String)}. */
    private static final String DEFAULT_OUTPUT_PATH = "D:\\Chrome Downloads\\TestFile.wav";
    public static void main(String[] args) {
        textToSpeech("曾经有一份真挚的爱情摆在我的面前 但是我没有珍惜 等到了失去的时候才后悔莫及 尘世间最痛苦的事莫过于此 如果上天可以给我一个机会再来一次的话 我会跟那个女孩子说三个字：  我爱你");
        System.out.println("生成成功！");
    }
    /**
     * Speaks the text aloud, then renders it again into the default WAV file.
     *
     * @param text the text to speak
     */
    public static void textToSpeech(String text) {
        textToSpeech(text, DEFAULT_OUTPUT_PATH);
    }
    /**
     * Speaks the text aloud, then renders it again into a WAV file.
     * Failures are printed and not rethrown; COM objects are released in every case.
     *
     * @param text       the text to speak
     * @param outputPath path of the WAV file to create
     */
    public static void textToSpeech(String text, String outputPath) {
        ActiveXComponent voiceComponent = null;
        ActiveXComponent streamComponent = null;
        ActiveXComponent formatComponent = null;
        Dispatch spVoice = null;
        Dispatch spFileStream = null;
        Dispatch spAudioFormat = null;
        boolean streamOpen = false;
        try {
            voiceComponent = new ActiveXComponent("Sapi.SpVoice");
            spVoice = voiceComponent.getObject();
            // Volume range is 0-100.
            voiceComponent.setProperty("Volume", new Variant(100));
            // Speaking rate range is -10 to +10.
            voiceComponent.setProperty("Rate", new Variant(-2));
            // First pass: play through the default audio output.
            Dispatch.call(spVoice, "Speak", new Variant(text));
            // Second pass: build a file stream so the speech is written to disk.
            streamComponent = new ActiveXComponent("Sapi.SpFileStream");
            spFileStream = streamComponent.getObject();
            formatComponent = new ActiveXComponent("Sapi.SpAudioFormat");
            spAudioFormat = formatComponent.getObject();
            // Audio stream format type passed to SAPI.
            Dispatch.put(spAudioFormat, "Type", new Variant(22));
            Dispatch.putRef(spFileStream, "Format", spAudioFormat);
            // Open the output stream, creating the .wav file.
            Dispatch.call(spFileStream, "Open", new Variant(outputPath), new Variant(3), new Variant(true));
            streamOpen = true;
            // Redirect the voice to the file stream and speak again.
            Dispatch.putRef(spVoice, "AudioOutputStream", spFileStream);
            Dispatch.put(spVoice, "Volume", new Variant(100));
            Dispatch.put(spVoice, "Rate", new Variant(-2));
            Dispatch.call(spVoice, "Speak", new Variant(text));
            Dispatch.call(spFileStream, "Close");
            streamOpen = false;
            Dispatch.putRef(spVoice, "AudioOutputStream", null);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close the file if speaking failed after it was opened.
            if (streamOpen) {
                try {
                    Dispatch.call(spFileStream, "Close");
                } catch (RuntimeException closeFailure) {
                    closeFailure.printStackTrace();
                }
            }
            // Release every COM reference, including on the failure path.
            release(spAudioFormat);
            release(spFileStream);
            release(spVoice);
            release(formatComponent);
            release(streamComponent);
            release(voiceComponent);
        }
    }
    /** Releases a COM reference if it was created. */
    private static void release(Dispatch comObject) {
        if (comObject != null) {
            comObject.safeRelease();
        }
    }
 }
--- a/src/main/java/org/example/demo1.java
+++ b/src/main/java/org/example/demo1.java
@ -0,0 +1,68 @@
 package org.example;
 import org.vosk.LibVosk;
 import org.vosk.LogLevel;
 import org.vosk.Model;
 import org.vosk.Recognizer;
 import javax.sound.sampled.*;
 import java.io.IOException;
 /**
  * Continuous listening demo: captures 16 kHz mono audio from the microphone,
  * feeds it to a Vosk recognizer and prints the partial result after every chunk.
  */
 public class demo1 {
    /**
     * Opens the microphone, loads the model and prints partial results until
     * the process is terminated or an error occurs.
     *
     * @param args unused
     * @throws IOException declared by the signature; errors in the capture loop are caught and printed
     */
    public static void main(String[] args) throws IOException {
        LibVosk.setLogLevel(LogLevel.DEBUG);
        // 16 kHz, 16-bit, mono, signed, little-endian.
        AudioFormat captureFormat = new AudioFormat(16000, 16, 1, true, false);
        DataLine.Info lineInfo = new DataLine.Info(TargetDataLine.class, captureFormat);
        // Bail out when no capture line supports the requested format.
        if (!AudioSystem.isLineSupported(lineInfo)) {
            System.out.println("不支持的音频格式");
            System.exit(0);
        }
        TargetDataLine mic;
        try {
            mic = (TargetDataLine) AudioSystem.getLine(lineInfo);
            mic.open(captureFormat);
        } catch (LineUnavailableException unavailable) {
            unavailable.printStackTrace();
            return;
        }
        try (Model model = new Model("src\\main\\resources\\model\\vosk-model-cn-0.22");
             Recognizer recognizer = new Recognizer(model, 16000)) {
            mic.start();
            System.out.println("语言识别模型载入完成...");
            System.out.println("开始录音...");
            byte[] chunk = new byte[4096];
            // Keep reading microphone input until the process is stopped.
            for (;;) {
                int count = mic.read(chunk, 0, chunk.length);
                if (count <= 0) {
                    continue;
                }
                recognizer.acceptWaveForm(chunk, count);
                System.out.println(recognizer.getPartialResult());
            }
        } catch (Exception failure) {
            failure.printStackTrace();
        } finally {
            shutDown(mic);
        }
    }
    /** Stops and closes the line when it is still open. */
    private static void shutDown(TargetDataLine line) {
        if (line == null || !line.isOpen()) {
            return;
        }
        line.stop();
        line.close();
    }
 }
--- a/src/main/java/org/example/demo2.java
+++ b/src/main/java/org/example/demo2.java
@ -0,0 +1,102 @@
 package org.example;
 import org.vosk.LibVosk;
 import org.vosk.LogLevel;
 import org.vosk.Model;
 import org.vosk.Recognizer;
 import javax.sound.sampled.*;
 import java.io.IOException;
 /**
  * Keyword-controlled demo: listens until a partial result contains
  * {@code START_KEYWORD}, then prints partial results until one contains
  * {@code STOP_KEYWORD}.
  */
 public class demo2 {
    /** Flag that keeps the recording loop running. */
    private static volatile boolean recording = false;
    /** Keyword that starts recording. */
    private static final String START_KEYWORD = "开始";
    /** Keyword that stops recording. */
    private static final String STOP_KEYWORD = "关闭";
    /**
     * Opens the microphone, waits for the start keyword and then hands over
     * to {@link #startRecording(TargetDataLine, Model)}.
     *
     * @param args unused
     * @throws IOException declared by the signature; errors in the listen loop are caught and printed
     */
    public static void main(String[] args) throws IOException {
        LibVosk.setLogLevel(LogLevel.DEBUG);
        // 16 kHz, 16-bit, mono, signed, little-endian.
        AudioFormat captureFormat = new AudioFormat(16000, 16, 1, true, false);
        DataLine.Info lineInfo = new DataLine.Info(TargetDataLine.class, captureFormat);
        // Bail out when no capture line supports the requested format.
        if (!AudioSystem.isLineSupported(lineInfo)) {
            System.out.println("不支持的音频格式");
            System.exit(0);
        }
        TargetDataLine mic;
        try {
            mic = (TargetDataLine) AudioSystem.getLine(lineInfo);
            mic.open(captureFormat);
        } catch (LineUnavailableException unavailable) {
            unavailable.printStackTrace();
            return;
        }
        // This recognizer is only used to spot the start keyword.
        try (Model model = new Model("src\\main\\resources\\model\\vosk-model-cn-0.22");
             Recognizer keywordRecognizer = new Recognizer(model, 16000)) {
            mic.start();
            System.out.println("语言识别模型载入完成...");
            System.out.println("开始监听...");
            byte[] chunk = new byte[4096];
            boolean handedOver = false;
            while (!handedOver) {
                int count = mic.read(chunk, 0, chunk.length);
                if (count <= 0) {
                    continue;
                }
                keywordRecognizer.acceptWaveForm(chunk, count);
                if (!keywordRecognizer.getPartialResult().contains(START_KEYWORD)) {
                    continue;
                }
                System.out.println("检测到开始字样，启动录音...");
                recording = true;
                startRecording(mic, model);
                // Recording has finished; leave the listen loop.
                handedOver = true;
            }
        } catch (Exception failure) {
            failure.printStackTrace();
        } finally {
            // Release the microphone.
            if (mic != null && mic.isOpen()) {
                mic.stop();
                mic.close();
            }
        }
    }
    /**
     * Prints partial results from a fresh recognizer until one of them
     * contains the stop keyword. Errors are printed, not rethrown.
     *
     * @param microphone an opened and started capture line
     * @param model      the loaded Vosk model
     */
    private static void startRecording(TargetDataLine microphone, Model model) {
        try (Recognizer recognizer = new Recognizer(model, 16000)) {
            System.out.println("开始录音...");
            byte[] chunk = new byte[4096];
            while (recording) {
                int count = microphone.read(chunk, 0, chunk.length);
                if (count <= 0) {
                    continue;
                }
                recognizer.acceptWaveForm(chunk, count);
                String partial = recognizer.getPartialResult();
                System.out.println(partial);
                if (partial.contains(STOP_KEYWORD)) {
                    System.out.println("检测到关闭字样，停止录音...");
                    // Clearing the flag ends the loop.
                    recording = false;
                }
            }
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }
 }
--- a/src/main/java/org/example/demo3.java
+++ b/src/main/java/org/example/demo3.java
@ -0,0 +1,139 @@
 package org.example;
 import javax.sound.sampled.*;
 import org.vosk.LogLevel;
 import org.vosk.Recognizer;
 import org.vosk.LibVosk;
 import org.vosk.Model;
 import java.io.IOException;
 /**
  * Keyword-controlled demo with pause detection: after the start keyword is heard,
  * recognition runs until the stop keyword; a silence longer than
  * {@code PAUSE_DURATION_MS} closes the current segment and resets the recognizer.
  */
 public class demo3 {
    /** Flag that keeps the recording loop running. */
    private static volatile boolean recording = false;
    /** Keyword that starts recording. */
    private static final String START_KEYWORD = "开始";
    /** Keyword that stops recording. */
    private static final String STOP_KEYWORD = "停止";
    /** Length of silence, in milliseconds, that counts as a pause. */
    private static final long PAUSE_DURATION_MS = 3000;
    /** Normalized volume above which a chunk counts as activity. */
    private static final float VOLUME_THRESHOLD = 0.01f;
    /**
     * Opens the microphone, waits for the start keyword and then hands over
     * to {@link #startRecording(TargetDataLine, Model)}.
     *
     * @param args unused
     * @throws IOException declared by the signature; errors in the listen loop are caught and printed
     */
    public static void main(String[] args) throws IOException {
        LibVosk.setLogLevel(LogLevel.DEBUG);
        // 16 kHz, 16-bit, mono, signed, little-endian.
        AudioFormat format = new AudioFormat(16000, 16, 1, true, false);
        DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
        // Bail out when no capture line supports the requested format.
        if (!AudioSystem.isLineSupported(info)) {
            System.out.println("不支持的音频格式");
            System.exit(0);
        }
        TargetDataLine microphone;
        try {
            microphone = (TargetDataLine) AudioSystem.getLine(info);
            microphone.open(format);
        } catch (LineUnavailableException e) {
            e.printStackTrace();
            return;
        }
        // This recognizer is only used to spot the start keyword.
        try (Model model = new Model("src\\main\\resources\\model\\vosk-model-cn-0.22");
             Recognizer recognizer = new Recognizer(model, 16000)) {
            microphone.start();
            System.out.println("语言识别模型载入完成...");
            System.out.println("开始监听...");
            byte[] buffer = new byte[4096];
            int bytesRead;
            while (true) {
                bytesRead = microphone.read(buffer, 0, buffer.length);
                if (bytesRead > 0) {
                    recognizer.acceptWaveForm(buffer, bytesRead);
                    String result = recognizer.getPartialResult();
                    if (result.contains(START_KEYWORD)) {
                        System.out.println("检测到开始字样，启动录音...");
                        recording = true;
                        startRecording(microphone, model);
                        break; // recording has finished
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the microphone.
            if (microphone != null && microphone.isOpen()) {
                microphone.stop();
                microphone.close();
            }
        }
    }
    /**
     * Prints partial results until one contains the stop keyword, then prints
     * the collected transcript. The transcript receives the current partial
     * result once per segment (at a pause or at the stop keyword) instead of
     * once per audio chunk, so it no longer contains duplicated partials.
     *
     * @param microphone an opened and started capture line
     * @param model      the loaded Vosk model
     */
    private static void startRecording(TargetDataLine microphone, Model model) {
        try (Recognizer recognizer = new Recognizer(model, 16000)) {
            System.out.println("开始录音...");
            byte[] buffer = new byte[4096];
            int bytesRead;
            long lastActivityTime = System.currentTimeMillis();
            StringBuilder resultBuilder = new StringBuilder();
            while (recording) {
                bytesRead = microphone.read(buffer, 0, buffer.length);
                if (bytesRead > 0) {
                    recognizer.acceptWaveForm(buffer, bytesRead);
                    String result = recognizer.getPartialResult();
                    System.out.println(result);
                    float volume = calculateVolume(buffer, bytesRead);
                    long now = System.currentTimeMillis();
                    // Any chunk louder than the threshold counts as activity.
                    if (volume > VOLUME_THRESHOLD) {
                        lastActivityTime = now;
                    }
                    if (result.contains(STOP_KEYWORD)) {
                        System.out.println("检测到停止字样，停止录音...");
                        recording = false;
                        resultBuilder.append(result).append("\n[停止]");
                    } else if (now - lastActivityTime > PAUSE_DURATION_MS) {
                        System.out.println("检测到停顿超过3秒，清空识别内容...");
                        // Keep the segment text before the recognizer forgets it.
                        resultBuilder.append(result).append("\n[停顿]");
                        recognizer.reset();
                        // Restart the silence timer so one pause is reported once,
                        // not on every following chunk.
                        lastActivityTime = now;
                    }
                }
            }
            System.out.println("最终识别结果: " + resultBuilder.toString());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    /**
     * Computes the mean absolute amplitude of 16-bit little-endian samples,
     * normalized by {@link Short#MAX_VALUE}.
     *
     * @param buffer    raw PCM bytes
     * @param bytesRead number of valid bytes in {@code buffer}; a trailing odd byte is ignored
     * @return the normalized volume, or 0 when there is no complete sample
     */
    private static float calculateVolume(byte[] buffer, int bytesRead) {
        int sampleCount = bytesRead / 2;
        if (sampleCount <= 0) {
            return 0f;
        }
        long sum = 0;
        for (int s = 0; s < sampleCount; s++) {
            int i = s * 2;
            // Low byte unsigned, high byte signed: reassembles the signed sample.
            int sample = (buffer[i] & 0xFF) | (buffer[i + 1] << 8);
            sum += Math.abs(sample);
        }
        float average = (float) sum / sampleCount;
        return average / Short.MAX_VALUE;
    }
 }
--- a/src/main/java/org/example/demo4.java
+++ b/src/main/java/org/example/demo4.java
@ -0,0 +1,66 @@
 package org.example;
 import org.vosk.LibVosk;
 import org.vosk.LogLevel;
 import org.vosk.Model;
 import org.vosk.Recognizer;
 import javax.sound.sampled.*;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 /**
  * Microphone recognition demo: echoes captured audio to the speakers and prints
  * a full result at each utterance boundary and a partial result otherwise.
  */
 public class demo4 {
    /** Capture and recognition sample rate, matching the other demos. */
    private static final float SAMPLE_RATE = 16000f;
    /** Number of bytes requested from the microphone per read. */
    private static final int CHUNK_SIZE = 1024;
    /** Capture stops after this many bytes have been read. */
    private static final long MAX_CAPTURED_BYTES = 100000000L;
    /**
     * Loads the model, then captures, echoes and recognizes audio until the
     * byte limit is reached or the microphone stops delivering data.
     *
     * @param args unused
     * @throws IOException if the model cannot be loaded
     */
    public static void main(String[] args) throws IOException {
        LibVosk.setLogLevel(LogLevel.DEBUG);
        // 16-bit mono little-endian PCM; the recognizer is given the same rate,
        // so no stereo/rate fudge factor is needed.
        AudioFormat format = new AudioFormat(SAMPLE_RATE, 16, 1, true, false);
        DataLine.Info micInfo = new DataLine.Info(TargetDataLine.class, format);
        DataLine.Info speakerInfo = new DataLine.Info(SourceDataLine.class, format);
        try (Model model = new Model("src\\main\\resources\\model\\vosk-model-cn-0.22");
             Recognizer recognizer = new Recognizer(model, SAMPLE_RATE)) {
            System.out.println("语言识别模型载入完成...");
            // try-with-resources closes both lines even when an error occurs.
            try (TargetDataLine microphone = (TargetDataLine) AudioSystem.getLine(micInfo);
                 SourceDataLine speakers = (SourceDataLine) AudioSystem.getLine(speakerInfo)) {
                microphone.open(format);
                microphone.start();
                speakers.open(format);
                speakers.start();
                byte[] b = new byte[CHUNK_SIZE];
                long bytesRead = 0;
                while (bytesRead <= MAX_CAPTURED_BYTES) {
                    int numBytesRead = microphone.read(b, 0, b.length);
                    if (numBytesRead <= 0) {
                        // The line was stopped or closed; nothing more will arrive.
                        break;
                    }
                    bytesRead += numBytesRead;
                    speakers.write(b, 0, numBytesRead);
                    if (recognizer.acceptWaveForm(b, numBytesRead)) {
                        System.out.println(recognizer.getResult());
                    } else {
                        System.out.println(recognizer.getPartialResult());
                    }
                }
                System.out.println(recognizer.getFinalResult());
                speakers.drain();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
 }
--- a/src/main/java/org/example/demo5.java
+++ b/src/main/java/org/example/demo5.java
@ -0,0 +1,84 @@
 package org.example;
 import org.vosk.LibVosk;
 import org.vosk.LogLevel;
 import org.vosk.Model;
 import org.vosk.Recognizer;
 import javax.sound.sampled.*;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 /**
  * Wake-word demo: listens on the microphone until {@code WAKE_WORD} is heard,
  * then prints recognition results for everything spoken afterwards.
  */
 public class demo5 {
    /** Phrase that switches the demo from waiting to recognizing. */
    private static final String WAKE_WORD = "小爱同学";
    /**
     * Runs the capture loop. Audio is always fed to the recognizer, because the
     * wake word can only be found in text the recognizer has actually decoded.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        LibVosk.setLogLevel(LogLevel.DEBUG);
        // 16 kHz, 16-bit, mono, signed PCM. Little-endian, because the raw bytes
        // are passed straight to the recognizer.
        AudioFormat format = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, 16000, 16, 1, 2, 16000, false);
        DataLine.Info inputInfo = new DataLine.Info(TargetDataLine.class, format);
        DataLine.Info outputInfo = new DataLine.Info(SourceDataLine.class, format);
        try (Model model = new Model("src/main/resources/model/vosk-model-cn-0.22");
             Recognizer recognizer = new Recognizer(model, 16000)) {
            try (TargetDataLine microphone = (TargetDataLine) AudioSystem.getLine(inputInfo)) {
                microphone.open(format);
                microphone.start();
                // Speakers are used to echo the captured audio (for testing).
                try (SourceDataLine speakers = (SourceDataLine) AudioSystem.getLine(outputInfo)) {
                    speakers.open(format);
                    speakers.start();
                    byte[] buffer = new byte[4096];
                    int bytesRead;
                    boolean wakeWordDetected = false;
                    System.out.println("等待唤醒词...");
                    while (true) {
                        bytesRead = microphone.read(buffer, 0, buffer.length);
                        if (bytesRead <= 0) {
                            // The line was stopped or closed; nothing more will arrive.
                            break;
                        }
                        speakers.write(buffer, 0, bytesRead);
                        boolean utteranceEnded = recognizer.acceptWaveForm(buffer, bytesRead);
                        if (!wakeWordDetected) {
                            // Fetching the full result at an utterance boundary lets the
                            // recognizer start a fresh utterance while still waiting.
                            String heard = utteranceEnded ? recognizer.getResult() : recognizer.getPartialResult();
                            // Whitespace is stripped so a wake word split into several
                            // space-separated tokens still matches.
                            if (heard.replaceAll("\\s+", "").contains(WAKE_WORD)) {
                                System.out.println("唤醒词检测到，开始语音识别...");
                                wakeWordDetected = true;
                                // Drop the wake word so it is not part of the next result.
                                recognizer.reset();
                            }
                            continue;
                        }
                        if (utteranceEnded) {
                            System.out.println("识别结果: " + recognizer.getResult());
                        } else {
                            System.out.println("部分结果: " + recognizer.getPartialResult());
                        }
                    }
                    System.out.println("最终结果: " + recognizer.getFinalResult());
                    speakers.drain();
                } catch (Exception e) {
                    System.err.println("音频播放错误: " + e.getMessage());
                }
            } catch (Exception e) {
                System.err.println("音频输入错误: " + e.getMessage());
            }
        } catch (IOException e) {
            System.err.println("模型加载错误: " + e.getMessage());
        }
    }
 }
--- a/src/main/java/org/example/openAiDemo.java
+++ b/src/main/java/org/example/openAiDemo.java
@ -0,0 +1,4 @@
 package org.example;
 /**
  * Empty placeholder class with no members.
  * NOTE(review): presumably reserved for an OpenAI integration - nothing is
  * implemented yet; confirm the intent or remove the class.
  */
 public class openAiDemo {
 }
--- a/src/main/resources/voice/dhxy.wav
+++ b/src/main/resources/voice/dhxy.wav