Implementing a Java WebSocket VAD Client
Below is a Java-based WebSocket ASR client that supports real-time audio capture, voice activity detection (VAD), and punctuation restoration:
1. Basic Java client
import javax.sound.sampled.*;
import org.java_websocket.client.WebSocketClient;
import org.java_websocket.handshake.ServerHandshake;
import java.net.URI;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.io.ByteArrayOutputStream;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.TimeUnit;

public class AsrClient {
    private static final int SAMPLE_RATE = 16000;
    private static final int CHUNK_SIZE = 1024;
    private static final float SILENCE_THRESHOLD = 0.02f;
    private static final int SILENCE_TIMEOUT_MS = 1500;

    private final ArrayBlockingQueue<byte[]> audioQueue = new ArrayBlockingQueue<>(100);
    private volatile boolean isRecording = false;

    public static void main(String[] args) throws Exception {
        new AsrClient().start("ws://localhost:8765");
    }

    public void start(String serverUri) throws Exception {
        // WebSocket connection; block until the handshake completes before streaming
        WSClient wsClient = new WSClient(new URI(serverUri));
        wsClient.connectBlocking();

        // Audio capture thread
        Thread captureThread = new Thread(() -> {
            try {
                AudioFormat format = new AudioFormat(SAMPLE_RATE, 16, 1, true, false);
                DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
                if (!AudioSystem.isLineSupported(info)) {
                    System.err.println("Unsupported audio format");
                    return;
                }
                TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
                line.open(format);
                line.start();
                byte[] buffer = new byte[CHUNK_SIZE];
                System.out.println("Recording started...");
                while (isRecording) {
                    int bytesRead = line.read(buffer, 0, buffer.length);
                    if (bytesRead > 0) {
                        audioQueue.put(buffer.clone());
                    }
                }
                line.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        });

        // Processing thread: VAD, then send on end of utterance
        Thread processThread = new Thread(() -> {
            try {
                int silenceCounter = 0;
                ByteArrayOutputStream audioBuffer = new ByteArrayOutputStream();
                while (isRecording || !audioQueue.isEmpty()) {
                    byte[] chunk = audioQueue.poll(100, TimeUnit.MILLISECONDS);
                    if (chunk == null) continue;

                    // VAD check
                    boolean hasVoice = isVoiceActive(chunk);
                    if (hasVoice) {
                        silenceCounter = 0;
                        audioBuffer.write(chunk);
                    } else if (audioBuffer.size() > 0) {
                        // 16-bit samples: CHUNK_SIZE bytes hold CHUNK_SIZE / 2 samples
                        silenceCounter += (CHUNK_SIZE / 2) * 1000 / SAMPLE_RATE;
                        if (silenceCounter >= SILENCE_TIMEOUT_MS) {
                            // Send the buffered utterance
                            wsClient.send(audioBuffer.toByteArray());
                            audioBuffer.reset();
                            silenceCounter = 0;
                        }
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        });

        // Start the threads
        isRecording = true;
        captureThread.start();
        processThread.start();

        // Stop on any console input
        System.in.read();
        isRecording = false;
        captureThread.join();
        processThread.join();
        wsClient.close();
    }

    private boolean isVoiceActive(byte[] audio) {
        // Simple energy-based VAD
        short[] samples = new short[audio.length / 2];
        ByteBuffer.wrap(audio).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(samples);
        double sum = 0;
        for (short sample : samples) {
            sum += sample * sample;
        }
        double rms = Math.sqrt(sum / samples.length) / 32768.0;
        return rms > SILENCE_THRESHOLD;
    }

    class WSClient extends WebSocketClient {
        public WSClient(URI serverUri) {
            super(serverUri);
        }

        @Override
        public void onOpen(ServerHandshake handshakedata) {
            System.out.println("Connection established");
            // Send client info
            send("{\"type\":\"client_info\",\"sample_rate\":16000}");
        }

        @Override
        public void onMessage(String message) {
            System.out.println("Recognition result: " + message);
        }

        @Override
        public void onClose(int code, String reason, boolean remote) {
            System.out.println("Connection closed: " + reason);
        }

        @Override
        public void onError(Exception ex) {
            ex.printStackTrace();
        }
    }
}
2. Required dependencies (Maven)
<dependencies>
    <!-- WebSocket client -->
    <dependency>
        <groupId>org.java-websocket</groupId>
        <artifactId>Java-WebSocket</artifactId>
        <version>1.5.3</version>
    </dependency>
    <!-- Audio processing -->
    <dependency>
        <groupId>com.googlecode.soundlibs</groupId>
        <artifactId>tritonus-share</artifactId>
        <version>0.3.7-2</version>
    </dependency>
</dependencies>
3. Extended version with advanced features
// Note: this version additionally requires the org.json library on the classpath.
import org.json.JSONObject;

import java.net.URI;
import java.util.Base64;
import java.util.HashMap;
import java.util.Map;

public class EnhancedAsrClient extends AsrClient {

    @Override
    public void start(String serverUri) throws Exception {
        // Add an authentication header
        Map<String, String> headers = new HashMap<>();
        headers.put("Authorization", "Bearer your_token");
        EnhancedWSClient wsClient = new EnhancedWSClient(new URI(serverUri), headers);
        // ...the rest is the same as the basic version...
    }

    class EnhancedWSClient extends WSClient {
        private int messageId = 0;

        public EnhancedWSClient(URI serverUri, Map<String, String> headers) {
            super(serverUri);
            if (headers != null) {
                // addHeader() must be called before connect()
                headers.forEach(this::addHeader);
            }
        }

        @Override
        public void onMessage(String message) {
            try {
                JSONObject result = new JSONObject(message);
                if (result.has("text")) {
                    System.out.printf("[%s] result: %s%n",
                            result.optString("time"),
                            result.getString("text"));
                }
            } catch (Exception e) {
                System.out.println("Raw message: " + message);
            }
        }

        public void sendAudio(byte[] audio) {
            JSONObject msg = new JSONObject();
            msg.put("id", ++messageId);
            msg.put("type", "audio");
            msg.put("data", Base64.getEncoder().encodeToString(audio));
            send(msg.toString());
        }
    }
}
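A hypothetical usage sketch for the enhanced client: connect with the authentication header, then push one chunk of audio through sendAudio(). The class name EnhancedClientDemo, the URL, and the token are placeholders, not part of the original code:

import java.net.URI;
import java.util.Map;

public class EnhancedClientDemo {
    public static void main(String[] args) throws Exception {
        EnhancedAsrClient app = new EnhancedAsrClient();
        EnhancedAsrClient.EnhancedWSClient ws = app.new EnhancedWSClient(
                new URI("ws://localhost:8765"),
                Map.of("Authorization", "Bearer your_token"));
        ws.connectBlocking();                         // wait for the handshake

        byte[] oneChunkOfSilence = new byte[1024];    // 32 ms of 16-bit silence at 16 kHz
        ws.sendAudio(oneChunkOfSilence);              // framed as {"id":1,"type":"audio","data":"..."}

        ws.closeBlocking();
    }
}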
4. Key features
- Audio capture:
  - Uses the javax.sound.sampled package for cross-platform audio capture
  - Supports 16 kHz mono PCM
- VAD detection:
  // Simple RMS-energy-based VAD
  double rms = Math.sqrt(sum / samples.length) / 32768.0;
  return rms > SILENCE_THRESHOLD;
- Protocol design (a message example follows this list):
  - The client sends its sample rate when the connection opens
  - Messages are exchanged as JSON
  - Audio data is transmitted Base64-encoded
- Performance:
  - A bounded blocking queue buffers captured audio so the recording thread never blocks on the network
  - VAD and sending run in a separate processing thread
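For reference, a minimal sketch of the two message shapes implied by the client code above; the field names mirror onOpen() and sendAudio(), and the Python server must agree with them on its side:

import org.json.JSONObject;
import java.util.Base64;

public class ProtocolExample {
    public static void main(String[] args) {
        // Handshake message sent right after the connection opens
        JSONObject clientInfo = new JSONObject()
                .put("type", "client_info")
                .put("sample_rate", 16000);
        System.out.println(clientInfo);  // e.g. {"type":"client_info","sample_rate":16000}

        // One buffered utterance, framed as JSON with a Base64-encoded PCM payload
        byte[] pcm = new byte[]{0, 0, 12, 0, -5, -1};  // placeholder samples
        JSONObject audioMsg = new JSONObject()
                .put("id", 1)
                .put("type", "audio")
                .put("data", Base64.getEncoder().encodeToString(pcm));
        System.out.println(audioMsg);
    }
}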
5. Usage
- Start the server:
  python server.py # the Python server provided earlier
- Run the Java client:
  mvn compile exec:java -Dexec.mainClass="AsrClient"
- Tunable parameters (see the timing note below):
  // Tunable parameters
  private static final float SILENCE_THRESHOLD = 0.02f;  // silence threshold (normalized RMS)
  private static final int CHUNK_SIZE = 1024;             // size in bytes of each captured audio chunk
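A quick sanity check on how these constants relate to real time and signal level; the arithmetic below simply follows from the values above:

// Timing and level implied by the default constants
int sampleRate = 16000;                                   // SAMPLE_RATE, Hz
int chunkBytes = 1024;                                    // CHUNK_SIZE
int samplesPerChunk = chunkBytes / 2;                     // 16-bit PCM -> 2 bytes per sample = 512
double chunkMs = samplesPerChunk * 1000.0 / sampleRate;   // = 32 ms of audio per chunk

// SILENCE_TIMEOUT_MS = 1500 therefore corresponds to about 1500 / 32 ≈ 47 silent chunks
double silentChunksNeeded = 1500 / chunkMs;

// SILENCE_THRESHOLD = 0.02 on the 0..1 RMS scale is roughly 20*log10(0.02) ≈ -34 dBFS;
// raise it for noisy rooms, lower it for quiet speakers
double thresholdDb = 20 * Math.log10(0.02);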
6. Troubleshooting
Problem 1: No audio device can be found
- Solution: check the system's recording permission, or select a specific device:
Mixer.Info[] mixers = AudioSystem.getMixerInfo();
// List the available devices, then pass the one you want explicitly
line = AudioSystem.getTargetDataLine(format, mixers[0]);
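A small self-contained sketch of the "list the available devices" step, using only standard javax.sound.sampled calls; picking index 0 at the end is just an example:

import javax.sound.sampled.*;

public class ListCaptureDevices {
    public static void main(String[] args) throws Exception {
        AudioFormat format = new AudioFormat(16000, 16, 1, true, false);
        DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);

        // Print every mixer that can open a 16 kHz mono capture line
        Mixer.Info[] mixers = AudioSystem.getMixerInfo();
        for (int i = 0; i < mixers.length; i++) {
            Mixer mixer = AudioSystem.getMixer(mixers[i]);
            if (mixer.isLineSupported(info)) {
                System.out.printf("[%d] %s - %s%n", i, mixers[i].getName(), mixers[i].getDescription());
            }
        }

        // Then open the line on the device you picked (index 0 is only an example)
        TargetDataLine line = AudioSystem.getTargetDataLine(format, mixers[0]);
        line.open(format);
        System.out.println("Opened: " + mixers[0].getName());
        line.close();
    }
}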
Problem 2: Unstable WebSocket connection
- Solution: add a reconnection mechanism (see the sketch after the snippet below for where to call it)
// Waits briefly, then re-establishes the connection
void reconnectWithDelay() {
    try {
        Thread.sleep(5000);
        reconnectBlocking();   // provided by WebSocketClient
    } catch (Exception e) {
        e.printStackTrace();
    }
}
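One way to wire this in is to trigger the delayed reconnect from onClose(). The retry runs on its own thread so it never blocks, or gets rejected by, the library's internal read thread. This is a sketch; the class name ReconnectingClient is illustrative:

import org.java_websocket.client.WebSocketClient;
import org.java_websocket.handshake.ServerHandshake;
import java.net.URI;

public class ReconnectingClient extends WebSocketClient {
    public ReconnectingClient(URI uri) { super(uri); }

    @Override public void onOpen(ServerHandshake handshake) { System.out.println("connected"); }
    @Override public void onMessage(String message) { System.out.println(message); }
    @Override public void onError(Exception ex) { ex.printStackTrace(); }

    @Override
    public void onClose(int code, String reason, boolean remote) {
        System.out.println("Connection closed: " + reason);
        if (remote) {
            // Retry on a fresh thread, waiting a few seconds before reconnecting
            new Thread(() -> {
                try {
                    Thread.sleep(5000);
                    reconnectBlocking();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }).start();
        }
    }
}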
Problem 3: Latency too high
- Suggestions:
  - Reduce CHUNK_SIZE (at the cost of higher CPU load)
  - Compress the audio with Opus (requires server-side support)
This Java client implements the full interaction flow with the Python server, including real-time audio processing and punctuation restoration. It can be extended as needed with features such as audio compression, an adaptive VAD threshold (see the sketch below), or multi-language support.
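As an illustration of the adaptive-threshold idea, the sketch below tracks the noise floor with an exponential moving average and treats a chunk as speech only when its RMS clearly exceeds that floor. The class name, smoothing factor, and margin are illustrative choices, not part of the client above:

// Hypothetical replacement for the fixed SILENCE_THRESHOLD check: the noise floor
// is estimated from non-speech chunks, so the threshold adapts to the room.
public class AdaptiveVad {
    private double noiseFloor = 0.01;          // initial RMS estimate of background noise
    private static final double ALPHA = 0.05;  // smoothing factor for the noise-floor EMA
    private static final double MARGIN = 3.0;  // speech must exceed the floor by this factor

    public boolean isVoiceActive(double rms) {
        boolean speech = rms > noiseFloor * MARGIN;
        if (!speech) {
            // Only adapt on silence, so speech does not drag the floor upward
            noiseFloor = (1 - ALPHA) * noiseFloor + ALPHA * rms;
        }
        return speech;
    }
}

The rms argument is the same normalized value already computed in isVoiceActive() of AsrClient; swapping the fixed rms > SILENCE_THRESHOLD check for a call to this class is the only change needed.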