阿里云实时语音识别
前端:
<script setup>
/**
 * Microphone recorder that streams WebM/Opus chunks to the speech-recognition
 * relay over a WebSocket and renders the text the relay sends back.
 *
 * Server messages are expected to be JSON of the form `{ type, content }`.
 * `changed` carries the in-progress text of the current sentence; `end` and
 * `completed` carry finalized text. Anything that is not such a JSON object is
 * displayed verbatim.
 */
import { ref, onMounted, onBeforeUnmount } from 'vue'

// Address of the relay server.
const WS_URL = 'ws://localhost:3002'
// MediaRecorder timeslice: one chunk is emitted (and sent) per interval.
const CHUNK_INTERVAL_MS = 500
// Message types whose content is final and must be kept when the next sentence starts.
const FINAL_TYPES = ['end', 'completed']

const isRecording = ref(false)
const transcript = ref('')
let mediaRecorder = null
let ws = null
// Finalized sentences of the current recording; partial results are shown after it.
let finalizedText = ''

/**
 * Update the transcript from one server message.
 * @param {MessageEvent} event - WebSocket message event.
 */
const handleServerMessage = (event) => {
  console.log("接收到消息:", event.data)

  let message = null
  try {
    message = JSON.parse(event.data)
  } catch {
    // Not JSON: fall through and show the payload unchanged.
  }
  if (message === null || typeof message !== 'object') {
    transcript.value = event.data
    return
  }

  const content = typeof message.content === 'string' ? message.content : ''
  if (FINAL_TYPES.includes(message.type)) {
    finalizedText += content
    transcript.value = finalizedText
  } else if (message.type === 'changed') {
    transcript.value = finalizedText + content
  } else {
    transcript.value = event.data
  }
}

/** Stop every track of a media stream so the browser releases the microphone. */
const releaseStream = (stream) => {
  stream.getTracks().forEach((track) => track.stop())
}

/**
 * Ask for microphone access and start streaming audio chunks to the server.
 * On any failure the recording state is rolled back so the button stays usable.
 */
const startRecording = async () => {
  if (isRecording.value) return
  // Set the flag before awaiting so a second click during the permission prompt is ignored.
  isRecording.value = true
  transcript.value = ''
  finalizedText = ''

  let stream = null
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true })

    // The user pressed stop while the permission prompt was still open.
    if (!isRecording.value) {
      releaseStream(stream)
      return
    }

    // Chunks are WebM/Opus (not WAV).
    const recorder = new MediaRecorder(stream, { mimeType: 'audio/webm;codecs=opus' })
    recorder.ondataavailable = (e) => {
      if (e.data.size === 0 || !ws || ws.readyState !== WebSocket.OPEN) return
      // Send the Blob itself: going through arrayBuffer().then(...) would not
      // guarantee that chunks reach the socket in recording order.
      ws.send(e.data)
    }
    recorder.onstop = () => {
      console.log("停止录音")
    }
    recorder.start(CHUNK_INTERVAL_MS)
    mediaRecorder = recorder
  } catch (err) {
    console.error('无法开始录音:', err)
    if (stream) releaseStream(stream)
    mediaRecorder = null
    isRecording.value = false
  }
}

/** Stop the active recording, if any, and release the microphone. */
const stopRecording = () => {
  if (!isRecording.value) return
  isRecording.value = false
  if (!mediaRecorder) return

  const recorder = mediaRecorder
  mediaRecorder = null
  if (recorder.state !== 'inactive') {
    recorder.stop()
  }
  releaseStream(recorder.stream)
}

onMounted(() => {
  ws = new WebSocket(WS_URL)
  ws.onopen = () => {
    console.log('WebSocket连接成功')
  }
  ws.onerror = (event) => {
    console.error('WebSocket连接错误:', event)
  }
  ws.onmessage = handleServerMessage
})

onBeforeUnmount(() => {
  // Do not leave the microphone open after the component is gone.
  stopRecording()
  if (ws) {
    ws.close()
    ws = null
  }
})
</script>

<template>
  <div style="padding: 40px; max-width: 600px; margin: auto;">
    <el-button type="primary" @click="isRecording ? stopRecording() : startRecording()">
      {{ isRecording ? '停止录音' : '开始录音' }}
    </el-button>
    <div style="margin-top: 30px;">
      <el-card>
        <div>识别文本:</div>
        <div style="min-height: 40px; color: #333;width: 600px;">{{ transcript }}</div>
      </el-card>
    </div>
  </div>
</template>

<style scoped>
.logo {
  height: 6em;
  padding: 1.5em;
  will-change: filter;
  transition: filter 300ms;
}
.logo:hover {
  filter: drop-shadow(0 0 2em #646cffaa);
}
.logo.vue:hover {
  filter: drop-shadow(0 0 2em #42b883aa);
}
</style>
后端:
/**
 * WebSocket relay for real-time speech recognition.
 *
 * Each client connection streams WebM/Opus chunks. For every recording the
 * server runs its own ffmpeg process (WebM -> 16 kHz mono 16-bit PCM) and its
 * own Alibaba Cloud NLS transcriber, and sends recognition results back to the
 * same client as JSON `{ type, content }`.
 */
const express = require('express');
const http = require('http');
const WebSocket = require('ws');
const Nls = require('alibabacloud-nls');
const { PassThrough } = require('stream');
const ffmpegPath = require('@ffmpeg-installer/ffmpeg').path;
const ffmpeg = require('fluent-ffmpeg');

const app = express();
const server = http.createServer(app);
const wss = new WebSocket.Server({ server });

// NLS credentials: taken from the environment when set, otherwise fill in the
// fallback strings by hand.
const ALI_APP_KEY = process.env.ALI_APP_KEY || '';
const ALI_TOKEN = process.env.ALI_TOKEN || '';
const ALI_NLS_URL = 'wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1';

// First four bytes of a WebM/Matroska file (EBML header ID). Only the first
// chunk of a recording starts with them, so they mark the start of a new stream.
const EBML_MAGIC = Buffer.from([0x1a, 0x45, 0xdf, 0xa3]);

ffmpeg.setFfmpegPath(ffmpegPath);

// Every live transcoding/recognition pipeline, one per active recording.
const activeSessions = new Set();

/**
 * Tell whether a chunk is the beginning of a new WebM stream.
 * @param {Buffer} chunk - Binary payload received from the client.
 * @returns {boolean} True when the chunk starts with the EBML header ID.
 */
function startsNewRecording(chunk) {
  return chunk.length >= EBML_MAGIC.length
    && chunk.subarray(0, EBML_MAGIC.length).equals(EBML_MAGIC);
}

/**
 * Parse a transcriber event payload and forward its result text to the client.
 * @param {object} session - Session the event belongs to.
 * @param {string} type - Message type sent to the client ('changed', 'end', 'completed').
 * @param {string} msg - JSON string delivered by the transcriber event.
 */
function forwardResult(session, type, msg) {
  let parsed;
  try {
    parsed = JSON.parse(msg);
  } catch (err) {
    console.error(`[${type}] 无法解析识别结果:`, err);
    return;
  }
  console.log(`${type}:`, parsed);

  // The client may have disconnected while the result was in flight.
  if (session.ws.readyState !== WebSocket.OPEN) return;
  session.ws.send(JSON.stringify({ type, content: parsed?.payload?.result || '' }));
}

/**
 * Tear down one session: close the ffmpeg input, kill ffmpeg and shut the
 * transcriber down. Safe to call more than once.
 * @param {object} session - Session created by startSession.
 */
function stopSession(session) {
  if (session.stopped) return;
  session.stopped = true;
  activeSessions.delete(session);

  session.input.end();
  // Killing makes the command emit 'error'; the handler installed in
  // startSession absorbs it because the session is already marked stopped.
  session.command.kill('SIGKILL');
  try {
    session.transcriber.shutdown();
  } catch (err) {
    console.error('关闭识别连接失败:', err);
  }
}

/**
 * Connect the transcriber and, once it is ready, pump PCM data into it.
 * PCM produced while the connection is being set up stays buffered in the
 * (not yet flowing) output stream. Never rejects.
 * @param {object} session - Session whose transcriber should be started.
 */
async function startTranscriber(session) {
  const { transcriber, pcm } = session;
  try {
    await transcriber.start(transcriber.defaultStartParams(), true, 6000);
  } catch (err) {
    if (!session.stopped) {
      console.log(`[识别启动失败] ${err}`);
    }
    stopSession(session);
    return;
  }

  // The session was torn down while connecting: make sure the connection is released.
  if (session.stopped) {
    transcriber.shutdown();
    return;
  }

  pcm.on('data', (pcmChunk) => {
    if (session.stopped) return;
    try {
      transcriber.sendAudio(pcmChunk);
    } catch (err) {
      console.error('发送音频失败:', err);
      stopSession(session);
    }
  });
}

/**
 * Create the ffmpeg + transcriber pipeline for one recording of one client.
 * @param {WebSocket} ws - Client socket that receives the recognition results.
 * @returns {object} The session; write WebM chunks to `session.input`.
 */
function startSession(ws) {
  const input = new PassThrough();
  const command = ffmpeg()
    .input(input)
    .inputFormat('webm')
    .inputOptions('-fflags +genpts')
    .audioCodec('pcm_s16le')
    .audioChannels(1)
    .audioFrequency(16000)
    .format('s16le');
  const transcriber = new Nls.SpeechTranscription({
    url: ALI_NLS_URL,
    appkey: ALI_APP_KEY,
    token: ALI_TOKEN,
  });
  const session = { ws, input, command, pcm: null, transcriber, stopped: false };
  activeSessions.add(session);

  // fluent-ffmpeg reports failures on the command object, not on the piped
  // output stream; without this listener an ffmpeg error is an unhandled
  // 'error' event.
  command.on('error', (err) => {
    if (!session.stopped) {
      console.error('[音频转码失败]', err.message);
    }
    stopSession(session);
  });

  session.pcm = command.pipe();
  session.pcm.on('error', () => stopSession(session));
  session.pcm.on('end', () => stopSession(session));

  transcriber.on('started', (msg) => {
    console.log('开始识别:', msg);
  });
  transcriber.on('changed', (msg) => forwardResult(session, 'changed', msg));
  // Sentence-final result; 'changed' only carries intermediate text.
  transcriber.on('end', (msg) => forwardResult(session, 'end', msg));
  transcriber.on('completed', (msg) => forwardResult(session, 'completed', msg));
  transcriber.on('failed', (msg) => {
    console.log('识别失败:', msg);
  });
  transcriber.on('closed', () => {
    console.log('连接关闭');
  });

  void startTranscriber(session);
  return session;
}

wss.on('connection', (ws) => {
  console.log('新用户连接WebSocket成功');
  // Pipeline of the recording currently streamed over this connection.
  let session = null;

  ws.on('message', (data, isBinary) => {
    // Text frames are not audio; never feed them to ffmpeg.
    if (typeof data === 'string' || isBinary === false) return;

    const chunk = Buffer.isBuffer(data) ? data : Buffer.from(data);
    if (startsNewRecording(chunk)) {
      // A fresh WebM header means the client started a new recording on the
      // same socket: it needs its own decoder and recognition task.
      if (session !== null) stopSession(session);
      session = startSession(ws);
    } else if (session === null || session.stopped) {
      // Continuation data without a live pipeline cannot be decoded.
      return;
    }

    // Written synchronously, before any await, so chunks reach ffmpeg in
    // arrival order with the header first.
    session.input.write(chunk);
  });

  ws.on('error', (err) => {
    console.error('WebSocket连接错误:', err);
    if (session !== null) stopSession(session);
  });

  ws.on('close', () => {
    if (session !== null) stopSession(session);
  });
});

/** Stop every active transcription session. */
const stopAliyunTranscription = () => {
  for (const session of [...activeSessions]) {
    stopSession(session);
  }
};
// Start accepting HTTP/WebSocket connections on port 3002 and log the endpoint once bound.
const announceListening = () => {
  console.log('WebSocket server running on ws://localhost:3002');
};
server.listen(3002, announceListening);