微软云语音识别ASR示例Demo
对象存储服务 OSS 对应 Azure Blob Storage
语音识别 ASR 对应 Azure Speech-to-Text
语音合成 TTS 对应 Azure Text-to-Speech
上传 .mp3 文件或者提供 OSS 地址,返回音频对应文字的示例 Demo
依赖
<dependencies>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-webflux</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <!-- microsoft ASR -->
    <dependency>
        <groupId>com.microsoft.cognitiveservices.speech</groupId>
        <artifactId>client-sdk</artifactId>
        <version>1.43.0</version>
    </dependency>
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <optional>true</optional>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>io.projectreactor</groupId>
        <artifactId>reactor-test</artifactId>
        <scope>test</scope>
    </dependency>
</dependencies>
代码:需要在 application.properties 或 application.yaml 中配置 azure.speech.key 和 azure.speech.endpoint
package com.example.microsoftasr.controller;import com.microsoft.cognitiveservices.speech.*;
import com.microsoft.cognitiveservices.speech.audio.AudioConfig;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;import java.io.File;
import java.net.URI;
import java.nio.file.Files;@RestController
@RequestMapping("/asr")
public class TestController {@Value("${azure.speech.key}")private String speechKey;@Value("${azure.speech.endpoint}")private String speechEndpoint;@GetMapping("/hello")public String test() {return "Hello World";}@PostMapping("/recognize")public String recognize(@RequestParam(value = "file", required = false) MultipartFile file,@RequestParam(value = "url", required = false) String ossUrl) {if ((file == null || file.isEmpty()) && (ossUrl == null || ossUrl.isBlank())) {return "未提供音频文件或音频地址";}File tempInput = null;File tempWav = null;try {// 1. 保存临时原始音频if (file != null && !file.isEmpty()) {String suffix = getSuffix(file.getOriginalFilename());tempInput = File.createTempFile("audio-input-", "." + suffix);file.transferTo(tempInput);} else {String suffix = getSuffix(ossUrl);tempInput = File.createTempFile("audio-input-", "." + suffix);try (var in = new java.net.URL(ossUrl).openStream()) {Files.copy(in, tempInput.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING);}}// 2. 转换成 WAV(16kHz 单声道)tempWav = File.createTempFile("audio-output-", ".wav");if (!getSuffix(tempInput.getName()).equalsIgnoreCase("wav")) {ProcessBuilder pb = new ProcessBuilder("F:\\ffmpeg-7.1.1-full_build\\ffmpeg-7.1.1-full_build\\bin\\ffmpeg.exe", "-y","-i", tempInput.getAbsolutePath(),"-ar", "16000","-ac", "1",tempWav.getAbsolutePath());Process process = pb.inheritIO().start();int exitCode = process.waitFor();if (exitCode != 0) return "ffmpeg 转换失败,exitCode=" + exitCode;} else {Files.copy(tempInput.toPath(), tempWav.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING);}// 3. 
调用微软 ASR 识别SpeechConfig speechConfig = SpeechConfig.fromEndpoint(new URI(speechEndpoint), speechKey);speechConfig.setSpeechRecognitionLanguage("zh-CN");try (AudioConfig audioConfig = AudioConfig.fromWavFileInput(tempWav.getAbsolutePath());SpeechRecognizer recognizer = new SpeechRecognizer(speechConfig, audioConfig)) {SpeechRecognitionResult result = recognizer.recognizeOnceAsync().get();if (result.getReason() == ResultReason.RecognizedSpeech) {return result.getText();} else {return "识别失败: " + result.getReason();}}} catch (Exception e) {e.printStackTrace();return "识别异常: " + e.getMessage();} finally {try {if (tempInput != null) Files.deleteIfExists(tempInput.toPath());if (tempWav != null) Files.deleteIfExists(tempWav.toPath());} catch (Exception ex) {ex.printStackTrace();}}}private String getSuffix(String filenameOrUrl) {if (filenameOrUrl == null || !filenameOrUrl.contains(".")) return "tmp";return filenameOrUrl.substring(filenameOrUrl.lastIndexOf('.') + 1);}}