当前位置: 首页 > news >正文

Unity使用sherpa-onnx实现说话人识别

网友“软绵绵的面包人”推荐:模型 3dspeaker_speech_eres2net_base_200k_sv_zh-cn_16k-common.onnx 的识别效果比 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx 更好。

在这里插入图片描述

具体代码

using System;
using System.Collections.Generic;
using System.IO;
using SherpaOnnx;
using UnityEngine;

/// <summary>
/// Speaker identification component built on the sherpa-onnx speaker-embedding API.
/// <para>
/// <see cref="Init"/> loads the models, enrolls one reference speaker ("xuefei") and
/// runs a quick verification pass over a few test files. At runtime, audio is pushed
/// in with <see cref="AcceptData"/> and matched against enrolled speakers by
/// <see cref="Search"/>.
/// </para>
/// </summary>
public class SpeakerIdentification : MonoBehaviour
{
    /// <summary>Sample rate passed to the extractor for every waveform handled here.</summary>
    private const int SampleRate = 16000;

    private SpeakerEmbeddingExtractor extractor;
    private SpeakerEmbeddingManager manager;
    private string pathRoot;
    private string modelPath;
    private OfflineSpeechDenoiser offlineSpeechDenoiser = null;
    private string[] testFiles;

    /// <summary>Temporary buffer of samples collected for the next <see cref="Search"/> call.</summary>
    private readonly List<float> audioData = new List<float>();

    /// <summary>Minimum similarity score for <c>manager.Search</c> to report a match.</summary>
    private float threshold = 0.6f;

    // Start is called before the first frame update
    void Start()
    {
        pathRoot = Util.GetPath();
        modelPath = pathRoot + "/3dspeaker_speech_eres2net_base_200k_sv_zh-cn_16k-common.onnx";
    }

    // Release native sherpa-onnx handles together with the component.
    void OnDestroy()
    {
        ReleaseNativeResources();
    }

    /// <summary>
    /// Creates the denoiser, the embedding extractor and the speaker manager, enrolls
    /// the reference speaker and logs the identification result for each test file.
    /// Relies on <c>pathRoot</c>/<c>modelPath</c> having been set by <c>Start</c>.
    /// </summary>
    public void Init()
    {
        // Calling Init twice must not leak the previously created native objects.
        ReleaseNativeResources();

        OfflineSpeechDenoiserGtcrnModelConfig osdgmc = new OfflineSpeechDenoiserGtcrnModelConfig();
        osdgmc.Model = pathRoot + "/gtcrn_simple.onnx";

        OfflineSpeechDenoiserModelConfig osdmc = new OfflineSpeechDenoiserModelConfig();
        osdmc.NumThreads = 1;
        osdmc.Provider = "cpu";
        osdmc.Debug = 0;
        osdmc.Gtcrn = osdgmc;

        OfflineSpeechDenoiserConfig osdc = new OfflineSpeechDenoiserConfig();
        osdc.Model = osdmc;

        // The denoiser is created but not wired into the pipeline. To denoise a clip
        // before enrollment or search: offlineSpeechDenoiser.Run(samples, 16000) and
        // then SaveToWaveFile(path) on the returned DenoisedAudio.
        offlineSpeechDenoiser = new OfflineSpeechDenoiser(osdc);

        var config = new SpeakerEmbeddingExtractorConfig();
        config.Model = modelPath;
        config.Debug = 1;
        extractor = new SpeakerEmbeddingExtractor(config);
        manager = new SpeakerEmbeddingManager(extractor.Dim);

        // Enrollment audio for the reference speaker.
        var spk1Files = new string[]
        {
            pathRoot + "/xuefei1.wav",
        };
        var spk1Vec = new float[spk1Files.Length][];
        for (int i = 0; i < spk1Files.Length; ++i)
        {
            spk1Vec[i] = ComputeEmbedding(extractor, spk1Files[i]);
        }

        // Register the speaker.
        if (!manager.Add("xuefei", spk1Vec))
        {
            Debug.LogError("Failed to register xuefei");
        }

        var allSpeakers = manager.GetAllSpeakers();
        foreach (var s in allSpeakers)
        {
            Debug.Log(s);
        }

        // Verification pass. Uses the shared threshold field instead of a shadowing local.
        testFiles = new string[]
        {
            pathRoot + "/test1.wav",
            pathRoot + "/test2.wav",
            pathRoot + "/test3.wav",
        };
        foreach (var file in testFiles)
        {
            var embedding = ComputeEmbedding(extractor, file);
            var name = manager.Search(embedding, threshold);
            if (string.IsNullOrEmpty(name))
            {
                name = "<Unknown>";
            }
            Debug.Log(file + " :" + name);
        }
    }

    /// <summary>Appends samples to the buffer consumed by the next <see cref="Search"/>.</summary>
    /// <param name="data">Audio samples to buffer; a null array is ignored.</param>
    public void AcceptData(float[] data)
    {
        if (data == null)
        {
            return;
        }
        audioData.AddRange(data);
    }

    /// <summary>
    /// Saves the buffered audio to a timestamped file under <c>pathRoot</c>, computes its
    /// embedding, logs the best matching enrolled speaker (or "&lt;Unknown&gt;") and
    /// clears the buffer.
    /// </summary>
    public void Search()
    {
        if (extractor == null || manager == null)
        {
            Debug.LogWarning("SpeakerIdentification.Search called before Init");
            return;
        }
        if (audioData.Count == 0)
        {
            Debug.LogWarning("SpeakerIdentification.Search called without any audio data");
            return;
        }

        string filePath = pathRoot + "/" + DateTime.Now.ToFileTime().ToString() + ".wav";

        // NOTE(review): each call leaves a new file on disk; presumably kept for
        // debugging - confirm whether it should be deleted after use.
        Util.SaveClip(1, SampleRate, audioData.ToArray(), filePath);

        var embedding = ComputeEmbedding(extractor, filePath);
        string name = manager.Search(embedding, threshold);
        if (string.IsNullOrEmpty(name))
        {
            name = "<Unknown>";
        }
        Debug.Log("name:" + name);
        audioData.Clear();
    }

    /// <summary>
    /// Computes the speaker embedding of an audio file.
    /// The file is treated as 16-bit PCM at 16 kHz (assumed mono - TODO confirm; the
    /// fmt chunk is not inspected). When the file starts with a RIFF/WAVE header only
    /// the payload of its <c>data</c> chunk is decoded, so header bytes are no longer
    /// fed to the model as samples; a header-less file is decoded in full as before.
    /// </summary>
    /// <param name="extractor">Extractor used to create the stream and compute the embedding.</param>
    /// <param name="filename">Path of the audio file to read.</param>
    /// <returns>The embedding returned by <c>extractor.Compute</c>.</returns>
    public float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, string filename)
    {
        byte[] bytes = File.ReadAllBytes(filename);

        int offset;
        int length;
        LocatePcmData(bytes, out offset, out length);

        float[] data = new float[length / 2];
        for (int i = 0; i < data.Length; i++)
        {
            data[i] = BytesToFloat(bytes[offset + i * 2], bytes[offset + i * 2 + 1]);
        }

        return ComputeEmbedding(extractor, SampleRate, data);
    }

    /// <summary>Computes the speaker embedding of an in-memory waveform.</summary>
    /// <param name="extractor">Extractor used to create the stream and compute the embedding.</param>
    /// <param name="sample">Sample rate of <paramref name="data"/> in Hz.</param>
    /// <param name="data">Waveform samples handed to <c>AcceptWaveform</c>.</param>
    /// <returns>The embedding returned by <c>extractor.Compute</c>.</returns>
    public float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, int sample, float[] data)
    {
        // The stream wraps a native handle; dispose it as soon as the embedding is computed.
        using (var stream = extractor.CreateStream())
        {
            stream.AcceptWaveform(sample, data);
            stream.InputFinished();
            return extractor.Compute(stream);
        }
    }

    /// <summary>
    /// Converts raw little-endian 16-bit PCM bytes to floats in [-1, 1).
    /// The whole buffer is converted; a trailing odd byte is ignored.
    /// </summary>
    /// <param name="byteArray">Raw PCM bytes, two per sample.</param>
    /// <returns>One float per 16-bit sample.</returns>
    public float[] BytesToFloat(byte[] byteArray)
    {
        float[] sounddata = new float[byteArray.Length / 2];
        for (int i = 0; i < sounddata.Length; i++)
        {
            sounddata[i] = BytesToFloat(byteArray[i * 2], byteArray[i * 2 + 1]);
        }
        return sounddata;
    }

    /// <summary>Combines two bytes of a little-endian 16-bit sample into a float in [-1, 1).</summary>
    private float BytesToFloat(byte firstByte, byte secondByte)
    {
        // WAV PCM is always little-endian and shift arithmetic does not depend on the
        // host byte order, so no BitConverter.IsLittleEndian branch is needed (the old
        // branch produced byte-swapped samples on big-endian hosts).
        short s = (short)((secondByte << 8) | firstByte);

        // convert to range from -1 to (just below) 1
        return s / 32768.0F;
    }

    /// <summary>
    /// Finds the PCM payload inside <paramref name="bytes"/>. For a RIFF/WAVE buffer this
    /// is the payload of the first <c>data</c> chunk; otherwise (no header, or no data
    /// chunk found) the whole buffer is reported, which matches the previous behaviour.
    /// </summary>
    private static void LocatePcmData(byte[] bytes, out int offset, out int length)
    {
        offset = 0;
        length = bytes.Length;

        if (bytes.Length < 12 || !HasTag(bytes, 0, "RIFF") || !HasTag(bytes, 8, "WAVE"))
        {
            return;
        }

        int pos = 12;
        while (pos + 8 <= bytes.Length)
        {
            long chunkSize = unchecked((uint)(bytes[pos + 4]
                | (bytes[pos + 5] << 8)
                | (bytes[pos + 6] << 16)
                | (bytes[pos + 7] << 24)));
            int payload = pos + 8;

            if (HasTag(bytes, pos, "data"))
            {
                offset = payload;
                // Clamp: some writers store a size larger than what is actually present.
                length = (int)Math.Min(chunkSize, (long)(bytes.Length - payload));
                return;
            }

            // Chunks are padded to an even number of bytes.
            long next = payload + chunkSize + (chunkSize & 1);
            if (next > bytes.Length)
            {
                break;
            }
            pos = (int)next;
        }
    }

    /// <summary>Returns true when the four ASCII characters of <paramref name="tag"/> sit at <paramref name="index"/>.</summary>
    private static bool HasTag(byte[] bytes, int index, string tag)
    {
        for (int i = 0; i < tag.Length; i++)
        {
            if (bytes[index + i] != (byte)tag[i])
            {
                return false;
            }
        }
        return true;
    }

    /// <summary>Disposes the native sherpa-onnx objects owned by this component, if any.</summary>
    private void ReleaseNativeResources()
    {
        if (manager != null)
        {
            manager.Dispose();
            manager = null;
        }
        if (extractor != null)
        {
            extractor.Dispose();
            extractor = null;
        }
        if (offlineSpeechDenoiser != null)
        {
            offlineSpeechDenoiser.Dispose();
            offlineSpeechDenoiser = null;
        }
    }
}

最后是工程地址

https://github.com/xue-fei/sherpa-onnx-unity

相关文章:

  • DS1302实时时钟模块
  • Java-List集合类全面解析
  • CSS- 4.5 css + div 布局 简易网易云音乐 官网布置实例
  • 「NameCraft · 幻想命名器」开发记:我和 CodeBuddy 的一次奇幻共创之旅
  • 易境通散货拼柜系统:提高货代企业货物配载效率
  • CEF源码历史版本编译避坑指南
  • JSON学习笔记
  • 视频监控中的存储方式有哪些?EasyCVR视频监控汇聚平台如何打造高效监控存储
  • 【STM32】ST-Link V2.1制作
  • 【CF】Day62——Codeforces Round 948 (Div. 2) CD (思维 + LCM + 枚举因数 | 思维 + 哈希)
  • Amazon Q 从入门到精通 – 测试与重构
  • Python的传参过程的小细节
  • k8s1.27集群部署mysql8.0双主双从
  • 第二道re
  • UE 材质基础第二天
  • 线光谱共焦传感器:复杂材质检测
  • 【盈达科技】GEO优化实战策略
  • 基于PetaLinux的Zynq PS应用自启动全攻略
  • 浙江大学python程序设计(陈春晖、翁恺、季江民)习题答案-第五章
  • 大模型(1)——基本概念
  • 国家发改委:不断完善稳就业稳经济的政策工具箱,确保必要时能够及时出台实施
  • 每一笔都是对的!再读周碧初画作有感
  • 专访《风雪夜归人》导演闫锐:在舞台上表现什么是真正的活着
  • 世卫大会连续九年拒绝涉台提案
  • 专访|金七猫奖得主:以非遗为舟,在现实题材中疗愈与成长
  • 全总联合六部门印发工作指引,共保劳动者合法权益