当前位置: 首页 > news >正文

Unity数字人开发笔记——讯飞超拟人语音

基于上一篇:
https://blog.csdn.net/qq_17523181/article/details/148255809?spm=1001.2014.3001.5501
https://blog.csdn.net/qq_17523181/article/details/148264127?spm=1011.2415.3001.5331

讯飞默认的在线语音合成听起来比较机械,因此这里改用讯飞的超拟人语音合成。

一、讯飞API

在这里插入图片描述

WebApi : wss://cbm01.cn-huabei-1.xf-yun.com/v1/private/mcd9m97e6

API地址:https://www.xfyun.cn/doc/spark/super%20smart-tts.html

  • 示例
    在这里插入图片描述

二、Unity编写连接脚本

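注意1:超拟人语音属于大模型板块,因此鉴权要使用大模型接口的鉴权逻辑(对 host、date、request-line 做 HMAC-SHA256 签名)。
注意2:官方 Python 示例使用的 encoding 是 lame(MP3),在 Unity 中需要改为 raw(16 位 PCM),这样才能直接转换成 AudioClip 所需的 float 数据。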

  • 建立XunfeiSmartTextToSpeech.cs文件
using System;
using System.Collections;
using System.Collections.Generic;
using System.Net.WebSockets;
using System.Security.Cryptography;
using System.Text;
using System.Threading;
using UnityEngine;

/// <summary>
/// Text-to-speech component backed by the iFlytek (Xunfei) "super smart" TTS WebSocket API.
/// The synthesized 16-bit PCM audio is streamed into a queue that feeds a streaming
/// <see cref="AudioClip"/> through its PCM reader callback.
/// </summary>
/// <remarks>
/// <c>m_PostURL</c> and <c>stopwatch</c> are not declared in this class; they are expected
/// to come from the <c>TTS</c> base class.
/// </remarks>
public class XunfeiSmartTextToSpeech : TTS
{
    #region Parameters

    /// <summary>
    /// Sample rate (Hz) that is both requested from the service and used for the AudioClip.
    /// The two MUST match, otherwise playback is pitch- and speed-shifted.
    /// </summary>
    private const int SampleRate = 24000;

    /// <summary>
    /// Capacity of the streaming clip in seconds.
    /// </summary>
    private const int MaxClipSeconds = 60;

    /// <summary>
    /// Size of the WebSocket receive buffer in bytes.
    /// </summary>
    private const int ReceiveBufferSize = 4096;

    /// <summary>
    /// iFlytek application settings (app id, API key, API secret).
    /// </summary>
    [SerializeField]
    private XunfeiSettings m_XunfeiSettings;

    /// <summary>
    /// Host address.
    /// </summary>
    [SerializeField] private string m_HostUrl = "cbm01.cn-huabei-1.xf-yun.com";

    /// <summary>
    /// Speaker (voice) used for synthesis.
    /// </summary>
    [Header("选择朗读的声音")]
    [SerializeField] private Speaker m_Vcn = Speaker.聆佑佑_童年女声;

    /// <summary>
    /// Volume, valid range [0-100], default 50.
    /// </summary>
    [Range(0, 100)]
    [SerializeField] private int m_Volume = 50;

    /// <summary>
    /// Pitch, valid range [0-100], default 50.
    /// </summary>
    [Range(0, 100)]
    [SerializeField] private int m_Pitch = 50;

    /// <summary>
    /// Speaking speed, valid range [0-100], default 50.
    /// </summary>
    [Range(0, 100)]
    [SerializeField] private int m_Speed = 50;

    #endregion

    private void Awake()
    {
        m_XunfeiSettings = this.GetComponent<XunfeiSettings>();
        m_PostURL = "wss://cbm01.cn-huabei-1.xf-yun.com/v1/private/mcd9m97e6";
    }

    /// <summary>
    /// Synthesizes speech for the given text.
    /// </summary>
    /// <param name="_msg">Text to synthesize.</param>
    /// <param name="_callback">Invoked with the streaming clip and the original text.</param>
    public override void Speak(string _msg, Action<AudioClip, string> _callback)
    {
        StartCoroutine(GetSpeech(_msg, _callback));
    }

    /// <summary>
    /// WebSocket of the most recent synthesis request.
    /// </summary>
    private ClientWebSocket m_WebSocket;
    private CancellationToken m_CancellationToken;
    private AudioClip _audioClip;

    #region Auth URL

    /// <summary>
    /// Builds the signed WebSocket URL: an HMAC-SHA256 signature over the host, date and
    /// request line, passed as the query parameters authorization, date and host.
    /// </summary>
    /// <returns>The signed wss:// URL.</returns>
    /// <exception cref="InvalidOperationException">No XunfeiSettings component is available.</exception>
    private string GetAuthUrl()
    {
        if (m_XunfeiSettings == null)
        {
            throw new InvalidOperationException("XunfeiSettings component is missing; cannot sign the request.");
        }

        string date = DateTime.UtcNow.ToString("r");
        Uri uri = new Uri(m_PostURL);

        // The text that gets signed: host, date and request line, separated by '\n'.
        string signatureOrigin = new StringBuilder("host: ").Append(uri.Host).Append("\n")
            .Append("date: ").Append(date).Append("\n")
            .Append("GET ").Append(uri.LocalPath).Append(" HTTP/1.1")
            .ToString();

        string signature = HMACsha256(m_XunfeiSettings.m_APISecret, signatureOrigin);
        string authorization = string.Format(
            "api_key=\"{0}\", algorithm=\"{1}\", headers=\"{2}\", signature=\"{3}\"",
            m_XunfeiSettings.m_APIKey, "hmac-sha256", "host date request-line", signature);

        string encodedAuthorization = Convert.ToBase64String(Encoding.UTF8.GetBytes(authorization));
        string encodedDate = date.Replace(" ", "%20").Replace(":", "%3A").Replace(",", "%2C");

        // The signed URL carries the API key and a valid signature, so it is deliberately
        // NOT written to the log.
        return "wss://" + uri.Host + uri.LocalPath
            + "?authorization=" + encodedAuthorization
            + "&date=" + encodedDate
            + "&host=" + uri.Host;
    }

    /// <summary>
    /// Computes the Base64-encoded HMAC-SHA256 of <paramref name="buider"/> using
    /// <paramref name="apiSecretIsKey"/> as the key (both UTF-8 encoded).
    /// </summary>
    /// <param name="apiSecretIsKey">Secret used as the HMAC key.</param>
    /// <param name="buider">Text to sign.</param>
    /// <returns>Base64 string of the 32-byte digest.</returns>
    public string HMACsha256(string apiSecretIsKey, string buider)
    {
        byte[] keyBytes = Encoding.UTF8.GetBytes(apiSecretIsKey);
        using (HMACSHA256 hmac = new HMACSHA256(keyBytes))
        {
            byte[] digest = hmac.ComputeHash(Encoding.UTF8.GetBytes(buider));
            return Convert.ToBase64String(digest);
        }
    }

    #endregion

    #region Speech synthesis

    /// <summary>
    /// Counts the silent samples handed to the audio system while the queue was empty and
    /// the socket had not been aborted.
    /// </summary>
    private int m_AudioLenth;

    /// <summary>
    /// Decoded PCM samples waiting to be played. Guarded by <see cref="m_AudioLock"/>.
    /// </summary>
    Queue<float> m_AudioQueue = new Queue<float>();

    /// <summary>
    /// Guards <see cref="m_AudioQueue"/>; the PCM reader callback may run off the main thread.
    /// </summary>
    private readonly object m_AudioLock = new object();

    /// <summary>
    /// Starts a synthesis request and immediately hands a streaming clip to the callback.
    /// Audio arrives asynchronously and is played as it is received.
    /// </summary>
    /// <param name="_text">Text to synthesize.</param>
    /// <param name="_callback">Invoked with the streaming clip and the original text.</param>
    /// <returns>Coroutine enumerator.</returns>
    public IEnumerator GetSpeech(string _text, Action<AudioClip, string> _callback)
    {
        stopwatch.Restart();
        yield return null;

        if (m_WebSocket != null) { m_WebSocket.Abort(); }

        // Drop samples left over from a previous (possibly interrupted) utterance so they
        // do not play at the start of this one.
        lock (m_AudioLock)
        {
            m_AudioQueue.Clear();
        }

        ConnectHost(_text);

        // The clip frequency must equal the sample rate requested from the service.
        _audioClip = AudioClip.Create("audio", SampleRate * MaxClipSeconds, 1, SampleRate, true, OnAudioRead);

        _callback(_audioClip, _text);
        stopwatch.Stop();
        Debug.Log("讯飞超拟人语音合成耗时:" + stopwatch.Elapsed.TotalSeconds);
    }

    /// <summary>
    /// PCM reader callback of the streaming clip: fills <paramref name="data"/> from the
    /// sample queue and pads with silence when the queue runs dry.
    /// </summary>
    /// <param name="data">Buffer to fill with samples.</param>
    void OnAudioRead(float[] data)
    {
        ClientWebSocket socket = m_WebSocket;
        lock (m_AudioLock)
        {
            for (int i = 0; i < data.Length; i++)
            {
                if (m_AudioQueue.Count > 0)
                {
                    data[i] = m_AudioQueue.Dequeue();
                }
                else
                {
                    if (socket == null || socket.State != WebSocketState.Aborted) m_AudioLenth++;
                    data[i] = 0;
                }
            }
        }
    }

    /// <summary>
    /// Connects to the service, sends the synthesis request and enqueues the audio of every
    /// response frame until the final frame, an error, or until a newer request replaces
    /// this one.
    /// </summary>
    /// <remarks>
    /// Fire-and-forget (<c>async void</c>): every exception is caught and logged here.
    /// </remarks>
    /// <param name="text">Text to synthesize.</param>
    private async void ConnectHost(string text)
    {
        // Work on a local reference: m_WebSocket is replaced when a newer request starts,
        // and this loop must never read from the newer request's connection.
        ClientWebSocket socket = new ClientWebSocket();
        m_WebSocket = socket;

        try
        {
            m_CancellationToken = new CancellationToken();
            Uri uri = new Uri(GetAuthUrl());
            await socket.ConnectAsync(uri, m_CancellationToken);

            string encodedText = Convert.ToBase64String(Encoding.UTF8.GetBytes(text ?? string.Empty));

            string requestJson = TTSRequestBuilder.BuildTTSRequest(
                appId: m_XunfeiSettings.m_AppID,
                headerStatus: 2,
                vcn: GetVcn(m_Vcn),
                volume: m_Volume,
                speed: m_Speed,
                pitch: m_Pitch,
                payloadStatus: 2,
                payloadText: encodedText);

            await socket.SendAsync(
                new ArraySegment<byte>(Encoding.UTF8.GetBytes(requestJson)),
                WebSocketMessageType.Binary, true, m_CancellationToken);

            byte[] buffer = new byte[ReceiveBufferSize];
            using (System.IO.MemoryStream message = new System.IO.MemoryStream())
            {
                while (socket.State == WebSocketState.Open)
                {
                    WebSocketReceiveResult received =
                        await socket.ReceiveAsync(new ArraySegment<byte>(buffer), m_CancellationToken);

                    if (received.MessageType == WebSocketMessageType.Close)
                    {
                        break;
                    }

                    // Accumulate raw bytes and decode only complete messages: decoding each
                    // chunk separately could split a multi-byte UTF-8 character.
                    message.Write(buffer, 0, received.Count);
                    if (!received.EndOfMessage)
                    {
                        continue;
                    }

                    string json = Encoding.UTF8.GetString(message.GetBuffer(), 0, (int)message.Length);
                    message.SetLength(0);

                    ResponseData response = JsonUtility.FromJson<ResponseData>(json);
                    if (response == null || response.header == null)
                    {
                        continue;
                    }

                    if (response.header.code != 0)
                    {
                        PrintErrorLog(response.header.code);
                        socket.Abort();
                        break;
                    }

                    // A newer request has taken over; do not mix this audio into its queue.
                    if (!ReferenceEquals(socket, m_WebSocket))
                    {
                        break;
                    }

                    if (response.header.status != 0)
                    {
                        string audioBase64 = (response.payload != null && response.payload.audio != null)
                            ? response.payload.audio.audio
                            : null;

                        if (!string.IsNullOrEmpty(audioBase64))
                        {
                            float[] samples = ConvertByteToFloat(Convert.FromBase64String(audioBase64));
                            lock (m_AudioLock)
                            {
                                foreach (float sample in samples) m_AudioQueue.Enqueue(sample);
                            }
                        }

                        // status 2 marks the final frame.
                        if (response.header.status == 2)
                        {
                            socket.Abort();
                            break;
                        }
                    }
                }
            }
        }
        catch (Exception ex)
        {
            Debug.LogError("报错信息: " + ex.Message);
            socket.Dispose();
        }
    }

    /// <summary>
    /// Converts little-endian 16-bit PCM bytes to floats in [-1, 1).
    /// A trailing odd byte is ignored.
    /// </summary>
    /// <param name="byteArray">Raw PCM bytes.</param>
    /// <returns>One float per 16-bit sample.</returns>
    float[] ConvertByteToFloat(byte[] byteArray)
    {
        float[] floatArray = new float[byteArray.Length / 2];
        for (int i = 0; i < floatArray.Length; i++)
        {
            short sample = (short)((byteArray[i * 2 + 1] << 8) | byteArray[i * 2]);
            floatArray[i] = sample / 32768.0f;
        }
        return floatArray;
    }

    /// <summary>
    /// Decodes a Base64 string; logs an error and returns null when the input is not valid Base64.
    /// </summary>
    /// <param name="base64String">Base64 text.</param>
    /// <returns>The decoded bytes, or null on a format error.</returns>
    public static byte[] Base64Decode(string base64String)
    {
        try
        {
            return Convert.FromBase64String(base64String);
        }
        catch (FormatException ex)
        {
            Debug.LogError($"Base64 解码失败: {ex.Message}");
            return null;
        }
    }

    #endregion

    #region Utilities

    /// <summary>
    /// Logs a description for a service error code.
    /// </summary>
    /// <param name="status">Error code taken from the response header.</param>
    private void PrintErrorLog(int status)
    {
        switch (status)
        {
            case 10009:
                Debug.LogError("输入数据非法 / 检查输入数据");
                return;
            case 10010:
                Debug.LogError("没有授权许可或授权数已满 / 提交工单");
                return;
            case 10019:
                Debug.LogError("session超时 / 检查是否数据发送完毕但未关闭连接");
                return;
            case 10043:
                Debug.LogError("音频解码失败 / 检查aue参数,如果为speex,请确保音频是speex音频并分段压缩且与帧大小一致");
                return;
            case 10114:
                Debug.LogError("session 超时 / 会话时间超时,检查是否发送数据时间超过了60s");
                return;
            case 10139:
                Debug.LogError("参数错误 / 检查参数是否正确");
                return;
            case 10160:
                Debug.LogError("请求数据格式非法 / 检查请求数据是否是合法的json");
                return;
            case 10161:
                Debug.LogError("base64解码失败 / 检查发送的数据是否使用base64编码了");
                return;
            case 10163:
                Debug.LogError("参数校验失败 / 具体原因见详细的描述");
                return;
            case 10200:
                Debug.LogError("读取数据超时 / 检查是否累计10s未发送数据并且未关闭连接");
                return;
            case 10222:
                Debug.LogError("1.上传的数据超过了接口上限; 2.SSL证书无效; / 1.检查接口上传的数据(文本、音频、图片等)是否超越了接口的最大限制,可到相应的接口文档查询具体的上限; 2. 请将log导出发到工单");
                return;
            case 10223:
                Debug.LogError("lb 找不到节点 / 提交工单");
                return;
            case 10313:
                Debug.LogError("appid和apikey不匹配 / 检查appid是否合法");
                return;
            case 10317:
                Debug.LogError("版本非法 / 请到控制台提交工单联系技术人员");
                return;
            case 10700:
                Debug.LogError("引擎异常 / 按照报错原因的描述,对照开发文档检查输入输出,如果仍然无法排除问题,请提供sid以及接口返回的错误信息,到控制台提交工单联系技术人员排查。");
                return;
            case 11200:
                Debug.LogError("功能未授权 / 请先检查appid是否正确,并且确保该appid下添加了相关服务。若没问题,则按照如下方法排查。 1. 确认总调用量是否已超越限制,或者总次数授权已到期,若已超限或者已过期请联系商务人员。 2. 查看是否使用了未授权的功能,或者授权已过期。");
                return;
            case 11201:
                Debug.LogError("该APPID的每日交互次数超过限制 / 根据自身情况提交应用审核进行服务量提额,或者联系商务购买企业级正式接口,获得海量服务量权限以便商用。");
                return;
            case 11503:
                Debug.LogError("服务内部响应数据错误 / 提交工单");
                return;
            case 11502:
                Debug.LogError("服务配置错误 / 提交工单");
                return;
        }

        if (status >= 100001 && status <= 100010)
        {
            Debug.LogError("调用引擎时出现错误 / 请根据message中包含的errno前往 5.2引擎错误码 查看对应的说明及处理策略");
            return;
        }

        Debug.LogError("平台未知错误,错误代码:" + status);
    }

    /// <summary>
    /// Converts 16-bit PCM bytes to the float samples an AudioClip can consume.
    /// A trailing odd byte is ignored.
    /// </summary>
    /// <param name="byteArray">Raw PCM bytes.</param>
    /// <returns>One float per 16-bit sample.</returns>
    public float[] BytesToFloat(byte[] byteArray)
    {
        float[] sounddata = new float[byteArray.Length / 2];
        for (int i = 0; i < sounddata.Length; i++)
        {
            sounddata[i] = BytesToFloat(byteArray[i * 2], byteArray[i * 2 + 1]);
        }
        return sounddata;
    }

    /// <summary>
    /// Combines two bytes into one sample in [-1, 1); the byte order follows
    /// <see cref="BitConverter.IsLittleEndian"/>.
    /// </summary>
    private float BytesToFloat(byte firstByte, byte secondByte)
    {
        short s;
        if (BitConverter.IsLittleEndian)
        {
            s = (short)((secondByte << 8) | firstByte);
        }
        else
        {
            s = (short)((firstByte << 8) | secondByte);
        }

        // Convert to the range from -1 to (just below) 1.
        return s / 32768.0F;
    }

    #endregion

    #region Data definitions

    /// <summary>
    /// Builds the JSON body of a synthesis request.
    /// </summary>
    public class TTSRequestBuilder
    {
        /// <summary>
        /// Assembles a request object from the arguments and serializes it as
        /// pretty-printed JSON.
        /// </summary>
        /// <param name="payloadText">Text payload; the caller in this file passes Base64-encoded UTF-8.</param>
        /// <returns>The request as a JSON string.</returns>
        public static string BuildTTSRequest(
            string appId,
            int headerStatus,
            string vcn,
            int volume,
            int speed,
            int pitch,
            int payloadStatus,
            string payloadText)
        {
            var request = new TTSRequest
            {
                header = new Header
                {
                    app_id = appId,
                    status = headerStatus
                },
                parameter = new Parameter
                {
                    tts = new TTS
                    {
                        vcn = vcn,
                        volume = volume,
                        speed = speed,
                        pitch = pitch,
                        audio = new Audio() // default values
                    }
                },
                payload = new Payload
                {
                    text = new Text
                    {
                        status = payloadStatus,
                        text = payloadText
                    }
                }
            };

            return JsonUtility.ToJson(request, true);
        }
    }

    [System.Serializable]
    public class TTSRequest
    {
        public Header header;
        public Parameter parameter;
        public Payload payload;
    }

    [System.Serializable]
    public class Header
    {
        public string app_id;
        public int status;
    }

    [System.Serializable]
    public class Parameter
    {
        public TTS tts;
    }

    [System.Serializable]
    public class TTS
    {
        public string vcn;
        public int volume;
        public int rhy = 0;       // default
        public int speed;
        public int pitch;
        public int bgs = 0;       // default
        public int reg = 0;       // default
        public int rdn = 0;       // default
        public Audio audio;
    }

    [System.Serializable]
    public class Audio
    {
        public string encoding = "raw";         // raw PCM
        public int sample_rate = SampleRate;    // must equal the AudioClip frequency
        public int channels = 1;                // default
        public int bit_depth = 16;              // default
        public int frame_size = 0;              // default
    }

    [System.Serializable]
    public class Payload
    {
        public Text text;
    }

    [System.Serializable]
    public class Text
    {
        public string encoding = "utf8";       // default
        public string compress = "raw";        // default
        public string format = "plain";        // default
        public int status;
        public int seq = 0;                    // default
        public string text;
    }

    /// <summary>
    /// One response frame received from the service.
    /// </summary>
    [Serializable]
    public class ResponseData
    {
        public ResHeader header;
        public ResPayload payload;
        public string message;
    }

    [Serializable]
    public class ResHeader
    {
        public int code;
        public string message;
        public string sid;
        public int status;
    }

    [Serializable]
    public class ResPayload
    {
        public ResAudio audio;
        public ResPybuf pybuf;
    }

    [Serializable]
    public class ResAudio
    {
        public string encoding;
        public int sample_rate;
        public int channels;
        public int bit_depth;
        public int status;
        public int seq;
        public int frame_size;
        public string audio;
    }

    [Serializable]
    public class ResPybuf
    {
        public string encoding;
        public string compress;
        public string format;
        public int status;
        public int seq;
        public string text;
    }

    #endregion

    #region Settings

    public enum Speaker
    {
        聆飞逸_男声,
        聆小璇_女声,
        聆佑佑_童年女声,
        聆玉昭_女声,
        聆小璃_女声,
        聆飞哲_男声,
        聆小玥_女声,
        聆玉言_女声,
        聆小琪_女声
    }

    /// <summary>
    /// Maps a <see cref="Speaker"/> to the voice id sent in the request.
    /// </summary>
    /// <param name="_speeker">Selected speaker.</param>
    /// <returns>The voice id; unknown values fall back to "x5_lingfeiyi_flow".</returns>
    private string GetVcn(Speaker _speeker)
    {
        switch (_speeker)
        {
            case Speaker.聆飞逸_男声: return "x5_lingfeiyi_flow";
            case Speaker.聆小璇_女声: return "x4_lingxiaoxuan_oral";
            case Speaker.聆佑佑_童年女声: return "x4_lingyouyou_oral";
            case Speaker.聆玉昭_女声: return "x4_lingyuzhao_oral";
            case Speaker.聆小璃_女声: return "x4_lingxiaoli_oral";
            case Speaker.聆飞哲_男声: return "x4_lingfeizhe_oral";
            case Speaker.聆小玥_女声: return "x5_lingxiaoyue_flow";
            case Speaker.聆玉言_女声: return "x5_lingyuyan_flow";
            case Speaker.聆小琪_女声: return "x4_lingxiaoqi_oral";
            default: return "x5_lingfeiyi_flow";
        }
    }

    #endregion
}
  • 同样的方法,填写好API后,绑定起来就可以使用了
    在这里插入图片描述
    在这里插入图片描述

相关文章:

  • Python人工智能算法学习 禁忌搜索算法求解旅行商问题(TSP)的研究与实现
  • 央国企迁移国产数据库:数据迁移5步法与4项管理准则
  • 【周输入】517周阅读推荐-3
  • opencv + jpeg_turbo(启用SIMD加速)
  • 【每天一个知识点】LangChain
  • 消息队列从入门到实战:用外卖订单理解高并发系统的核心设计
  • AMBA-AHB的控制信号
  • 水域应急救援可视化平台
  • Ai书签管理工具开发全记录(一):项目总览与技术蓝图
  • c/c++的opencv图像金字塔缩放
  • 在 Linux 中,目录权限,mkdir -m 选项,用法
  • 扇形 圆形 面积公式
  • java读取excel数据中字段是否为金额格式
  • 图像锐化和图像运算
  • Vue CLI创建vue项目,安装插件
  • 前端面试准备-1
  • 算法 Arrays.sort()函数自定义排序(Comparator 接口)
  • XCTF-web-fileinclude
  • 在 MATLAB 2015a 中如何调用 Python
  • Golang | gRPC demo
  • wordpress显示时间插件下载地址/谷歌seo营销
  • 独立商城系统网站建设等服务/seo技术好的培训机构
  • 会展设计案例/武汉seo论坛
  • 公司免费网站建设/百度收录网站入口
  • 网站建设维护与网页设计/东莞今天发生的重大新闻
  • 免费网站流量统计/营销型网站开发公司