
Real-Time Person Video Matting in Unity with RVM (No Green Screen)

Result

(screenshot)
Project
https://github.com/PeterL1n/RobustVideoMatting

Unity Sentis has been upgraded to Inference Engine

(screenshot)

https://docs.unity3d.com/Packages/com.unity.ai.inference@2.2/manual/index.html
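The rename is mostly a package and namespace change (com.unity.ai.inference instead of com.unity.sentis, Unity.InferenceEngine instead of Unity.Sentis); the ModelLoader/Worker/Tensor API used below is otherwise the same. A minimal load-and-schedule sketch under the new namespace; the class and field names here are illustrative, and the single-input Schedule overload only suits simple models (the RVM model below has several inputs that are set individually):

using UnityEngine;
using Unity.InferenceEngine; // formerly: using Unity.Sentis;

public class InferenceEngineSmokeTest : MonoBehaviour
{
    [SerializeField] private ModelAsset modelAsset; // any .onnx imported as a ModelAsset

    void Start()
    {
        // Same ModelLoader/Worker/Tensor types as Sentis, new namespace and package.
        var model = ModelLoader.Load(modelAsset);
        using var worker = new Worker(model, BackendType.GPUCompute);
        using var input = new Tensor<float>(new TensorShape(1, 3, 512, 512));
        worker.Schedule(input); // single-input convenience overload
    }
}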

A Detour

I first tried this with the C# onnxruntime package. I enabled CUDA, but the program did not seem to actually run on the GPU: only about 2 fps...
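Before blaming the model, it is worth confirming that the CUDA execution provider is actually available: AppendExecutionProvider_CUDA requires the native GPU build of onnxruntime (the Microsoft.ML.OnnxRuntime.Gpu package) plus a matching CUDA/cuDNN install, and if those are missing the provider never appears in the available-provider list. A minimal check along these lines; the CudaCheck helper is illustrative, not part of the project:

using System;
using System.Linq;
using Microsoft.ML.OnnxRuntime;
using UnityEngine;

public static class CudaCheck
{
    // Build session options that use CUDA only if the provider is actually
    // present; otherwise fall back to the default CPU provider. A sketch:
    // assumes the Microsoft.ML.OnnxRuntime.Gpu native binaries are deployed.
    public static SessionOptions MakeOptions()
    {
        var options = new SessionOptions();
        var providers = OrtEnv.Instance().GetAvailableProviders();
        Debug.Log("Available providers: " + string.Join(", ", providers));

        if (providers.Contains("CUDAExecutionProvider"))
        {
            try
            {
                options.AppendExecutionProvider_CUDA(0); // GPU device 0
                Debug.Log("CUDA execution provider enabled");
            }
            catch (Exception e)
            {
                Debug.LogWarning("CUDA provider failed to load, falling back to CPU: " + e.Message);
            }
        }
        else
        {
            Debug.LogWarning("CUDAExecutionProvider is not in the available provider list");
        }
        return options;
    }
}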
Main code

using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using UnityEngine;
using UnityEngine.UI;
using Debug = UnityEngine.Debug;

public class RobustVideoMatting : MonoBehaviour
{
    [Header("Model settings")]
    public string modelPath = "rvm_mobilenetv3_fp32.onnx";
    public float downsampleRatio = 0.25f;

    [Header("Input settings")]
    public Texture2D inputTexture;
    public bool useWebcam = false;
    public int webcamWidth = 640;
    public int webcamHeight = 480;

    [Header("Output display")]
    public Texture2D outputTexture;
    public RawImage rawImage;

    private InferenceSession session;
    private WebCamTexture webCamTexture;

    // Recurrent (hidden) states
    private Tensor<float>[] recurrentStates;
    private Tensor<float> downsampleRatioTensor;

    // Output names
    private readonly string[] outputNames = { "fgr", "pha", "r1o", "r2o", "r3o", "r4o" };
    private readonly string[] recurrentInputNames = { "r1i", "r2i", "r3i", "r4i" };
    private readonly string[] recurrentOutputNames = { "r1o", "r2o", "r3o", "r4o" };

    // Current frame results
    private Texture2D foregroundTexture;
    private Texture2D alphaTexture;
    private Texture2D resultTexture;

    void Start()
    {
        InitializeModel();
        InitializeRecurrentStates();
        if (useWebcam)
        {
            InitializeWebcam();
        }
    }

    void InitializeModel()
    {
        try
        {
            // Load the model
            var modelFullPath = Path.Combine(Application.streamingAssetsPath, modelPath);

            // Create session options
            var sessionOptions = new SessionOptions();
            var aps = OrtEnv.Instance().GetAvailableProviders();
            foreach (var ap in aps)
            {
                Debug.Log(ap);
            }

            // Thread counts
            sessionOptions.IntraOpNumThreads = 6;
            sessionOptions.InterOpNumThreads = 12;
            sessionOptions.EnableProfiling = true;
            sessionOptions.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_VERBOSE;
            sessionOptions.ProfileOutputPathPrefix = "onnxruntime_profile_" + DateTime.Now.ToString("yyyyMMdd_HHmmss");

            // Graph optimization level
            sessionOptions.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_EXTENDED;
            //sessionOptions.AppendExecutionProvider_DML();
            //sessionOptions.AppendExecutionProvider_CPU();
            sessionOptions.AppendExecutionProvider_CUDA();

            session = new InferenceSession(modelFullPath, sessionOptions);
            Debug.Log($"Robust Video Matting model loaded: {modelPath}");
        }
        catch (Exception e)
        {
            Debug.LogError($"Model initialization failed: {e.Message}");
        }
    }

    void InitializeRecurrentStates()
    {
        try
        {
            // Initialize the recurrent states as [1, 1, 1, 1]
            recurrentStates = new Tensor<float>[4];
            var zeroData = new float[1] { 0f };
            var shape = new int[] { 1, 1, 1, 1 };
            for (int i = 0; i < 4; i++)
            {
                recurrentStates[i] = new DenseTensor<float>(zeroData, shape);
            }

            // Initialize downsample_ratio
            var ratioData = new float[] { downsampleRatio };
            downsampleRatioTensor = new DenseTensor<float>(ratioData, new int[] { 1 });
            Debug.Log("Recurrent states initialized");
        }
        catch (Exception e)
        {
            Debug.LogError($"Recurrent state initialization failed: {e.Message}");
        }
    }

    void InitializeWebcam()
    {
        WebCamDevice[] devices = WebCamTexture.devices;
        if (devices.Length > 0)
        {
            webCamTexture = new WebCamTexture(devices[0].name, webcamWidth, webcamHeight, 30);
            webCamTexture.Play();
            Debug.Log($"Webcam started: {devices[0].name}");
        }
        else
        {
            Debug.LogWarning("No webcam device found");
        }
    }

    Texture2D sourceTexture = null;

    void Update()
    {
        if (session == null) return;
        if (Input.GetMouseButtonDown(0))
        {
            TestFrame();
        }

        // Grab the input image
        if (useWebcam && webCamTexture != null && webCamTexture.isPlaying)
        {
            sourceTexture = WebCamTextureToTexture2D(webCamTexture);
        }
        if (sourceTexture != null)
        {
            // Process the current frame
            ProcessFrame(sourceTexture);

            // Show the result
            if (resultTexture != null)
            {
                rawImage.texture = resultTexture;
            }

            // Clean up the temporary texture
            if (useWebcam && sourceTexture != null)
            {
                DestroyImmediate(sourceTexture);
            }
        }
    }

    void TestFrame()
    {
        if (inputTexture != null)
        {
            Stopwatch stopwatch = new Stopwatch();
            stopwatch.Start();

            // Process the current frame
            ProcessFrame(inputTexture);

            stopwatch.Stop();
            long lastInferenceTime = stopwatch.ElapsedMilliseconds;
            Debug.Log($"Inference done! Total time: {lastInferenceTime}ms");

            // Show the result
            if (resultTexture != null)
            {
                rawImage.texture = resultTexture;
            }
        }
    }

    void ProcessFrame(Texture2D sourceTexture)
    {
        try
        {
            // Prepare the input tensor
            var inputTensor = PrepareInputTensor(sourceTexture);

            // Build the input list
            var inputs = new List<NamedOnnxValue>
            {
                NamedOnnxValue.CreateFromTensor("src", inputTensor),
                NamedOnnxValue.CreateFromTensor("downsample_ratio", downsampleRatioTensor)
            };

            // Add the recurrent state inputs
            for (int i = 0; i < recurrentInputNames.Length; i++)
            {
                inputs.Add(NamedOnnxValue.CreateFromTensor(recurrentInputNames[i], recurrentStates[i]));
            }

            // Run inference
            using (var results = session.Run(inputs))
            {
                // Read the outputs
                ProcessOutputs(results, sourceTexture.width, sourceTexture.height);
                // Carry the recurrent states over to the next frame
                UpdateRecurrentStates(results);
            }
        }
        catch (Exception e)
        {
            Debug.LogError($"Frame processing failed: {e.Message}");
        }
    }

    Tensor<float> PrepareInputTensor(Texture2D texture)
    {
        // Resize to the input size the model expects
        int targetWidth = 512;  // adjust to the model
        int targetHeight = 512; // adjust to the model
        var resizedTexture = ResizeTexture(texture, targetWidth, targetHeight);
        Color32[] pixels = resizedTexture.GetPixels32();

        // Build the tensor [1, 3, H, W]
        // NOTE: this per-pixel loop runs on the CPU every frame.
        float[] dataArray = new float[1 * 3 * targetHeight * targetWidth];
        int[] shapeArray = new int[] { 1, 3, targetHeight, targetWidth };
        for (int y = 0; y < targetHeight; y++)
        {
            for (int x = 0; x < targetWidth; x++)
            {
                int index = y * targetWidth + x;
                var pixel = pixels[index];
                // Normalize to [0, 1]
                int rIndex = 0 * targetHeight * targetWidth + y * targetWidth + x;
                int gIndex = 1 * targetHeight * targetWidth + y * targetWidth + x;
                int bIndex = 2 * targetHeight * targetWidth + y * targetWidth + x;
                dataArray[rIndex] = pixel.r / 255.0f;
                dataArray[gIndex] = pixel.g / 255.0f;
                dataArray[bIndex] = pixel.b / 255.0f;
            }
        }

        // Clean up the temporary texture
        DestroyImmediate(resizedTexture);
        return new DenseTensor<float>(dataArray, shapeArray, false);
    }

    bool ProcessOutputs(IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results, int originalWidth, int originalHeight)
    {
        try
        {
            // Fetch the foreground (fgr) and alpha (pha) outputs
            var fgrValue = results.FirstOrDefault(r => r.Name == "fgr");
            var phaValue = results.FirstOrDefault(r => r.Name == "pha");
            if (fgrValue == null || phaValue == null)
            {
                Debug.LogError("Missing required output: fgr or pha");
                return false;
            }

            var fgrTensor = fgrValue.AsTensor<float>();
            var phaTensor = phaValue.AsTensor<float>();

            // Foreground texture
            foregroundTexture = TensorToTexture(fgrTensor, originalWidth, originalHeight, false);
            // Alpha texture
            alphaTexture = TensorToTexture(phaTensor, originalWidth, originalHeight, true);
            // Composite
            resultTexture = ComposeResult(foregroundTexture, alphaTexture);
            return true;
        }
        catch (Exception e)
        {
            Debug.LogError($"Output processing failed: {e.Message}");
            return false;
        }
    }

    void UpdateRecurrentStates(IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results)
    {
        // Replace the recurrent states with this frame's recurrent outputs
        for (int i = 0; i < recurrentOutputNames.Length; i++)
        {
            var stateValue = results.FirstOrDefault(r => r.Name == recurrentOutputNames[i]);
            if (stateValue != null)
            {
                // Clone: the tensor from AsTensor wraps native memory that is freed
                // when `results` is disposed, so keeping it directly would leave the
                // state pointing at freed memory on the next frame.
                recurrentStates[i] = stateValue.AsTensor<float>().Clone();
            }
        }
    }

    Texture2D TensorToTexture(Tensor<float> tensor, int targetWidth, int targetHeight, bool isAlpha)
    {
        var dimensions = tensor.Dimensions.ToArray();
        if (dimensions.Length < 4)
        {
            Debug.LogError($"Tensor has too few dimensions: {dimensions.Length}");
            return null;
        }

        int tensorHeight = dimensions[2];
        int tensorWidth = dimensions[3];
        int channels = dimensions[1];

        // Temporary texture
        var tempTexture = new Texture2D(tensorWidth, tensorHeight,
            isAlpha ? TextureFormat.RFloat : TextureFormat.RGB24, false);

        if (isAlpha && channels == 1)
        {
            // Alpha channel
            for (int y = 0; y < tensorHeight; y++)
            {
                for (int x = 0; x < tensorWidth; x++)
                {
                    float alpha = Mathf.Clamp(tensor[0, 0, y, x], 0f, 1f);
                    tempTexture.SetPixel(x, y, new Color(alpha, alpha, alpha));
                }
            }
        }
        else if (!isAlpha && channels == 3)
        {
            // RGB image
            for (int y = 0; y < tensorHeight; y++)
            {
                for (int x = 0; x < tensorWidth; x++)
                {
                    float r = Mathf.Clamp(tensor[0, 0, y, x], 0f, 1f);
                    float g = Mathf.Clamp(tensor[0, 1, y, x], 0f, 1f);
                    float b = Mathf.Clamp(tensor[0, 2, y, x], 0f, 1f);
                    tempTexture.SetPixel(x, y, new Color(r, g, b));
                }
            }
        }
        tempTexture.Apply();

        // Resize to the target size
        var finalTexture = ResizeTexture(tempTexture, targetWidth, targetHeight);
        DestroyImmediate(tempTexture);
        return finalTexture;
    }

    Texture2D ComposeResult(Texture2D foreground, Texture2D alpha)
    {
        if (foreground == null || alpha == null ||
            foreground.width != alpha.width || foreground.height != alpha.height)
        {
            Debug.LogError("Foreground and alpha texture sizes do not match");
            return null;
        }

        var result = new Texture2D(foreground.width, foreground.height, TextureFormat.RGBA32, false);
        var fgPixels = foreground.GetPixels();
        var alphaPixels = alpha.GetPixels();
        for (int i = 0; i < fgPixels.Length; i++)
        {
            Color fgColor = fgPixels[i];
            Color alphaColor = alphaPixels[i];
            // The alpha texture is grayscale, so its r channel holds the alpha value
            fgColor.a = alphaColor.r;
            result.SetPixel(i % result.width, i / result.width, fgColor);
        }
        result.Apply();
        return result;
    }

    Texture2D ResizeTexture(Texture2D source, int newWidth, int newHeight)
    {
        var rt = RenderTexture.GetTemporary(newWidth, newHeight);
        Graphics.Blit(source, rt);
        var result = new Texture2D(newWidth, newHeight, TextureFormat.RGBA32, false);
        RenderTexture.active = rt;
        result.ReadPixels(new Rect(0, 0, newWidth, newHeight), 0, 0);
        result.Apply();
        RenderTexture.ReleaseTemporary(rt);
        return result;
    }

    Texture2D WebCamTextureToTexture2D(WebCamTexture webCamTexture)
    {
        Texture2D tex = new Texture2D(webCamTexture.width, webCamTexture.height, TextureFormat.RGBA32, false);
        tex.SetPixels32(webCamTexture.GetPixels32());
        tex.Apply();
        return tex;
    }

    /// <summary>
    /// Reset the recurrent states (call when starting a new video sequence)
    /// </summary>
    public void ResetRecurrentStates()
    {
        InitializeRecurrentStates();
        Debug.Log("Recurrent states reset");
    }

    /// <summary>
    /// Set the downsample ratio
    /// </summary>
    public void SetDownsampleRatio(float ratio)
    {
        downsampleRatio = Mathf.Clamp(ratio, 0.1f, 1.0f);
        // Refresh the downsample_ratio tensor
        var ratioData = new float[] { downsampleRatio };
        downsampleRatioTensor = new DenseTensor<float>(ratioData, new int[] { 1 });
        Debug.Log($"Downsample ratio set to: {downsampleRatio}");
    }

    /// <summary>
    /// Get the alpha matte texture
    /// </summary>
    public Texture2D GetAlphaTexture()
    {
        return alphaTexture;
    }

    /// <summary>
    /// Get the foreground texture
    /// </summary>
    public Texture2D GetForegroundTexture()
    {
        return foregroundTexture;
    }

    /// <summary>
    /// Get the result texture
    /// </summary>
    public Texture2D GetResultTexture()
    {
        return resultTexture;
    }

    void OnDestroy()
    {
        session?.Dispose();
        if (webCamTexture != null && webCamTexture.isPlaying)
        {
            webCamTexture.Stop();
        }
    }
}
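For what it is worth, even with the CUDA provider active this version would likely stay far below real time: PrepareInputTensor and TensorToTexture walk every pixel on the CPU with GetPixels32/SetPixel each frame, and session.Run blocks the main thread, so the per-frame cost is dominated by CPU-side texture conversion no matter where the model itself executes. That is consistent with the 2 fps observed above.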

Result in Unity

(screenshot)

Main code

using System.Collections;
using UnityEngine;
using UnityEngine.UI;

// References
// reference1: https://github.com/PeterL1n/RobustVideoMatting/blob/master/documentation/inference.md
// reference2: https://docs.unity3d.com/Packages/com.unity.sentis@2.1/manual/use-model-output.html
public class VideoMatting : MonoBehaviour
{
    public RenderTexture OutputTexture => ouputCamera?.targetTexture;

    [SerializeField] private RenderTexture sourceTexture; // feed the original frames into sourceTexture
    [SerializeField] private Unity.InferenceEngine.ModelAsset modelAsset;
    [SerializeField] private Material alphaMaterial;
    [SerializeField] private RawImage sketchRawImage;
    [SerializeField] private Camera ouputCamera;
    [SerializeField] private Vector2 frameResolution = new Vector2(1920, 1080);
    [SerializeField] private RawImage debugRawImage;

    private RenderTexture _foregroundTexture;
    private RenderTexture _alphaTexture;
    private Unity.InferenceEngine.Worker _worker;
    private Unity.InferenceEngine.Model _runtimeModel;
    private RenderTexture _resultRenderTexture;
    private Unity.InferenceEngine.Tensor<float> _r1, _r2, _r3, _r4, _inputTensor, _downsampleRatioTensor;
    private Vector2 _previousResolution;

    void Awake()
    {
        // Initialize the model
        _runtimeModel = Unity.InferenceEngine.ModelLoader.Load(modelAsset);
        _worker = new Unity.InferenceEngine.Worker(_runtimeModel, Unity.InferenceEngine.BackendType.GPUCompute);
        _r1 = new Unity.InferenceEngine.Tensor<float>(new Unity.InferenceEngine.TensorShape(1, 1, 1, 1), new float[] { 0.0f });
        _r2 = new Unity.InferenceEngine.Tensor<float>(new Unity.InferenceEngine.TensorShape(1, 1, 1, 1), new float[] { 0.0f });
        _r3 = new Unity.InferenceEngine.Tensor<float>(new Unity.InferenceEngine.TensorShape(1, 1, 1, 1), new float[] { 0.0f });
        _r4 = new Unity.InferenceEngine.Tensor<float>(new Unity.InferenceEngine.TensorShape(1, 1, 1, 1), new float[] { 0.0f });
        _inputTensor = new Unity.InferenceEngine.Tensor<float>(new Unity.InferenceEngine.TensorShape(1, 3, 1, 1));
        _downsampleRatioTensor = new Unity.InferenceEngine.Tensor<float>(new Unity.InferenceEngine.TensorShape(1), new float[] { 1.0f });
        ouputCamera.backgroundColor = new Color(0, 0, 0, 0);
        sketchRawImage.material = alphaMaterial;
    }

    void Start()
    {
        StartCoroutine(ProcessVideoMatting());
    }

    void Update()
    {
        UpdateResultRenderTexture();
        UpdateDebugRawImage();
    }

    public void SetSourceTexture(RenderTexture sourceTexture)
    {
        this.sourceTexture = sourceTexture;
    }

    void UpdateResultRenderTexture()
    {
        bool changedResolution = _previousResolution.x != frameResolution.x || _previousResolution.y != frameResolution.y;
        GetOrCreateRenderTexture(ref _resultRenderTexture, (int)frameResolution.x, (int)frameResolution.y, "ResultRT", changedResolution);
        if (ouputCamera != null && ouputCamera.targetTexture == null)
            ouputCamera.targetTexture = _resultRenderTexture;
        _previousResolution = frameResolution;
    }

    void UpdateDebugRawImage()
    {
        if (debugRawImage != null && OutputTexture != null)
            debugRawImage.texture = OutputTexture;
    }

    IEnumerator ProcessVideoMatting()
    {
        while (true)
        {
            if (sourceTexture == null)
            {
                yield return null;
                continue;
            }

            int textureWidth = sourceTexture.width;
            int textureHeight = sourceTexture.height;
            float optimalRatio = CalculateOptimalDownsampleRatio(textureWidth, textureHeight); // get downsample ratio

            var inputShape = new Unity.InferenceEngine.TensorShape(1, 3, textureHeight, textureWidth); // batch, channel, height, width
            if (_inputTensor == null || !_inputTensor.shape.Equals(inputShape))
            {
                _inputTensor?.Dispose();
                _inputTensor = new Unity.InferenceEngine.Tensor<float>(inputShape);
            }
            Unity.InferenceEngine.TextureConverter.ToTensor(sourceTexture, _inputTensor, new Unity.InferenceEngine.TextureTransform());
            _downsampleRatioTensor[0] = optimalRatio;

            _worker.SetInput("src", _inputTensor);
            // NOTE: r1i..r4i are fed the same zero tensors every frame here, so each
            // frame is matted independently; see the note after this code.
            _worker.SetInput("r1i", _r1);
            _worker.SetInput("r2i", _r2);
            _worker.SetInput("r3i", _r3);
            _worker.SetInput("r4i", _r4);
            _worker.SetInput("downsample_ratio", _downsampleRatioTensor);
            _worker.Schedule();
            yield return null;

            var foregroundTensor = _worker.PeekOutput("fgr") as Unity.InferenceEngine.Tensor<float>;
            var alphaTensor = _worker.PeekOutput("pha") as Unity.InferenceEngine.Tensor<float>;

            GetOrCreateRenderTexture(ref _foregroundTexture, textureWidth, textureHeight, "ForegroundRT");
            GetOrCreateRenderTexture(ref _alphaTexture, textureWidth, textureHeight, "AlphaRT");

            // The async readbacks act as a completion fence: wait until the GPU
            // results are ready, then discard the CPU clones and render the
            // still-on-GPU tensors straight into the render textures.
            var fgrAwaiter = foregroundTensor.ReadbackAndCloneAsync().GetAwaiter();
            var alphaAwaiter = alphaTensor.ReadbackAndCloneAsync().GetAwaiter();
            while (!fgrAwaiter.IsCompleted || !alphaAwaiter.IsCompleted)
            {
                yield return null;
            }
            using (var foregroundOut = fgrAwaiter.GetResult())
            using (var alphaOut = alphaAwaiter.GetResult())
            {
                Unity.InferenceEngine.TextureConverter.RenderToTexture(foregroundTensor, _foregroundTexture);
                Unity.InferenceEngine.TextureConverter.RenderToTexture(alphaTensor, _alphaTexture);
            }

            try
            {
                if (sketchRawImage != null)
                {
                    sketchRawImage.material.SetTexture("_FgrTex", _foregroundTexture);
                    sketchRawImage.material.SetTexture("_PhaTex", _alphaTexture);
                }
            }
            catch (System.Exception e)
            {
                Debug.LogError("NOTE: Please make sure the RawImage has a material using the VideoMatting shader. Exception: " + e.Message);
            }
        }
    }

    private RenderTexture GetOrCreateRenderTexture(ref RenderTexture renderTexture, int width, int height, string name, bool forceCreate = false)
    {
        if (renderTexture == null || renderTexture.width != width || renderTexture.height != height || forceCreate)
        {
            if (renderTexture != null)
            {
                renderTexture.Release();
                DestroyImmediate(renderTexture);
            }
            renderTexture = new RenderTexture(width, height, 24, RenderTextureFormat.ARGB32);
            renderTexture.name = name;
            renderTexture.Create();
        }
        return renderTexture;
    }

    // | Resolution    | Portrait      | Full-Body      |
    // | ------------- | ------------- | -------------- |
    // | <= 512x512    | 1             | 1              |
    // | 1280x720      | 0.375         | 0.6            |
    // | 1920x1080     | 0.25          | 0.4            |
    // | 3840x2160     | 0.125         | 0.2            |
    // The model downsamples 5 times internally; the chosen ratio must not
    // produce an odd dimension at any of those 5 steps.
    // Keep this in mind when changing width/height.
    private float CalculateOptimalDownsampleRatio(int width, int height)
    {
        int imagePixelCount = width * height;
        if (imagePixelCount <= 512 * 512)
        {
            return 1.0f; // keep the original size
        }
        else if (imagePixelCount <= 1280 * 720)
        {
            return 0.6f;
        }
        else if (imagePixelCount <= 1920 * 1080)
        {
            return 0.4f;
        }
        else if (imagePixelCount <= 3840 * 2160)
        {
            return 0.2f;
        }
        else
        {
            return 0.1f;
        }
    }

    void OnDestroy()
    {
        _r1?.Dispose();
        _r2?.Dispose();
        _r3?.Dispose();
        _r4?.Dispose();
        _inputTensor?.Dispose();
        _downsampleRatioTensor?.Dispose();
        _worker?.Dispose();
    }
}
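Two notes on this version. First, the material assigned to sketchRawImage must use a shader that composites _FgrTex with _PhaTex (the try/catch above hints at this; the project ships one). Second, as noted in the code, the recurrent inputs r1i..r4i stay at their initial zero tensors, so each frame is matted independently; the RVM inference docs (reference1 in the code) recommend feeding the recurrent outputs r1o..r4o back in for temporal stability. A minimal sketch of what that feedback could look like inside VideoMatting; the helper name and the CPU readback are my assumptions, not part of the project:

// Hypothetical helper, called once per frame after _worker.Schedule():
// recycles RVM's recurrent states so the next frame sees this frame's memory.
void FeedBackRecurrentStates()
{
    // Peeked tensors are owned by the worker and may be overwritten by the
    // next Schedule(), so take clones. ReadbackAndClone() copies to the CPU,
    // which is simple but not free; a production version would keep the
    // states on the GPU.
    _r1?.Dispose();
    _r1 = (_worker.PeekOutput("r1o") as Unity.InferenceEngine.Tensor<float>).ReadbackAndClone();
    _r2?.Dispose();
    _r2 = (_worker.PeekOutput("r2o") as Unity.InferenceEngine.Tensor<float>).ReadbackAndClone();
    _r3?.Dispose();
    _r3 = (_worker.PeekOutput("r3o") as Unity.InferenceEngine.Tensor<float>).ReadbackAndClone();
    _r4?.Dispose();
    _r4 = (_worker.PeekOutput("r4o") as Unity.InferenceEngine.Tensor<float>).ReadbackAndClone();
}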

Detour repo: https://github.com/xue-fei/RobustVideoMatting-unity.git

Project repo

https://github.com/xue-fei/robustvideomatting-unity-sentis.git
Forked from:
https://github.com/realmorm/unity-sentis-robustvideomatting-example.git

