MiniEngine学习笔记 : CommandQueue
学习CommandQueue类
- 前言
- CommandQueue
- (1) 源码展示
- (2) 源码分析
- (3) 类使用分析
- (4) 类总结
- (5) 类改进
前言
- 书接上回CommandAllocatorPool,本篇文章分析CommandQueue。
CommandQueue
(1) 源码展示
- 类头文件如下:
#pragma once#include <vector>
#include <queue>
#include <mutex>
#include <stdint.h>
#include "CommandAllocatorPool.h"class CommandQueue
{friend class CommandListManager;friend class CommandContext;public:CommandQueue(D3D12_COMMAND_LIST_TYPE Type);~CommandQueue();void Create(ID3D12Device* pDevice);void Shutdown();inline bool IsReady(){return m_CommandQueue != nullptr;}uint64_t IncrementFence(void);bool IsFenceComplete(uint64_t FenceValue);void StallForFence(uint64_t FenceValue);void StallForProducer(CommandQueue& Producer);void WaitForFence(uint64_t FenceValue);void WaitForIdle(void) { WaitForFence(IncrementFence()); }ID3D12CommandQueue* GetCommandQueue() { return m_CommandQueue; }uint64_t GetNextFenceValue() { return m_NextFenceValue; }private:uint64_t ExecuteCommandList(ID3D12CommandList* List);ID3D12CommandAllocator* RequestAllocator(void);void DiscardAllocator(uint64_t FenceValueForReset, ID3D12CommandAllocator* Allocator);ID3D12CommandQueue* m_CommandQueue;const D3D12_COMMAND_LIST_TYPE m_Type;CommandAllocatorPool m_AllocatorPool;std::mutex m_FenceMutex;std::mutex m_EventMutex;// Lifetime of these objects is managed by the descriptor cacheID3D12Fence* m_pFence;uint64_t m_NextFenceValue;uint64_t m_LastCompletedFenceValue;HANDLE m_FenceEventHandle;
};
- 类源文件如下:
#include "pch.h"
#include "CommandQueue.h"

// Records the queue type and seeds the fence counters. The type is placed in
// the top 8 bits of every fence value, so a fence value identifies its queue.
// NOTE(review): the initializer list is not in member declaration order
// (m_CommandQueue is declared before m_Type, m_AllocatorPool before m_pFence).
// Harmless here because every initializer only reads the Type parameter, but
// compilers warn about it. m_FenceEventHandle is left uninitialized until Create().
CommandQueue::CommandQueue(D3D12_COMMAND_LIST_TYPE Type) :
    m_Type(Type),
    m_CommandQueue(nullptr),
    m_pFence(nullptr),
    m_NextFenceValue((uint64_t)Type << 56 | 1),
    m_LastCompletedFenceValue((uint64_t)Type << 56),
    m_AllocatorPool(Type)
{
}

CommandQueue::~CommandQueue()
{
    Shutdown();
}

// Releases everything Create() made. Safe to call when Create() never ran or
// when Shutdown() has already run, because m_CommandQueue is reset to nullptr.
void CommandQueue::Shutdown()
{
    if (m_CommandQueue == nullptr)
        return;

    m_AllocatorPool.Shutdown();

    CloseHandle(m_FenceEventHandle);

    m_pFence->Release();
    m_pFence = nullptr;

    m_CommandQueue->Release();
    m_CommandQueue = nullptr;
}

// Creates the D3D12 queue, the fence (pre-signalled to this queue's base
// value), the auto-reset fence event and the allocator pool.
void CommandQueue::Create(ID3D12Device* pDevice)
{
    ASSERT(pDevice != nullptr);
    ASSERT(!IsReady());
    ASSERT(m_AllocatorPool.Size() == 0);

    D3D12_COMMAND_QUEUE_DESC QueueDesc = {};
    QueueDesc.Type = m_Type;
    QueueDesc.NodeMask = 1;
    // NOTE(review): the HRESULT of CreateCommandQueue is not checked, unlike
    // CreateFence below; a failure would surface as a null dereference in SetName.
    pDevice->CreateCommandQueue(&QueueDesc, MY_IID_PPV_ARGS(&m_CommandQueue));
    m_CommandQueue->SetName(L"CommandListManager::m_CommandQueue");

    ASSERT_SUCCEEDED(pDevice->CreateFence(0, D3D12_FENCE_FLAG_NONE, MY_IID_PPV_ARGS(&m_pFence)));
    m_pFence->SetName(L"CommandListManager::m_pFence");
    // Move the fence from 0 to this queue's base value so that it matches the
    // initial m_LastCompletedFenceValue.
    m_pFence->Signal((uint64_t)m_Type << 56);

    m_FenceEventHandle = CreateEvent(nullptr, false, false, nullptr);
    ASSERT(m_FenceEventHandle != NULL);

    m_AllocatorPool.Create(pDevice);

    ASSERT(IsReady());
}

// Closes List, submits it, signals the fence behind it and returns the fence
// value that marks its completion.
uint64_t CommandQueue::ExecuteCommandList(ID3D12CommandList* List)
{
    std::lock_guard<std::mutex> LockGuard(m_FenceMutex);

    ASSERT_SUCCEEDED(((ID3D12GraphicsCommandList*)List)->Close());

    // Kickoff the command list
    m_CommandQueue->ExecuteCommandLists(1, &List);

    // Signal the next fence value (with the GPU)
    m_CommandQueue->Signal(m_pFence, m_NextFenceValue);

    // And increment the fence value.
    return m_NextFenceValue++;
}

// Signals the next fence value on the queue without submitting work and
// returns the value that was signalled.
uint64_t CommandQueue::IncrementFence(void)
{
    std::lock_guard<std::mutex> LockGuard(m_FenceMutex);
    m_CommandQueue->Signal(m_pFence, m_NextFenceValue);
    return m_NextFenceValue++;
}

bool CommandQueue::IsFenceComplete(uint64_t FenceValue)
{
    // Avoid querying the fence value by testing against the last one seen.
    // The max() is to protect against an unlikely race condition that could cause the last
    // completed fence value to regress.
    // NOTE(review): m_LastCompletedFenceValue is read and written here without
    // holding either mutex. std::max needs <algorithm>, presumably supplied by pch.h.
    if (FenceValue > m_LastCompletedFenceValue)
        m_LastCompletedFenceValue = std::max(m_LastCompletedFenceValue, m_pFence->GetCompletedValue());

    return FenceValue <= m_LastCompletedFenceValue;
}

// Queues a GPU-side wait on this queue for FenceValue. The producing queue is
// recovered from the type stored in the top 8 bits of FenceValue, through the
// global Graphics::g_CommandManager.
void CommandQueue::StallForFence(uint64_t FenceValue)
{
    CommandQueue& Producer = Graphics::g_CommandManager.GetQueue((D3D12_COMMAND_LIST_TYPE)(FenceValue >> 56));
    m_CommandQueue->Wait(Producer.m_pFence, FenceValue);
}

// Queues a GPU-side wait on this queue for the last value Producer signalled
// (m_NextFenceValue - 1).
void CommandQueue::StallForProducer(CommandQueue& Producer)
{
    ASSERT(Producer.m_NextFenceValue > 0);
    m_CommandQueue->Wait(Producer.m_pFence, Producer.m_NextFenceValue - 1);
}

// Blocks the calling thread until the fence reaches FenceValue.
void CommandQueue::WaitForFence(uint64_t FenceValue)
{
    if (IsFenceComplete(FenceValue))
        return;

    // TODO: Think about how this might affect a multi-threaded situation. Suppose thread A
    // wants to wait for fence 100, then thread B comes along and wants to wait for 99. If
    // the fence can only have one event set on completion, then thread B has to wait for
    // 100 before it knows 99 is ready. Maybe insert sequential events?
    {
        std::lock_guard<std::mutex> LockGuard(m_EventMutex);

        m_pFence->SetEventOnCompletion(FenceValue, m_FenceEventHandle);
        WaitForSingleObject(m_FenceEventHandle, INFINITE);
        m_LastCompletedFenceValue = FenceValue;
    }
}

// Asks the pool for an allocator, passing the fence's current completed value.
ID3D12CommandAllocator* CommandQueue::RequestAllocator()
{
    uint64_t CompletedFence = m_pFence->GetCompletedValue();

    return m_AllocatorPool.RequestAllocator(CompletedFence);
}

// Returns Allocator to the pool, tagged with FenceValue.
void CommandQueue::DiscardAllocator(uint64_t FenceValue, ID3D12CommandAllocator* Allocator)
{
    m_AllocatorPool.DiscardAllocator(FenceValue, Allocator);
}
(2) 源码分析
类成员变量如下:
// The D3D12 command queue object.
ID3D12CommandQueue* m_CommandQueue;

// Fixed command-list type of this queue.
const D3D12_COMMAND_LIST_TYPE m_Type;

// Command-allocator pool dedicated to this queue.
CommandAllocatorPool m_AllocatorPool;

// Mutexes for the fence (signal / next value) and for the fence event.
std::mutex m_FenceMutex;
std::mutex m_EventMutex;

// Upstream comment: "Lifetime of these objects is managed by the descriptor cache".
// NOTE(review): Shutdown() releases the fence and closes the event handle
// itself, so that comment looks stale - confirm.
ID3D12Fence* m_pFence; // D3D12 fence object
uint64_t m_NextFenceValue; // next fence value to signal (last signalled value is this - 1)
uint64_t m_LastCompletedFenceValue; // most recent completed fence value that was observed
HANDLE m_FenceEventHandle; // event handle used to block on the fence
类方法如下:
/*
* 构造函数,传入命令类型
* 1.记录命令类型到m_Type
* 2.初始化m_CommandQueue和m_pFence为空指针
* 3.根据Type划分围栏范围:
* D3D12_COMMAND_LIST_TYPE_DIRECT = 0,围栏值范围: 0x0100000000000001 - 0x01FFFFFFFFFFFFFF
* D3D12_COMMAND_LIST_TYPE_COMPUTE = 2, 围栏值范围: 0x0200000000000001 - 0x02FFFFFFFFFFFFFF
* D3D12_COMMAND_LIST_TYPE_COPY = 3, 围栏值范围: 0x0300000000000001 - 0x03FFFFFFFFFFFFFF
* 这样不同类型队列的围栏值完全隔离,避免冲突,且可通过位运算快速识别围栏所属的队列类型,
* 这种设计确保了不同命令队列的围栏值永远不会重叠,便于系统管理和同步。
*
* 4.m_NextFenceValue = 围栏范围起始值+1
* 5.m_LastCompletedFenceValue = 围栏范围起始值
* 6.独立命令分配器池m_AllocatorPool(Type)
*/
CommandQueue::CommandQueue(D3D12_COMMAND_LIST_TYPE Type) :m_Type(Type),m_CommandQueue(nullptr),m_pFence(nullptr),m_NextFenceValue((uint64_t)Type << 56 | 1),m_LastCompletedFenceValue((uint64_t)Type << 56),m_AllocatorPool(Type)
{
}// 析构调用Shutdown
CommandQueue::~CommandQueue()
{Shutdown();
}// 关闭
void CommandQueue::Shutdown()
{// 若m_CommandQueue则未调用Create,直接返回if (m_CommandQueue == nullptr)return;// 否则关闭命令池m_AllocatorPool.Shutdown();// 关闭围栏事件句柄,释放对应资源CloseHandle(m_FenceEventHandle);// 释放围栏并置未空m_pFence->Release();m_pFence = nullptr;// 释放D3D12命令队列并置为空m_CommandQueue->Release();m_CommandQueue = nullptr;
}// 创建函数,传入设备对象
void CommandQueue::Create(ID3D12Device* pDevice)
{// 检测设备对象不为空,且未准备 (m_CommandQueue为空)ASSERT(pDevice != nullptr);ASSERT(!IsReady());ASSERT(m_AllocatorPool.Size() == 0);// 命令队列描述,设置类型为m_TypeD3D12_COMMAND_QUEUE_DESC QueueDesc = {};QueueDesc.Type = m_Type;QueueDesc.NodeMask = 1; // 使用默认GPU节点// 创建命令队列对象,并设置名称pDevice->CreateCommandQueue(&QueueDesc, MY_IID_PPV_ARGS(&m_CommandQueue));m_CommandQueue->SetName(L"CommandListManager::m_CommandQueue");// 创建围栏对象ASSERT_SUCCEEDED(pDevice->CreateFence(0, D3D12_FENCE_FLAG_NONE, MY_IID_PPV_ARGS(&m_pFence)));m_pFence->SetName(L"CommandListManager::m_pFence");// 围栏创建时的初始值为0// 立即设置围栏值为范围起始值,确保所有后续的围栏操作都基于这个基准值递增m_pFence->Signal((uint64_t)m_Type << 56);// 创建围栏事件句柄m_FenceEventHandle = CreateEvent(nullptr, false, false, nullptr);ASSERT(m_FenceEventHandle != NULL);// 创建命令分配器池m_AllocatorPool.Create(pDevice);// 确保m_CommandQueue不为空ASSERT(IsReady());
}// 执行命令列表
uint64_t CommandQueue::ExecuteCommandList(ID3D12CommandList* List)
{// 加锁(m_FenceMutex)std::lock_guard<std::mutex> LockGuard(m_FenceMutex);// 首先关闭命令列表(即列表使用时直接记录命令,用户无需关闭)ASSERT_SUCCEEDED(((ID3D12GraphicsCommandList*)List)->Close());// 将命令列表提交到GPU命令队列中执行m_CommandQueue->ExecuteCommandLists(1, &List);// 发出下一个栅栏值信号(通过GPU),即此命令列表执行完毕后,// 围栏m_pFence的围栏值将被设为m_NextFenceValue(初始为范围起点+1)。m_CommandQueue->Signal(m_pFence, m_NextFenceValue);// 递增下一个围栏值 m_NextFenceValue return m_NextFenceValue++;
}// 递增围栏值
uint64_t CommandQueue::IncrementFence(void)
{std::lock_guard<std::mutex> LockGuard(m_FenceMutex);// 注册当前下一个围栏值,然后将其递增m_CommandQueue->Signal(m_pFence, m_NextFenceValue);return m_NextFenceValue++;
}// 查询围栏值FenceValue是否完成
bool CommandQueue::IsFenceComplete(uint64_t FenceValue)
{// 仅当FenceValue大于m_LastCompletedFenceValue(上一次查询的围栏完成结果)时,// 查询围栏当前完成值,即m_pFence->GetCompletedValue(),// 使用std::max是为了处理一个不太可能但理论上存在的竞态条件,保持围栏值的单调递增。if (FenceValue > m_LastCompletedFenceValue)m_LastCompletedFenceValue = std::max(m_LastCompletedFenceValue, m_pFence->GetCompletedValue());// 返回m_LastCompletedFenceValue大于等于FenceValuereturn FenceValue <= m_LastCompletedFenceValue;
}/*核心代码:m_CommandQueue->Wait(Fence, Value),向当前命令队列m_CommandQueue插入命令,执行到此命令暂停队列命令执行,直到围栏Fence达到值Value。因此此函数就是插入阻塞命令,让此命令队列,等待另一个命令队列Producer,完成值FenceValue。在MiniEngine中Graphics命名空间包含几个全局变量:extern ID3D12Device* g_Device;extern CommandListManager g_CommandManager;extern ContextManager g_ContextManager;其中ContextManager包含DIRECT、COMPUTE、COPY三个CommandQueue,此处通过(D3D12_COMMAND_LIST_TYPE)(FenceValue >> 56),从围栏值区分命令队列类型,然后从g_ContextManager获取对应CommandQueue,即Producer。
*/
void CommandQueue::StallForFence(uint64_t FenceValue)
{CommandQueue& Producer = Graphics::g_CommandManager.GetQueue((D3D12_COMMAND_LIST_TYPE)(FenceValue >> 56));m_CommandQueue->Wait(Producer.m_pFence, FenceValue);
}// 同步命令队列Producer,即使此命令队列等待Producer.m_pFence围栏,
// 达到Producer.m_NextFenceValue - 1值 (m_NextFenceValue - 1表示最后提交命令的围栏值)
void CommandQueue::StallForProducer(CommandQueue& Producer)
{ASSERT(Producer.m_NextFenceValue > 0);m_CommandQueue->Wait(Producer.m_pFence, Producer.m_NextFenceValue - 1);
}// 等待围栏值
void CommandQueue::WaitForFence(uint64_t FenceValue)
{// 若完成则返回if (IsFenceComplete(FenceValue))return;{// 加锁(m_EventMutex)std::lock_guard<std::mutex> LockGuard(m_EventMutex);// 设置事件m_FenceEventHandle,在m_pFence达到FenceValue时触发m_pFence->SetEventOnCompletion(FenceValue, m_FenceEventHandle);// 等待m_FenceEventHandle触发,即达到围栏值WaitForSingleObject(m_FenceEventHandle, INFINITE);// 更新最后查询到的完成围栏值m_LastCompletedFenceValue = FenceValue;}// 此处设为阻塞执行,因为若不阻塞,第一次调用等待围栏100,// 第二次调用等待围栏99,则m_pFence->SetEventOnCompletion会覆盖100的调用,// 导致100围栏事件永远不会触发,因此使用阻塞式API。// 或许可以考虑对每个调用创建独立的FenceEventHandle?
}// 是否命令分配器
ID3D12CommandAllocator* CommandQueue::RequestAllocator()
{// 获取当前围栏完成值uint64_t CompletedFence = m_pFence->GetCompletedValue();// 从命令分配器池请求分配器return m_AllocatorPool.RequestAllocator(CompletedFence);
}// 丢弃命令分配器
void CommandQueue::DiscardAllocator(uint64_t FenceValue, ID3D12CommandAllocator* Allocator)
{m_AllocatorPool.DiscardAllocator(FenceValue, Allocator);
}// 判断是否已初始化
inline bool CommandQueue::IsReady()
{return m_CommandQueue != nullptr;
}// 目标:WaitForIdle-等待队列所有命令完成,
// 实现:使用IncrementFence插入下一个围栏值,并使用WaitForFence等待。
void CommandQueue::WaitForIdle(void) { WaitForFence(IncrementFence()); }// 获取D3D12命令队列对象
ID3D12CommandQueue* CommandQueue::GetCommandQueue() { return m_CommandQueue; }// 获取下一个围栏值
uint64_t CommandQueue::GetNextFenceValue() { return m_NextFenceValue; }
(3) 类使用分析
- 可以看到CommandQueue封装了固定命令类型的命令队列,包含ID3D12CommandQueue、ID3D12Fence、CommandAllocatorPool等对象,可独立记录和执行命令了。
- 再次概览CommandQueue功能。
class CommandQueue
{friend class CommandListManager;friend class CommandContext;public:// 构造和析构CommandQueue(D3D12_COMMAND_LIST_TYPE Type);~CommandQueue();// 创建和关闭void Create(ID3D12Device* pDevice);void Shutdown();// 检查是否已创建inline bool IsReady(){return m_CommandQueue != nullptr;}// 递增下一个围栏值uint64_t IncrementFence(void);// 查询围栏是否完成bool IsFenceComplete(uint64_t FenceValue);// 插入等待命令,等待另一队列完成FenceValue,依赖于全局变量void StallForFence(uint64_t FenceValue);// 插入等待命令,等待另一队列Producer完成所有命令void StallForProducer(CommandQueue& Producer);// 等待围栏完成void WaitForFence(uint64_t FenceValue);// 等待所有命令完成void WaitForIdle(void) { WaitForFence(IncrementFence()); }// 获取D3D12命令队列对象ID3D12CommandQueue* GetCommandQueue() { return m_CommandQueue; }// 获取下一个围栏值uint64_t GetNextFenceValue() { return m_NextFenceValue; }private:// 执行命令列表,注册并递增下一个围栏值uint64_t ExecuteCommandList(ID3D12CommandList* List);// 获取和丢弃命令分配器ID3D12CommandAllocator* RequestAllocator(void);void DiscardAllocator(uint64_t FenceValueForReset, ID3D12CommandAllocator* Allocator);ID3D12CommandQueue* m_CommandQueue;const D3D12_COMMAND_LIST_TYPE m_Type;CommandAllocatorPool m_AllocatorPool;std::mutex m_FenceMutex;std::mutex m_EventMutex;// Lifetime of these objects is managed by the descriptor cacheID3D12Fence* m_pFence;uint64_t m_NextFenceValue;uint64_t m_LastCompletedFenceValue;HANDLE m_FenceEventHandle;
};
(4) 类总结
- CommandQueue封装了固定类型的命令队列,可执行命令列表,并增删改查围栏值。
(5) 类改进
- StallForFence(uint64_t FenceValue)方法依赖全局变量Graphics::g_CommandManager实现,妨碍该类作为独立组件使用。改进方式主要是修改此方法:改为显式传入CommandQueue,即StallForProducer(CommandQueue& Producer, uint64_t FenceValue)即可。
- 改进后CommandQueue类代码位于:CommandQueue,可作为独立组件使用。
