Linux学习笔记:PCIe内核篇(3):DPC服务
目录
1. 整体处理流程
2. DPC初始化阶段
2.1 驱动注册
3. DPC触发与处理流程梳理
3.1 硬件触发阶段
3.2 中断处理
1、上半部(dpc_irq)
2、下半部(dpc_handler)
1. 整体处理流程
PCIe DPC调用流程树状图
│
├── 1. 初始化阶段
│ ├── pcie_dpc_init()
│ │ └── pcie_port_service_register()
│ │ └── 注册dpcdriver结构体
│ │ ├── .name = "dpc"
│ │ ├── .probe = dpc_probe
│ │ └── .remove = dpc_remove
│ │
│ └── dpc_probe()
│ ├── pcie_aer_is_native()检查
│ ├── devm_request_threaded_irq()注册中断
│ │ ├── 上半部: dpc_irq
│ │ └── 下半部: dpc_handler
│ ├── 配置DPC控制寄存器
│ │ ├── 读PCI_EXP_DPC_CAP
│ │ └── 写PCI_EXP_DPC_CTL
│ └── pci_add_ext_cap_save_buffer()
│
├── 2. 错误触发流程
│ ├── 硬件自动触发条件
│ │ ├── ERR_FATAL错误
│ │ ├── ERR_NONFATAL错误(需配置)
│ │ └── RP PIO错误(需扩展支持)
│ │
│ └── 软件触发
│ └── 写DPC Control寄存器的Software Trigger位
│
├── 3. 中断处理流程
│ ├── 上半部(dpc_irq)
│ │ ├── 读PCI_EXP_DPC_STATUS
│ │ ├── 验证中断有效性
│ │ └── 清除中断状态位
│ │ └── 条件触发下半部
│ │
│ └── 下半部(dpc_handler)
│ ├── dpc_process_error()
│ │ ├── 解析错误原因(TRIGGER_RSN)
│ │ ├── 处理RP PIO错误(dpc_process_rp_pio_error)
│ │ └── 处理AER错误
│ │ ├── aer_get_device_error_info
│ │ ├── aer_print_error
│ │ └── pci_aer_clear_*_status
│ │
│ └── pcie_do_recovery()
│ ├── 确定恢复范围
│ │ ├── Root Port/下游端口: 自身+下级设备
│ │ └── 其他设备: 同级设备
│ │
│ ├── 广播错误状态
│ │ ├── report_frozen_detected
│ │ └── report_normal_detected
│ │
│ └── dpc_reset_link()
│ ├── 等待链路禁用(pcie_wait_for_link)
│ ├── 清除DPC状态位
│ ├── 等待链路恢复
│ └── 状态标记
│ ├── PCI_DPC_RECOVERING
│ └── PCI_DPC_RECOVERED
│
└── 4. 恢复后处理
    ├── 成功恢复路径
    │   ├── 广播mmio_enabled消息
    │   ├── 广播resume消息
    │   └── 清除AER状态
    │
    └── 恢复失败路径
        ├── 发送DISCONNECT事件
        └── 记录失败日志
2. DPC初始化阶段
2.1 驱动注册
/*
 * Port service driver descriptor for DPC.  It names the service "dpc",
 * sets port_type to PCIE_ANY_PORT, selects PCIE_PORT_SERVICE_DPC, and
 * wires dpc_probe()/dpc_remove() as the probe and remove callbacks.
 */
static struct pcie_port_service_driver dpcdriver = {
	.name		= "dpc",
	.port_type	= PCIE_ANY_PORT,
	.service	= PCIE_PORT_SERVICE_DPC,
	.probe		= dpc_probe,
	.remove		= dpc_remove,
};

/*
 * pcie_dpc_init - register the DPC port service driver
 *
 * Return: the value returned by pcie_port_service_register().
 */
int __init pcie_dpc_init(void)
{
	int rc = pcie_port_service_register(&dpcdriver);

	return rc;
}
static int dpc_probe(struct pcie_device *dev)
{struct pci_dev *pdev = dev->port;struct device *device = &dev->device;int status;u16 ctl, cap;if (!pcie_aer_is_native(pdev) && !pcie_ports_dpc_native)return -ENOTSUPP;status = devm_request_threaded_irq(device, dev->irq, dpc_irq,dpc_handler, IRQF_SHARED,"pcie-dpc", pdev);if (status) {pci_warn(pdev, "request IRQ%d failed: %d\n", dev->irq,status);return status;}pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CAP, &cap);pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl);ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN;pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl);pci_info(pdev, "enabled with IRQ %d\n", dev->irq);pci_info(pdev, "error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n",cap & PCI_EXP_DPC_IRQ, FLAG(cap, PCI_EXP_DPC_CAP_RP_EXT),FLAG(cap, PCI_EXP_DPC_CAP_POISONED_TLP),FLAG(cap, PCI_EXP_DPC_CAP_SW_TRIGGER), pdev->dpc_rp_log_size,FLAG(cap, PCI_EXP_DPC_CAP_DL_ACTIVE));pci_add_ext_cap_save_buffer(pdev, PCI_EXT_CAP_ID_DPC, sizeof(u16));return status;
}
- 确认内核原生控制AER(`pcie_aer_is_native()`),或已通过 `pcie_ports_dpc_native` 强制启用DPC;两者都不满足时返回 `-ENOTSUPP`
- 使用 `devm_request_threaded_irq()` 注册线程化中断
  - 上半部:`dpc_irq()`(快速处理)
  - 下半部:`dpc_handler()`(耗时操作)
- 读取DPC Capability和Control寄存器,启用致命错误触发和中断(`PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN`)
3. DPC触发与处理流程梳理
3.1 硬件触发阶段
触发条件:
- 检测到未屏蔽的致命错误(ERR_FATAL)
- 收到下游的ERR_FATAL消息
- RP PIO错误(若支持扩展)
硬件自动行为:
- 冻结下游TLP传输
- 设置 `DPC Trigger Status` 和 `DPC Trigger Reason`
- 强制链路进入Disabled状态
- 生成中断(若启用)
3.2 中断处理
1、上半部(dpc_irq)
/*
 * dpc_irq - hard-IRQ half of the DPC interrupt
 * @irq: interrupt number (unused)
 * @context: the struct pci_dev passed when the IRQ was requested
 *
 * Return: IRQ_NONE when the DPC status register reads as all ones or does
 * not have the interrupt status bit set; IRQ_WAKE_THREAD when the trigger
 * status bit is set; IRQ_HANDLED otherwise.
 */
static irqreturn_t dpc_irq(int irq, void *context)
{
	struct pci_dev *port = context;
	u16 dpc_base = port->dpc_cap;
	u16 dpc_status;

	pci_read_config_word(port, dpc_base + PCI_EXP_DPC_STATUS, &dpc_status);

	/* An all-ones value is not treated as a valid status. */
	if (dpc_status == (u16)(~0))
		return IRQ_NONE;

	/* Not our interrupt (the line is requested as shared). */
	if (!(dpc_status & PCI_EXP_DPC_STATUS_INTERRUPT))
		return IRQ_NONE;

	/* Acknowledge by writing the interrupt status bit back. */
	pci_write_config_word(port, dpc_base + PCI_EXP_DPC_STATUS,
			      PCI_EXP_DPC_STATUS_INTERRUPT);

	/* Only an actual containment trigger needs the threaded handler. */
	return (dpc_status & PCI_EXP_DPC_STATUS_TRIGGER) ?
		IRQ_WAKE_THREAD : IRQ_HANDLED;
}
- 检查 `PCI_EXP_DPC_STATUS_INTERRUPT` 位;若该位未置位,或状态寄存器读回全1(`0xffff`),返回 `IRQ_NONE`
- 写 `PCI_EXP_DPC_STATUS_INTERRUPT` 清除中断标志
- 如果 `PCI_EXP_DPC_STATUS_TRIGGER` 置位,唤醒下半部线程(`IRQ_WAKE_THREAD`),否则返回 `IRQ_HANDLED`
2、下半部(dpc_handler)
/*
 * dpc_handler - threaded half of the DPC interrupt
 * @irq: interrupt number (unused)
 * @context: the struct pci_dev passed when the IRQ was requested
 *
 * Decodes and logs the containment event, then runs error recovery for a
 * frozen channel with dpc_reset_link() as the reset callback.
 *
 * Return: always IRQ_HANDLED.
 */
static irqreturn_t dpc_handler(int irq, void *context)
{
	struct pci_dev *port = context;

	dpc_process_error(port);

	/* We configure DPC so it only triggers on ERR_FATAL */
	pcie_do_recovery(port, pci_channel_io_frozen, dpc_reset_link);

	return IRQ_HANDLED;
}
- 调用 `dpc_process_error()` 解析错误类型和来源
- 通过 `pcie_do_recovery()` 启动恢复流程,并指定恢复方法为 `dpc_reset_link`
void dpc_process_error(struct pci_dev *pdev)
{u16 cap = pdev->dpc_cap, status, source, reason, ext_reason;struct aer_err_info info;pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source);pci_info(pdev, "containment event, status:%#06x source:%#06x\n",status, source);reason = (status & PCI_EXP_DPC_STATUS_TRIGGER_RSN) >> 1;ext_reason = (status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT) >> 5;pci_warn(pdev, "%s detected\n",(reason == 0) ? "unmasked uncorrectable error" :(reason == 1) ? "ERR_NONFATAL" :(reason == 2) ? "ERR_FATAL" :(ext_reason == 0) ? "RP PIO error" :(ext_reason == 1) ? "software trigger" :"reserved error");/* show RP PIO error detail information */if (pdev->dpc_rp_extensions && reason == 3 && ext_reason == 0)dpc_process_rp_pio_error(pdev);else if (reason == 0 &&dpc_get_aer_uncorrect_severity(pdev, &info) &&aer_get_device_error_info(pdev, &info)) {aer_print_error(pdev, &info);pci_aer_clear_nonfatal_status(pdev);pci_aer_clear_fatal_status(pdev);}
}
- 读取 `DPC_STATUS` 和 `DPC_SOURCE_ID` 寄存器
- 提取 `TRIGGER_RSN`(标准原因)和 `TRIGGER_RSN_EXT`(扩展原因)
- RP PIO错误处理(端口支持RP扩展,且 reason == 3、ext_reason == 0):调用 `dpc_process_rp_pio_error()`
- AER相关错误处理(reason == 0,即未屏蔽的不可纠正错误):
  - 通过 `aer_get_device_error_info()` 获取详细信息
  - 打印错误日志(`aer_print_error()`)
  - 清除AER状态(`pci_aer_clear_xxx_status()`)
/*
 * pcie_do_recovery - drive error recovery for a device and its neighbours
 * @dev: device that detected the error
 * @state: channel state; pci_channel_io_frozen selects the "frozen" path
 * @reset_subordinates: callback used on the frozen path to reset everything
 *	below the chosen bridge
 *
 * Chooses the bridge whose subtree is recovered, broadcasts the
 * error_detected stage (frozen or normal), and on the frozen path calls
 * @reset_subordinates.  Depending on the status accumulated by the walks,
 * it then broadcasts mmio_enabled, slot_reset and finally resume.
 *
 * Return: the final recovery status.  On the failure path a DISCONNECT
 * uevent is sent for the bridge and the status at the point of failure is
 * returned.
 */
pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
		pci_channel_state_t state,
		pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev))
{
	const int port_type = pci_pcie_type(dev);
	pci_ers_result_t result = PCI_ERS_RESULT_CAN_RECOVER;
	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
	struct pci_dev *bridge;

	/*
	 * If the error was detected by a Root Port, Downstream Port, RCEC,
	 * or RCiEP, recovery runs on the device itself.  For Ports, that
	 * also includes any subordinate devices.
	 *
	 * If it was detected by another device (Endpoint, etc), recovery
	 * runs on the device and anything else under the same Port, i.e.,
	 * everything under "bridge".
	 */
	switch (port_type) {
	case PCI_EXP_TYPE_ROOT_PORT:
	case PCI_EXP_TYPE_DOWNSTREAM:
	case PCI_EXP_TYPE_RC_EC:
	case PCI_EXP_TYPE_RC_END:
		bridge = dev;
		break;
	default:
		bridge = pci_upstream_bridge(dev);
		break;
	}

	pci_dbg(bridge, "broadcast error_detected message\n");
	if (state != pci_channel_io_frozen) {
		pci_walk_bridge(bridge, report_normal_detected, &result);
	} else {
		pci_walk_bridge(bridge, report_frozen_detected, &result);
		if (reset_subordinates(bridge) != PCI_ERS_RESULT_RECOVERED) {
			pci_warn(bridge, "subordinate device reset failed\n");
			goto failed;
		}
	}

	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
		/* Start from RECOVERED; the walk may change it. */
		result = PCI_ERS_RESULT_RECOVERED;
		pci_dbg(bridge, "broadcast mmio_enabled message\n");
		pci_walk_bridge(bridge, report_mmio_enabled, &result);
	}

	if (result == PCI_ERS_RESULT_NEED_RESET) {
		/*
		 * TODO: Should call platform-specific
		 * functions to reset slot before calling
		 * drivers slot_reset callbacks
		 */
		result = PCI_ERS_RESULT_RECOVERED;
		pci_dbg(bridge, "broadcast slot_reset message\n");
		pci_walk_bridge(bridge, report_slot_reset, &result);
	}

	if (result != PCI_ERS_RESULT_RECOVERED)
		goto failed;

	pci_dbg(bridge, "broadcast resume message\n");
	pci_walk_bridge(bridge, report_resume, &result);

	/*
	 * If we have native control of AER, clear error status in the device
	 * that detected the error.  If the platform retained control of AER,
	 * it is responsible for clearing this status.  In that case, the
	 * signaling device may not even be visible to the OS.
	 */
	if (host->native_aer || pcie_ports_native) {
		pcie_clear_device_status(dev);
		pci_aer_clear_nonfatal_status(dev);
	}

	pci_info(bridge, "device recovery successful\n");
	return result;

failed:
	pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT);

	/* TODO: Should kernel panic here? */
	pci_info(bridge, "device recovery failed\n");

	return result;
}
- 确定恢复范围:
  - Root Port/Downstream Port(以及RCEC、RCiEP):恢复设备自身;对于端口,还包括其下级设备
  - 其他设备(如Endpoint):恢复其上游端口下的所有设备(该设备及同级设备)
- 广播 `error_detected` 消息给相关驱动,根据通道状态是否为冻结(`pci_channel_io_frozen`)调用不同回调
- 冻结状态下调用 `dpc_reset_link()` 进行链路级重置
/*** dpc_reset_link - 重置因DPC触发的PCIe链路* @pdev: 触发DPC的PCI设备(通常是Root Port或下游端口)* * 返回值:* PCI_ERS_RESULT_RECOVERED - 成功恢复* PCI_ERS_RESULT_DISCONNECT - 需要断开设备*/
pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
{pci_ers_result_t ret;u16 cap;/* 标记设备开始恢复流程 */set_bit(PCI_DPC_RECOVERING, &pdev->priv_flags);/** 硬件已在触发DPC时自动禁用链路,* 此处只需获取DPC Capability位置*/cap = pdev->dpc_cap; // DPC Capability寄存器偏移量/** 阶段1:等待链路完全停用* 检查Data Link Layer Link Active位是否清零* 超时时间:1000ms (符合PCIe规范要求)*/if (!pcie_wait_for_link(pdev, false)) // false表示等待链路非活跃pci_info(pdev, "Data Link Layer Link Active not cleared in 1000 msec\n");/** 阶段2:处理Root Port扩展情况* 如果支持RP扩展,需额外等待内部状态清理*/if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev)) {clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);ret = PCI_ERS_RESULT_DISCONNECT; // 标记需要断开设备goto out;}/** 阶段3:清除DPC触发状态* 写PCI_EXP_DPC_STATUS_TRIGGER位(写1清除)* 此操作允许端口退出DPC状态*/pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,PCI_EXP_DPC_STATUS_TRIGGER);/** 阶段4:等待链路重新激活* 检查Data Link Layer Link Active位是否置位* 超时时间:1000ms*/if (!pcie_wait_for_link(pdev, true)) { // true表示等待链路活跃pci_info(pdev, "Data Link Layer Link Active not set in 1000 msec\n");clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);ret = PCI_ERS_RESULT_DISCONNECT;} else {/* 成功
整体流程是这样的: