phy降速自愈到100M重试流程分析
我们使用状态机模型,并在其中新增一个由用户通过Netlink触发的“强制设置速率”事件。
1. 定义状态、事件和Netlink消息类型
/*
 * States of the PHY self-healing state machine.
 * The enumerator order (and therefore the numeric values) is part of the
 * contract and must not be changed.
 */
typedef enum {
    PHY_STATE_INIT,       /* start-up state */
    PHY_STATE_2_5G,       /* 2.5G has been requested */
    PHY_STATE_1G,         /* 1G has been requested */
    PHY_STATE_100M,       /* 100M has been requested */
    PHY_STATE_10M_RETRY,  /* 10M has been requested; periodic retry state */
    PHY_STATE_LINK_DOWN   /* every speed failed / link lost */
} phy_healing_state_t;

// Driver events
/*
 * Events fed into the PHY self-healing state machine.
 * The enumerator order (and therefore the numeric values) must stay as is.
 */
typedef enum {
    EVENT_LINK_UP,
    EVENT_LINK_DOWN,
    EVENT_TIMER_EXPIRED,
    EVENT_CABLE_PLUG,
    EVENT_USER_SET_SPEED  /* new: user forces a speed through Netlink */
} phy_event_t;

// Netlink message types
#define NETLINK_PHY_FAMILY 31 // custom Netlink protocol number; the user-space programs in this document define the same value
#define MSG_TYPE_SET_SPEED 0x14 // user request: force a link speed
#define MSG_TYPE_GET_STATUS 0x11 // user request: query the current status
// Speed constants (the "%dMbps" log text in nl_rcv_msg treats these values as Mbps)
#define SPEED_10 10
#define SPEED_100 100
#define SPEED_1000 1000
#define SPEED_2500 2500
2. 状态机核心逻辑(集成Netlink速率设置)
// Global state shared by the state machine, the timer callback and the event handlers
phy_healing_state_t current_state = PHY_STATE_INIT; // state the self-healing machine is currently in
struct timer_list retry_timer; // retry timer armed via mod_timer(); NOTE(review): no timer_setup() binding it to retry_timer_callback is visible in this file — confirm it is initialised elsewhere
int retry_interval_sec = 600; // retry period in seconds (default: 10 minutes)
// Main PHY state-machine function
/* Helpers defined further down; declared here because the handler calls them first. */
void phy_set_speed_and_restart_autoneg(int speed);
void on_link_established_at_10m(void);

/*
 * phy_state_machine_handler - advance the PHY self-healing state machine.
 * @event: the event that occurred.
 * @data:  event payload. For EVENT_USER_SET_SPEED it must point to an int
 *         holding the requested speed; it is not read for any other event
 *         and may be NULL.
 *
 * On link loss the machine steps down 2.5G -> 1G -> 100M -> 10M. In the 10M
 * retry state every timer expiry triggers another attempt at 100M. A user
 * speed request is applied in every state without changing the state; the
 * following link up/down events decide what happens next.
 */
void phy_state_machine_handler(phy_event_t event, void *data)
{
    int target_speed = 0;

    /* Fetch the payload once, and refuse to dereference a missing one. */
    if (event == EVENT_USER_SET_SPEED) {
        if (!data) {
            printk(KERN_WARNING "PHY Healing: User speed request carries no speed value, ignoring.\n");
            return;
        }
        target_speed = *(int *)data;
    }

    switch (current_state) {
    case PHY_STATE_INIT:
    case PHY_STATE_LINK_DOWN:
        if (event == EVENT_CABLE_PLUG || event == EVENT_LINK_UP) {
            printk(KERN_INFO "PHY Healing: Starting negotiation from 2.5G.\n");
            phy_set_speed_and_restart_autoneg(SPEED_2500);
            current_state = PHY_STATE_2_5G;
        } else if (event == EVENT_USER_SET_SPEED) {
            /*
             * Try the user's speed even while the link is down. The state
             * is left alone; the next link up/down event confirms the result.
             */
            printk(KERN_INFO "PHY Healing: User requests speed %d while link is down. Trying...\n", target_speed);
            phy_set_speed_and_restart_autoneg(target_speed);
        }
        break;

    case PHY_STATE_2_5G:
        if (event == EVENT_LINK_DOWN) {
            printk(KERN_INFO "PHY Healing: 2.5G link down, trying 1G.\n");
            phy_set_speed_and_restart_autoneg(SPEED_1000);
            current_state = PHY_STATE_1G;
        } else if (event == EVENT_USER_SET_SPEED) {
            /* State unchanged: the healing flow moves on from here by itself. */
            printk(KERN_INFO "PHY Healing: User requests speed %d, overriding current 2.5G.\n", target_speed);
            phy_set_speed_and_restart_autoneg(target_speed);
        }
        break;

    case PHY_STATE_1G:
        if (event == EVENT_LINK_DOWN) {
            printk(KERN_INFO "PHY Healing: 1G link down, trying 100M.\n");
            phy_set_speed_and_restart_autoneg(SPEED_100);
            current_state = PHY_STATE_100M;
        } else if (event == EVENT_USER_SET_SPEED) {
            printk(KERN_INFO "PHY Healing: User requests speed %d, overriding current 1G.\n", target_speed);
            phy_set_speed_and_restart_autoneg(target_speed);
        }
        break;

    case PHY_STATE_100M:
        if (event == EVENT_LINK_DOWN) {
            printk(KERN_INFO "PHY Healing: 100M link down, trying 10M.\n");
            phy_set_speed_and_restart_autoneg(SPEED_10);
            current_state = PHY_STATE_10M_RETRY;
            /*
             * Arms the retry timer right away, i.e. before a 10M link-up
             * has actually been observed.
             */
            on_link_established_at_10m();
        } else if (event == EVENT_USER_SET_SPEED) {
            printk(KERN_INFO "PHY Healing: User requests speed %d, overriding current 100M.\n", target_speed);
            phy_set_speed_and_restart_autoneg(target_speed);
        }
        break;

    case PHY_STATE_10M_RETRY:
        if (event == EVENT_LINK_DOWN) {
            printk(KERN_ERR "PHY Healing: 10M link down. Connection failed.\n");
            /* NOTE(review): del_timer_sync() must not run in hard-IRQ context — confirm the caller's context. */
            del_timer_sync(&retry_timer);
            current_state = PHY_STATE_LINK_DOWN;
        } else if (event == EVENT_TIMER_EXPIRED) {
            printk(KERN_INFO "PHY Healing: Timer expired, trying to upgrade from 10M to 100M.\n");
            phy_set_speed_and_restart_autoneg(SPEED_100);
        } else if (event == EVENT_USER_SET_SPEED) {
            /* The user may break out of the 10M retry loop at any time. */
            printk(KERN_INFO "PHY Healing: User requests speed %d, breaking 10M retry loop.\n", target_speed);
            del_timer_sync(&retry_timer); /* stop the automatic retries */
            phy_set_speed_and_restart_autoneg(target_speed);
        }
        break;

    default:
        printk(KERN_WARNING "PHY Healing: Unexpected state %d, event %d ignored.\n", current_state, event);
        break;
    }
}

// ... the other helpers (phy_set_speed_and_restart_autoneg, retry_timer_callback, etc.) are unchanged ...
// 辅助函数:设置PHY速率并重启自动协商
void phy_set_speed_and_restart_autoneg(int speed) {// 通过MDIO接口写入PHY寄存器来设置速率mdio_write(PHY_ADDR, MII_BMCR, BMCR_ANENABLE | BMCR_ANRESTART | speed_to_bmcr_bits(speed));
}// 定时器到期回调函数
void retry_timer_callback(struct timer_list *t) {// 向状态机发送定时器到期事件phy_state_machine_handler(EVENT_TIMER_EXPIRED);// 重新启动定时器,形成循环mod_timer(&retry_timer, jiffies + msecs_to_jiffies(retry_interval_sec * 1000));
}
3. 事件触发与状态初始化
// 当PHY中断发生时,在中断处理函数中调用
void phy_interrupt_handler() {int link_status = mdio_read(PHY_ADDR, MII_BMSR) & BMSR_LSTATUS;static int last_link_status = 0;if (link_status != last_link_status) {if (link_status) {phy_state_machine_handler(EVENT_LINK_UP);} else {phy_state_machine_handler(EVENT_LINK_DOWN);}last_link_status = link_status;}
}// 当检测到拔插事件时(例如通过PHY的另一个中断引脚)
void cable_unplug_plug_handler() {printk(KERN_INFO "PHY Healing: Cable plug/unplug event detected. Resetting state machine.\n");del_timer_sync(&retry_timer); // 停止定时器current_state = PHY_STATE_INIT; // 重置状态phy_state_machine_handler(EVENT_CABLE_PLUG);
}// 在10M链路成功建立时,启动定时器
/*
 * Arms retry_timer to fire retry_interval_sec seconds from now, but only
 * while the state machine is in PHY_STATE_10M_RETRY; in any other state
 * the call does nothing.
 */
void on_link_established_at_10m()
{
    if (current_state != PHY_STATE_10M_RETRY)
        return;

    mod_timer(&retry_timer,
              jiffies + msecs_to_jiffies(retry_interval_sec * 1000));
}
PHY层到MAC层端口监控流程
此部分是整个自愈机制的基础,与之前描述相同。
流程图:
+----------------+      +-----------------+      +---------------------+
| PHY (物理层)   | <--> | MDIO Bus (总线) | <--> | MAC Driver (内核驱动) |
+----------------+      +-----------------+      +---------------------+
        |                        |                          |
        | 1. 物理链路变化         |                          |
        | (插拔、信号质量变差)    |                          |
        V                        V                          V
+----------------+      +-----------------+      +---------------------+
| PHY自动协商    |----->| PHY更新内部寄存器 |----->| 驱动轮询/中断        |
| (Autonegotiation)|    | (如BMSR, Status) |      | 读取PHY寄存器        |
+----------------+      +-----------------+      +---------------------+
                                                            |
                                                            | 2. 读取Link状态和速率
                                                            V
                                                 +---------------------+
                                                 | 驱动解析寄存器值     |
                                                 | (Link Up/Down, Speed)|
                                                 +---------------------+
                                                            |
                                                            | 3. 触发状态机事件
                                                            V
                                                 +----------------------------------+
                                                 | 调用 phy_state_machine_handler() |
                                                 +----------------------------------+
详细步骤:
- 物理事件:网线插入/拔除,或线路质量变化。
- PHY内部处理:PHY芯片检测变化,启动自动协商,并将结果写入其标准寄存器(如BMSR的Link Status位,PHY Specific Status Register的速率/双工模式)。
- MAC驱动检测:驱动通过中断(高效)或轮询方式,得知PHY状态变化。
- 驱动解析与触发:驱动读取PHY寄存器,解析出链路是Up还是Down,以及当前的速率。然后,它调用phy_state_machine_handler()函数,并传入相应的事件(EVENT_LINK_UP或EVENT_LINK_DOWN),从而启动自愈逻辑。
用户态到内核态数据交互 - Netlink速率设置流程
流程图:
+----------------+      +-----------------+      +---------------------+
| 用户态App      |      | 系统调用接口    |      | 内核网络驱动        |
| (e.g., ethtool)|----->|                 |----->|                     |
+----------------+      +-----------------+      +---------------------+
        | 1. open socket         | 2. ioctl()               | 3. .ndo_do_ioctl()
        |    & ioctl()           |                          |
        V                        V                          V
+----------------+      +-----------------+      +---------------------+
| 传递命令和数据 |----->| 内核拷贝数据    |----->| 驱动执行具体操作    |
| (struct ifreq) |      | (copy_from_user)|      | (读/写驱动变量)     |
+----------------+      +-----------------+      +---------------------+
                                                            |
                                                            | 4. 返回结果
                                                            V
                                                 +---------------------+
                                                 | 内核拷贝数据回用户  |
                                                 | (copy_to_user)      |
                                                 +---------------------+
注:上图展示的是传统的ioctl交互路径(如ethtool所用);下文的代码示例改用Netlink套接字(用户态socket()/sendto() → 内核Netlink input回调)来完成同样的用户态到内核态交互。
详细步骤与代码示例:
我们将创建一个Netlink通道,允许用户态程序直接命令内核驱动设置一个特定的PHY速率。
1. 内核态:创建Netlink套接字并处理“设置速率”消息
这部分代码通常在网络驱动的初始化部分。
#include <net/sock.h>
#include <linux/netlink.h>

/* Kernel-side Netlink socket; NULL until init_netlink() has created it. */
struct sock *nl_sk = NULL;

// Netlink message receive callback
void nl_rcv_msg(struct sk_buff *skb) {struct nlmsghdr *nlh;int pid;int msg_type;int target_speed;nlh = (struct nlmsghdr *)skb->data;pid = nlh->nlmsg_pid; // 发送消息的进程PIDmsg_type = nlh->nlmsg_type;printk(KERN_INFO "PHY Healing: Received Netlink message from user %d, type %d\n", pid, msg_type);switch (msg_type) {case MSG_TYPE_SET_SPEED:// 用户请求设置速率target_speed = *(int *)NLMSG_DATA(nlh);// 简单的有效性检查if (target_speed == SPEED_10 || target_speed == SPEED_100 || target_speed == SPEED_1000 || target_speed == SPEED_2500) {printk(KERN_INFO "PHY Healing: User command to set speed to %dMbps.\n", target_speed);// 关键:调用状态机,传递用户设置速率事件phy_state_machine_handler(EVENT_USER_SET_SPEED, &target_speed);} else {printk(KERN_WARNING "PHY Healing: Invalid speed %d requested by user.\n", target_speed);}// 可以选择发送一个确认消息回用户态break;case MSG_TYPE_GET_STATUS:// 用户请求获取状态// send_status_to_user(pid); // 实现此函数以返回状态break;default:printk(KERN_WARNING "PHY Healing: Unknown Netlink message type %d\n", msg_type);}
}// 在驱动初始化函数中创建Netlink套接字
int init_netlink(void) {struct netlink_kernel_cfg cfg = {.input = nl_rcv_msg,};nl_sk = netlink_kernel_create(&init_net, NETLINK_PHY_FAMILY, &cfg);if (!nl_sk) {printk(KERN_ALERT "PHY Healing: Error creating Netlink socket.\n");return -10;}printk(KERN_INFO "PHY Healing: Netlink socket created for speed control.\n");return 0;
}// 在驱动退出函数中销毁Netlink套接字
/*
 * Releases the kernel-side Netlink socket, if one was created, and logs
 * the release. Does nothing when nl_sk is NULL.
 */
void exit_netlink(void)
{
    if (!nl_sk)
        return;

    netlink_kernel_release(nl_sk);
    printk(KERN_INFO "PHY Healing: Netlink socket released.\n");
}
2. 用户态:通过Netlink发送“设置速率”命令
这是一个简单的C程序,用于演示如何向内核驱动发送设置速率的命令。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <errno.h>
#include <limits.h>

#define NETLINK_PHY_FAMILY 31
#define MSG_TYPE_SET_SPEED 0x14
#define MAX_PAYLOAD 256 // large enough to hold one int

/*
 * Sends a single MSG_TYPE_SET_SPEED request to the kernel driver.
 *
 * Usage: <program> <speed>, where <speed> is a decimal integer. The value
 * is sent as-is; the kernel side decides whether the speed is supported.
 *
 * Returns 0 when the message was sent, -1 on a usage error or any failure.
 */
int main(int argc, char **argv)
{
    int rc = -1;
    int sock_fd;
    int target_speed;
    long parsed;
    char *end = NULL;
    struct sockaddr_nl src_addr, dest_addr;
    struct nlmsghdr *nlh = NULL;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s <speed>\n", argv[0]);
        fprintf(stderr, "Example: %s 1000\n", argv[0]);
        return -1;
    }

    /* strtol instead of atoi so that garbage and out-of-range input is detected. */
    errno = 0;
    parsed = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || errno == ERANGE ||
        parsed < INT_MIN || parsed > INT_MAX) {
        fprintf(stderr, "Invalid speed: %s\n", argv[1]);
        return -1;
    }
    target_speed = (int)parsed;

    /* 1. Create the Netlink socket. */
    sock_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_PHY_FAMILY);
    if (sock_fd < 0) {
        perror("socket");
        return -1;
    }

    /* 2. Bind the source address. */
    memset(&src_addr, 0, sizeof(src_addr));
    src_addr.nl_family = AF_NETLINK;
    src_addr.nl_pid = getpid();
    if (bind(sock_fd, (struct sockaddr *)&src_addr, sizeof(src_addr)) < 0) {
        perror("bind");
        goto out_close;
    }

    /* 3. Prepare the destination address (the kernel). */
    memset(&dest_addr, 0, sizeof(dest_addr));
    dest_addr.nl_family = AF_NETLINK;
    dest_addr.nl_pid = 0; /* 0 addresses the kernel */

    /* 4. Prepare the message; calloc hands back zeroed memory. */
    nlh = calloc(1, NLMSG_SPACE(MAX_PAYLOAD));
    if (!nlh) {
        perror("calloc");
        goto out_close;
    }
    nlh->nlmsg_len = NLMSG_SPACE(MAX_PAYLOAD);
    nlh->nlmsg_pid = getpid();
    nlh->nlmsg_flags = 0;
    nlh->nlmsg_type = MSG_TYPE_SET_SPEED;
    /* Copy the target speed into the message payload. */
    memcpy(NLMSG_DATA(nlh), &target_speed, sizeof(target_speed));

    /* 5. Send the message to the kernel. */
    printf("Sending command to set PHY speed to %d Mbps...\n", target_speed);
    if (sendto(sock_fd, nlh, nlh->nlmsg_len, 0,
               (struct sockaddr *)&dest_addr, sizeof(dest_addr)) < 0) {
        perror("sendto");
        goto out_free;
    }
    printf("Command sent successfully.\n");
    rc = 0;

    /* 6. Clean up. */
out_free:
    free(nlh);
out_close:
    close(sock_fd);
    return rc;
}
定时器与重试逻辑(核心新需求)
根据你的要求,我们将创建一个独立的C程序,它内部包含定时和重试逻辑,而不是使用cron。
重要提示:让一个用户态程序24/7运行并每分钟检查一次时间,在资源消耗上不如cron高效。但为了满足你的具体要求,我们按此方式实现。
phy_scheduler.c - 完整的C程序代码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <linux/netlink.h>

#define NETLINK_PHY_FAMILY 31   // must match the value used by the kernel driver
#define MSG_TYPE_SET_SPEED 0x14
#define MSG_TYPE_GET_STATUS 0x11
#define SPEED_100 100           // speed requested by perform_9am_retry()
#define MAX_PAYLOAD 256
#define SLEEP_INTERVAL_SEC 60 // check the time once a minute

// Netlink communication function
/*
 * communicate_with_kernel - send one request to the PHY driver over Netlink
 * and collect its reply.
 * @msg_type:        MSG_TYPE_* code placed in nlmsg_type.
 * @data:            integer payload (e.g. the target speed; 0 when unused).
 * @response_buffer: caller buffer of at least MAX_PAYLOAD bytes, or NULL.
 *                   Receives the reply payload as a NUL-terminated string;
 *                   it is left empty when no reply arrived. The reply
 *                   payload is assumed to be text — confirm against the
 *                   kernel-side implementation.
 *
 * The wait for a reply is bounded by a receive timeout. A missing reply is
 * an error only for MSG_TYPE_GET_STATUS; for other requests an
 * acknowledgement is optional.
 *
 * Returns 0 on success, -1 on failure.
 */
int communicate_with_kernel(int msg_type, int data, char* response_buffer)
{
    enum { REPLY_TIMEOUT_SEC = 2 };
    struct sockaddr_nl src_addr, dest_addr;
    struct timeval reply_timeout = { .tv_sec = REPLY_TIMEOUT_SEC, .tv_usec = 0 };
    struct nlmsghdr *nlh = NULL;
    size_t msg_space = NLMSG_SPACE(MAX_PAYLOAD);
    ssize_t received;
    int sock_fd;
    int rc = -1;

    /* Callers treat the buffer as a string, so it must always be terminated. */
    if (response_buffer)
        response_buffer[0] = '\0';

    sock_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_PHY_FAMILY);
    if (sock_fd < 0) {
        perror("socket");
        return -1;
    }

    /* Never block forever when the kernel does not answer. */
    if (setsockopt(sock_fd, SOL_SOCKET, SO_RCVTIMEO,
                   &reply_timeout, sizeof(reply_timeout)) < 0) {
        perror("setsockopt");
        goto out_close;
    }

    memset(&src_addr, 0, sizeof(src_addr));
    src_addr.nl_family = AF_NETLINK;
    src_addr.nl_pid = getpid();
    if (bind(sock_fd, (struct sockaddr *)&src_addr, sizeof(src_addr)) < 0) {
        perror("bind");
        goto out_close;
    }

    memset(&dest_addr, 0, sizeof(dest_addr));
    dest_addr.nl_family = AF_NETLINK;
    dest_addr.nl_pid = 0; /* the kernel */

    nlh = calloc(1, msg_space);
    if (!nlh)
        goto out_close;
    nlh->nlmsg_len = NLMSG_SPACE(MAX_PAYLOAD);
    nlh->nlmsg_pid = getpid();
    nlh->nlmsg_flags = 0;
    nlh->nlmsg_type = msg_type;
    memcpy(NLMSG_DATA(nlh), &data, sizeof(data));

    if (sendto(sock_fd, nlh, nlh->nlmsg_len, 0,
               (struct sockaddr *)&dest_addr, sizeof(dest_addr)) < 0) {
        perror("sendto");
        goto out_free;
    }

    /* Receive the reply into the request buffer, which is suitably aligned. */
    memset(nlh, 0, msg_space);
    received = recv(sock_fd, nlh, msg_space, 0);
    if (received < 0 || !NLMSG_OK(nlh, (int)received)) {
        if (msg_type == MSG_TYPE_GET_STATUS) {
            if (received < 0)
                perror("recv");
            else
                fprintf(stderr, "recv: malformed Netlink reply\n");
            goto out_free;
        }
        /* No acknowledgement for a non-query request: not an error. */
        rc = 0;
        goto out_free;
    }

    /* Hand back only the payload, not the Netlink header, and terminate it. */
    if (response_buffer) {
        size_t payload_len = NLMSG_PAYLOAD(nlh, 0);

        if (payload_len > MAX_PAYLOAD - 1)
            payload_len = MAX_PAYLOAD - 1;
        memcpy(response_buffer, NLMSG_DATA(nlh), payload_len);
        response_buffer[payload_len] = '\0';
    }
    rc = 0;

out_free:
    free(nlh);
out_close:
    close(sock_fd);
    return rc;
}

// Core logic: perform the 5 retries
void perform_9am_retry() {char response[MAX_PAYLOAD];printf("[%s] 9:00 AM check triggered.\n", get_timestamp());// 1. 获取当前PHY状态if (communicate_with_kernel(MSG_TYPE_GET_STATUS, 0, response) != 0) {printf("[%s] Failed to get status from kernel.\n", get_timestamp());return;}printf("[%s] Current kernel status: %s\n", get_timestamp(), response);// 2. 检查状态是否为 "10M_RETRY" 并且速率为 "10"// 注意:这里的字符串匹配需要根据内核返回的实际格式调整if (strstr(response, "State: 10M_RETRY") && strstr(response, "Speed: 10")) {printf("[%s] Port is stable at 10M. Starting 5 retries to 100M.\n", get_timestamp());for (int i = 1; i <= 5; i++) {printf("[%s] Attempt %d/5: Setting speed to 100M.\n", get_timestamp(), i);// 3. 发送设置100M的命令if (communicate_with_kernel(MSG_TYPE_SET_SPEED, SPEED_100, response) != 0) {printf("[%s] Failed to send set-speed command.\n", get_timestamp());continue;}// 4. 等待链路稳定sleep(5);// 5. 再次检查状态if (communicate_with_kernel(MSG_TYPE_GET_STATUS, 0, response) != 0) {printf("[%s] Failed to get status after attempt %d.\n", get_timestamp(), i);continue;}if (strstr(response, "Speed: 100")) {printf("[%s] Success! Port upgraded to 100M on attempt %d.\n", get_timestamp(), i);return; // 成功则退出} else {printf("[%s] Attempt %d failed. Current status: %s\n", get_timestamp(), i, response);}}printf("[%s] All 5 retry attempts failed.\n", get_timestamp());} else {printf("[%s] Port is not in the required 10M_RETRY state. No action taken.\n", get_timestamp());}
}// 获取当前时间戳字符串
char* get_timestamp() {static char buffer[80];time_t rawtime;struct tm *timeinfo;time(&rawtime);timeinfo = localtime(&rawtime);strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", timeinfo);return buffer;
}int main() {time_t rawtime;struct tm *timeinfo;int last_day = -1;printf("PHY Scheduler started. Waiting for 9:00 AM to trigger retry logic...\n");while (1) {time(&rawtime);timeinfo = localtime(&rawtime);// 检查是否是9:00整,并且不是同一天内已经触发过的if (timeinfo->tm_hour == 9 && timeinfo->tm_min == 0 && timeinfo->tm_mday != last_day) {perform_9am_retry();last_day = timeinfo->tm_mday; // 标记今天已触发}// 每分钟检查一次,避免CPU空转sleep(SLEEP_INTERVAL_SEC);}return 0;
}
