当前位置：首页 > news >正文

分析下kernel6.6中如何获取下一次的cpu频率

news 2025/9/18 15:07:07

一.get_next_freq函数实现

/**
120   * get_next_freq - Compute a new frequency for a given cpufreq policy.
121   * @sg_policy: schedutil policy object to compute the new frequency for.
122   * @util: Current CPU utilization.
123   * @max: CPU capacity.
124   *
125   * If the utilization is frequency-invariant, choose the new frequency to be
126   * proportional to it, that is
127   *
128   * next_freq = C * max_freq * util / max
129   *
130   * Otherwise, approximate the would-be frequency-invariant utilization by
131   * util_raw * (curr_freq / max_freq) which leads to
132   *
133   * next_freq = C * curr_freq * util_raw / max
134   *
135   * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
136   *
137   * The lowest driver-supported frequency which is equal or greater than the raw
138   * next_freq (as calculated above) is returned, subject to policy min/max and
139   * cpufreq driver limitations.
140   */
141  static unsigned int get_next_freq(struct sugov_policy *sg_policy,
142  				  unsigned long util, unsigned long max)
143  {
144  	struct cpufreq_policy *policy = sg_policy->policy;
     	/*
     	 * Base frequency for the mapping: cpuinfo.max_freq when utilization is
     	 * frequency-invariant, otherwise the policy's current frequency.
     	 */
145  	unsigned int freq = arch_scale_freq_invariant() ?
146  				policy->cpuinfo.max_freq : policy->cur;
     	/* Stays 0 unless the vendor hook below writes a value through &next_freq. */
147  	unsigned long next_freq = 0;
148  
     	/* Apply the C = 1.25 factor to util: map_util_perf() returns util + util / 4. */
149  	util = map_util_perf(util);
     	/*
     	 * The vendor hook is handed &next_freq and &need_freq_update. A non-zero
     	 * next_freq replaces the default map_util_freq() result.
     	 */
150  	trace_android_vh_map_util_freq(util, freq, max, &next_freq, policy,
151  			&sg_policy->need_freq_update);
152  	if (next_freq)
153  		freq = next_freq;
154  	else
155  		freq = map_util_freq(util, freq, max);
156  
     	/*
     	 * Same raw frequency as last time and no update requested: return the
     	 * stored sg_policy->next_freq without resolving again.
     	 */
157  	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
158  		return sg_policy->next_freq;
159  
     	/* Remember the raw value for the next comparison, then resolve it. */
160  	sg_policy->cached_raw_freq = freq;
161  	return cpufreq_driver_resolve_freq(policy, freq);
162  }
163  
/*
 * sugov_get_util - Refresh the bw_dl and util fields stored in @sg_cpu.
 *
 * Reads the value returned by cpu_util_cfs_boost() for sg_cpu->cpu and passes
 * it to effective_cpu_util() with FREQUENCY_UTIL; the result is stored in
 * sg_cpu->util. sg_cpu->bw_dl is set from cpu_bw_dl() on the CPU's runqueue.
 * NOTE(review): the helpers are defined elsewhere; their exact semantics are
 * not visible in this listing.
 */
164  static void sugov_get_util(struct sugov_cpu *sg_cpu)
165  {
166  	unsigned long util = cpu_util_cfs_boost(sg_cpu->cpu);
167  	struct rq *rq = cpu_rq(sg_cpu->cpu);
168  
169  	sg_cpu->bw_dl = cpu_bw_dl(rq);
170  	sg_cpu->util = effective_cpu_util(sg_cpu->cpu, util,
171  					  FREQUENCY_UTIL, NULL);
172  }
/*
 * map_util_freq - Scale @freq by the ratio @util / @cap.
 *
 * Returns freq * util / cap using unsigned integer arithmetic: the
 * multiplication is done first and the division truncates.
 */
173  static inline unsigned long map_util_freq(unsigned long util,
27  					unsigned long freq, unsigned long cap)
28  {
29  	return freq * util / cap;
30  }
31  
/*
 * map_util_perf - Add 25% headroom to @util.
 *
 * Returns util + (util >> 2), i.e. util + util / 4 (about 1.25 * util, with
 * the quarter rounded down).
 */
32  static inline unsigned long map_util_perf(unsigned long util)
33  {
34  	return util + (util >> 2);
35  }

1.util = map_util_perf(util);

即 util + (util >> 2) = util + util/4 = 1.25 × util（整数运算，右移 2 位相当于除以 4 并向下取整），也就是先把利用率放大 1.25 倍，预留 25% 的余量

2.freq = map_util_freq(util, freq, max);

freq * util / cap，其中 util 已在上一步被放大为 1.25 倍，因此相当于 1.25 * freq * util_raw / cap（util_raw 为放大前的原始利用率）

数学含义：

引入 C=1.25 的增益系数
当 (util/max) = 0.8 时，next_freq = 1.25 * base * 0.8 = base
实现了“80% 利用率触达基准频率”的 tipping point 效果

二、调试方法

1. 查看频率决策过程

# 开启 ftrace
echo 'get_next_freq' > /sys/kernel/debug/tracing/set_ftrace_filter
echo function > /sys/kernel/debug/tracing/current_tracer

# 观察输出
cat /sys/kernel/debug/tracing/trace_pipe

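输出示例（仅为示意：function tracer 实际只记录函数名及其调用者，不会打印 util、max、raw_freq 等参数；要观察这些数值，需要借助 kprobe 事件或自行添加 trace_printk。另外 get_next_freq 是 static 函数，若被编译器内联，则不会出现在 available_filter_functions 中，也就无法被过滤追踪）：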

chrome-1234 [001] ...1 123.456789: get_next_freq: util=820 max=1024 → raw_freq=2050000 → final=2000000

三.通过真实硬件平台的详细案例

带您一步步计算 get_next_freq() 的完整过程。我们将使用一个典型的 ARM64 移动设备（如高通骁龙 8 Gen 2）作为示例

一、假设硬件配置

CPU 架构	ARM64 (big.LITTLE)	支持频率不变性
LITTLE cluster	Cortex-A710 × 4	最大频率 2.5GHz
big cluster	Cortex-X3 × 1 + A710 × 3	最大频率 3.2GHz
`capacity-dmips-mhz`	LITTLE=512, big=1024	性能容量比例 1:2
当前 governor	schedutil	使用我们分析的逻辑

二、案例 1：LITTLE 核心上的中等负载（浏览器滚动）

场景描述

用户正在滑动网页
被调度到 LITTLE cluster 的某个核心
利用率监控显示 util = 600
该核心最大容量 max = 512

执行步骤

1. 获取基准频率

unsigned int freq = arch_scale_freq_invariant() ?policy->cpuinfo.max_freq : policy->cur;

arch_scale_freq_invariant() → true（ARM64 支持）
policy->cpuinfo.max_freq = 2500000 kHz (2.5GHz)（cpufreq 中频率的单位是 kHz，而不是 Hz）
✅ 所以 freq = 2500000

2. 预处理利用率

util = map_util_perf(util); // util + (util >> 2)
// util = 600 + (600 >> 2) = 600 + 150 = 750（即 1.25 × 600；下文第 4 步把 1.25 写在 freq 一侧，数值上等价）

3. Vendor Hook 检查

trace_android_vh_map_util_freq(...);
if (next_freq) ... else ...

假设没有厂商 hook 干预
进入标准路径

4. 计算目标频率

freq = map_util_freq(util, freq, max);
// = freq * util / max，其中 util 已被 map_util_perf() 放大 1.25 倍，等价于：
// = (1.25 * freq) * util_raw / max
// = (1.25 * 2500000) * 600 / 512
// = (3,125,000) * 600 / 512
// = 1,875,000,000 / 512
// = **3,662,109 kHz**（≈ 3.66GHz，整数除法向下取整）

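⚠️ 注意：这里出现了超过最大频率的中间值！（说明：实际内核中 effective_cpu_util() 返回的 util 不会超过该 CPU 的容量 max，因此 util = 600、max = 512 只是为了演示后续钳位而假设的数值；不过即使 util = max，乘以 1.25 之后原始频率也会达到 1.25 × max_freq，同样会超过最大频率，最终由 cpufreq_driver_resolve_freq() 钳位。）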

5. 缓存检查与最终解析

if (freq == sg_policy->cached_raw_freq && !need_update) → 假设本次条件不成立，不走缓存的提前返回
sg_policy->cached_raw_freq = 3662109;
return cpufreq_driver_resolve_freq(policy, 3662109);

`cpufreq_driver_resolve_freq()` 处理：

1.应用策略约束：

/**
559   * cpufreq_driver_resolve_freq - Map a target frequency to a driver-supported
560   * one.
561   * @policy: associated policy to interrogate
562   * @target_freq: target frequency to resolve.
563   *
564   * The target to driver frequency mapping is cached in the policy.
565   *
566   * Return: Lowest driver-supported frequency greater than or equal to the
567   * given target_freq, subject to policy (min/max) and driver limitations.
568   */
569  unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy,
570  					 unsigned int target_freq)
571  {
     	/* Thin wrapper: all the work is done by __resolve_freq() with CPUFREQ_RELATION_LE. */
572  	return __resolve_freq(policy, target_freq, CPUFREQ_RELATION_LE);
573  }
574  EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq);

/*
 * __resolve_freq - Clamp @target_freq to the policy limits and map it to a
 * frequency-table entry.
 * @policy: policy whose min/max limits and frequency table are used.
 * @target_freq: requested frequency.
 * @relation: selection rule handed to cpufreq_frequency_table_target().
 *
 * Returns the clamped target when the policy has no frequency table;
 * otherwise the frequency of the table entry chosen for the clamped target.
 * In the latter case the chosen index and the clamped target are cached in
 * @policy.
 */
static unsigned int __resolve_freq(struct cpufreq_policy *policy,
541  		unsigned int target_freq, unsigned int relation)
542  {
543  	unsigned int idx;
     	/* Keep the unclamped request; it is passed to the vendor hook below. */
544  	unsigned int old_target_freq = target_freq;
545  
     	/* Restrict the request to [policy->min, policy->max]. */
546  	target_freq = clamp_val(target_freq, policy->min, policy->max);
     	/* The vendor hook gets a pointer to target_freq, so it can presumably adjust it. */
547  	trace_android_vh_cpufreq_resolve_freq(policy, &target_freq, old_target_freq);
548  
     	/* No frequency table: the clamped value is returned as is. */
549  	if (!policy->freq_table)
550  		return target_freq;
551  
     	/* Pick a table entry for the target and cache the lookup in the policy. */
552  	idx = cpufreq_frequency_table_target(policy, target_freq, relation);
553  	policy->cached_resolved_idx = idx;
554  	policy->cached_target_freq = target_freq;
555  	return policy->freq_table[idx].frequency;
556  }

target_freq = clamp_val(target_freq, policy->min, policy->max);

freq = clamp_val(3662109, policy->min, policy->max);
// policy->max = 2500000
// → freq = 2500000

2.匹配驱动支持频率（假设频率表）：

frequency_table[] = {
{0, 600000},
{1, 900000},
{2, 1200000},
{3, 1500000},
{4, 1800000},
{5, 2100000},
{6, 2500000}, ← ≥ 2500000 kHz（2.5GHz）的最低档位就是它

......
{CPUFREQ_TABLE_END}
};

✅ 最终返回：2,500,000 kHz (2.5GHz)

三、案例 2：big 核心上的高性能需求（游戏渲染）

场景描述

游戏进入战斗场景
任务被迁移到 big cluster
监控到 util = 850
该核心 max = 1024

执行过程

1. 基准频率选择

freq = policy->cpuinfo.max_freq = 3200000 (3.2GHz)

2. 计算原始目标

freq = (1.25 * 3200000) * 850 / 1024
= (4,000,000) * 850 / 1024
= 3,400,000,000 / 1024
= **3,320,312 kHz**（≈ 3.32GHz）

3. 驱动解析

cpufreq_driver_resolve_freq(policy, 3320312)

假设 big cluster 频率表：

{0, 800000}
{1, 1200000}
{2, 1600000}
{3, 2000000}
{4, 2400000}
{5, 2800000}
{6, 3200000} ← 最接近的可用频率

✅ 最终返回：3,200,000 kHz (3.2GHz)

🔍 观察：即使利用率只有 83% (850/1024)，也达到了最高频 —— 这正是 C=1.25 tipping point 的效果！

四、案例 3：低负载后台同步

场景描述

后台应用进行数据同步
util = 200
max = 1024 (big core)

计算过程

freq = (1.25 * 3200000) * 200 / 1024
= 4,000,000 * 200 / 1024
= 800,000,000 / 1024
= **781,250 kHz**（≈ 781MHz）

驱动解析：

查找 ≥ 781,250 kHz 的最小频率
假设最接近的是 800,000 kHz（800MHz）

✅ 最终返回：800,000 kHz（800MHz）

💡 此时功耗极低，适合后台任务。

场景	util/max	计算式	原始频率	最终频率
浏览器滚动	600/512≈1.17	`(1.25×2.5G)×600/512`	3.66GHz	2.5GHz
游戏渲染	850/1024≈0.83	`(1.25×3.2G)×850/1024`	3.32GHz	3.2GHz
后台同步	200/1024≈0.195	`(1.25×3.2G)×200/1024`	781MHz	800MHz

📊 关键观察：

即使利用率未达 100%，也可能触发满频（tipping point 效应）
驱动会将连续频率“量化”为离散档位
实际频率不会超过 max_freq

五、特殊情况：Vendor Hook 干预

假设某厂商在 Android Vendor Hook 中添加激进策略：

// vendor_hook.c
/*
 * Example vendor hook: force the maximum hardware frequency while a task
 * named "UnityMain" is running with util above 700.
 *
 * @util:      utilization passed in by the caller.
 * @freq:      base frequency passed in by the caller (unused here).
 * @cap:       CPU capacity passed in by the caller (unused here).
 * @next_freq: output; left untouched unless the condition matches. A non-zero
 *             value makes get_next_freq() skip map_util_freq().
 * @policy:    cpufreq policy providing cpuinfo.max_freq.
 * @update:    caller's need_freq_update flag (unused here).
 *
 * strcmp() needs <linux/string.h> in kernel code.
 * NOTE(review): handlers registered through the tracepoint API usually take a
 * leading `void *data` argument - confirm against the hook declaration.
 */
void android_vh_map_util_freq(unsigned long util, unsigned int freq,
			      unsigned long cap, unsigned long *next_freq,
			      struct cpufreq_policy *policy, bool *update)
{
	/*
	 * comm is a character array: compare its contents with strcmp().
	 * Using == would compare addresses and never match.
	 */
	if (strcmp(current->comm, "UnityMain") == 0 && util > 700) {
		/* Force full speed. */
		*next_freq = policy->cpuinfo.max_freq;
		return;
	}
}

对案例 2 的影响：

原本计算出 3.32GHz
Vendor Hook 直接设置 *next_freq = 3200000
跳过 map_util_freq() 计算
最终仍为 3.2GHz，但决策更快更确定

六、实际验证方法

1. 在真机上监控

# Monitor one task's util_avg next to the current frequency of policy0.
# NOTE: /proc/<pid>/sched reports the utilization of a single task, not the
# per-CPU utilization that schedutil feeds into get_next_freq().
while true; do
    # pidof may return several PIDs; -s keeps a single one so the path is valid.
    pid=$(pidof -s chrome)
    # Lines look like "se.avg.util_avg : 600": the value is the last field
    # (field 2 is the ':' separator).
    util=$(awk '/util_avg/ {print $NF}' "/proc/$pid/sched")
    freq=$(cat /sys/devices/system/cpu/cpufreq/policy0/scaling_cur_freq)
    echo "$(date +%T) util=$util freq=$freq"
    sleep 0.1
done

输出示例：

14:30:01 util=600 freq=2500000
14:30:02 util=850 freq=3200000
14:30:03 util=200 freq=800000

2. 使用 ftrace 精确追踪

echo 1 > /sys/kernel/debug/tracing/events/power/cpu_frequency/enable
cat /sys/kernel/debug/tracing/trace_pipe

输出：

chrome-1234 [005] ...1 123.456789: cpu_frequency: state=2500000 cpu_id=5
chrome-1234 [007] ...1 123.457890: cpu_frequency: state=3200000 cpu_id=7

七、总结：频率计算的本质

虽然公式看起来复杂，但其核心思想非常清晰：

1. 归一化利用率

将不同频率下的利用率统一到“性能等效”尺度（仅当架构不支持频率不变性、util 会随当前频率变化时才需要这一步；支持频率不变性的平台上 util 本身已经归一化，直接以 max_freq 为基准）：

等效利用率 = util_raw × (curr_freq / max_freq)

2. 引入安全增益

乘以 C=1.25 提供性能缓冲，防止瞬时 spike 导致卡顿。

3. 硬件适配

通过 cpufreq_driver_resolve_freq() 将理想频率映射到实际可用档位。

这种设计使得 Linux 内核能够在保持简洁算法的同时，适应从嵌入式设备到服务器的各种复杂硬件环境，是操作系统工程的典范之作。

八、与传统 governor 的对比

schedutil	提升到中间频率（如 800MHz）	渐进式响应
ondemand	可能保持最低频	滞后响应
conservative	缓慢提升	过度保守
performance	始终最高频	浪费电量