debug一个cpu频率一直最低的问题
现象
发现 scaling_max_freq 频率和 scaling_min_freq 一样都是最小值
neo:/sys/devices/system/cpu/cpufreq/policy0 # cat scaling_min_freq
691200
neo:/sys/devices/system/cpu/cpufreq/policy0 # cat scaling_max_freq
691200
neo:/sys/devices/system/cpu/cpufreq/policy0 # echo 1996800 > sca
scaling_available_frequencies scaling_boost_frequencies scaling_driver scaling_max_freq scaling_setspeed
scaling_available_governors scaling_cur_freq scaling_governor scaling_min_freq
neo:/sys/devices/system/cpu/cpufreq/policy0 # echo 1996800 > scaling_max_freq
neo:/sys/devices/system/cpu/cpufreq/policy0 # cat scaling_max_freq
691200
neo:/sys/devices/system/cpu/cpufreq/policy0 #
这样导致 一直无法调频, cpu频率一直是 1996800
排查
打开调频相关的 tracepoint
neo:/sys/kernel/tracing # echo 0 > events/enable
neo:/sys/kernel/tracing # echo 0 > trace
neo:/sys/kernel/tracing # echo 1 > events/power/pm_qos_update_target/enable
neo:/sys/kernel/tracing # echo 1 > events/power/cpu_frequency/enable
neo:/sys/kernel/tracing # echo 1 > events/power/cpu_frequency_limits/enable
neo:/sys/kernel/tracing # echo 1 > tracing
tracing_cpumask tracing_on tracing_thresh
neo:/sys/kernel/tracing # echo 1 > tracing_on
neo:/sys/kernel/tracing # cat trace
# tracer: nop
#
# entries-in-buffer/entries-written: 40/40 #P:4
#
# _-----=> irqs-off
# / _----=> need-resched
# | / _---=> hardirq/softirq
# || / _--=> preempt-depth
# ||| / delay
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
# | | | |||| | |kworker/0:1H-193 [000] .... 544.113497: pm_qos_update_target: action=UPDATE_REQ prev_value=691200 curr_value=940800kworker/0:0-4169 [000] .... 544.113572: cpu_frequency_limits: min=691200 max=691200 cpu_id=0kworker/2:2H-465 [002] .... 544.220977: pm_qos_update_target: action=UPDATE_REQ prev_value=2147483647 curr_value=44kworker/2:2H-465 [002] .... 544.220993: pm_qos_update_target: action=UPDATE_REQ prev_value=2000000000 curr_value=44kworker/2:2H-465 [002] .... 544.221000: pm_qos_update_target: action=UPDATE_REQ prev_value=2000000000 curr_value=44kworker/2:2H-465 [002] .... 544.221005: pm_qos_update_target: action=UPDATE_REQ prev_value=2147483647 curr_value=44kworker/0:1H-193 [000] .... 544.232162: pm_qos_update_target: action=UPDATE_REQ prev_value=940800 curr_value=691200kworker/0:0-4169 [000] .... 544.232245: cpu_frequency_limits: min=691200 max=691200 cpu_id=0kworker/2:3-314 [002] .... 544.495547: pm_qos_update_target: action=UPDATE_REQ prev_value=44 curr_value=2147483647kworker/2:3-314 [002] .... 544.495569: pm_qos_update_target: action=UPDATE_REQ prev_value=44 curr_value=2000000000kworker/2:3-314 [002] .... 544.495579: pm_qos_update_target: action=UPDATE_REQ prev_value=44 curr_value=2000000000kworker/2:3-314 [002] .... 544.495585: pm_qos_update_target: action=UPDATE_REQ prev_value=44 curr_value=2147483647kworker/0:1H-193 [000] .... 544.516929: pm_qos_update_target: action=UPDATE_REQ prev_value=691200 curr_value=940800kworker/0:0-4169 [000] .... 544.517008: cpu_frequency_limits: min=691200 max=691200 cpu_id=0kworker/0:1H-193 [000] .... 544.636145: pm_qos_update_target: action=UPDATE_REQ prev_value=940800 curr_value=691200kworker/0:0-4169 [000] .... 544.636223: cpu_frequency_limits: min=691200 max=691200 cpu_id=0kworker/0:1H-193 [000] .... 544.915474: pm_qos_update_target: action=UPDATE_REQ prev_value=691200 curr_value=940800kworker/0:0-4169 [000] .... 544.915542: cpu_frequency_limits: min=691200 max=691200 cpu_id=0kworker/0:1H-193 [000] .... 545.032146: pm_qos_update_target: action=UPDATE_REQ prev_value=940800 curr_value=691200kworker/0:0-4169 [000] .... 545.032208: cpu_frequency_limits: min=691200 max=691200 cpu_id=0kworker/0:1H-193 [000] .... 545.317990: pm_qos_update_target: action=UPDATE_REQ prev_value=691200 curr_value=940800kworker/0:0-4169 [000] .... 545.318064: cpu_frequency_limits: min=691200 max=691200 cpu_id=0kworker/0:1H-193 [000] .... 545.436160: pm_qos_update_target: action=UPDATE_REQ prev_value=940800 curr_value=691200kworker/0:0-4169 [000] .... 545.436226: cpu_frequency_limits: min=691200 max=691200 cpu_id=0kworker/0:1H-193 [000] .... 545.717840: pm_qos_update_target: action=UPDATE_REQ prev_value=691200 curr_value=940800kworker/0:0-4169 [000] .... 545.717957: cpu_frequency_limits: min=691200 max=691200 cpu_id=0kworker/0:1H-193 [000] .... 545.836155: pm_qos_update_target: action=UPDATE_REQ prev_value=940800 curr_value=691200kworker/0:0-4169 [000] .... 545.836222: cpu_frequency_limits: min=691200 max=691200 cpu_id=0kworker/0:1H-193 [000] .... 546.122850: pm_qos_update_target: action=UPDATE_REQ prev_value=691200 curr_value=940800kworker/0:0-4169 [000] .... 546.122919: cpu_frequency_limits: min=691200 max=691200 cpu_id=0kworker/0:1H-193 [000] .... 546.240167: pm_qos_update_target: action=UPDATE_REQ prev_value=940800 curr_value=691200kworker/0:0-4169 [000] .... 546.240233: cpu_frequency_limits: min=691200 max=691200 cpu_id=0kworker/0:1H-193 [000] .... 546.523900: pm_qos_update_target: action=UPDATE_REQ prev_value=691200 curr_value=940800kworker/0:0-4169 [000] .... 546.523970: cpu_frequency_limits: min=691200 max=691200 cpu_id=0kworker/0:1H-193 [000] .... 546.640185: pm_qos_update_target: action=UPDATE_REQ prev_value=940800 curr_value=691200kworker/0:0-4169 [000] .... 546.640259: cpu_frequency_limits: min=691200 max=691200 cpu_id=0kworker/0:1H-193 [000] .... 546.925955: pm_qos_update_target: action=UPDATE_REQ prev_value=691200 curr_value=940800kworker/0:0-4169 [000] .... 546.926023: cpu_frequency_limits: min=691200 max=691200 cpu_id=0kworker/0:1H-193 [000] .... 547.044132: pm_qos_update_target: action=UPDATE_REQ prev_value=940800 curr_value=691200kworker/0:0-4169 [000] .... 547.044197: cpu_frequency_limits: min=691200 max=691200 cpu_id=0
neo:/sys/kernel/tracing #
看到 cpu_frequency_limits: min=691200 max=691200 cpu_id=0
一直被限频
代码实现
scaling_max_freq
int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,enum pm_qos_req_action action, int value)
{int prev_value, curr_value, new_value;unsigned long flags;struct freq_qos_request *req;pr_err("[%s:%d] %s called for %s, value %d\n", __FILE__, __LINE__, __func__, "rebort", value);spin_lock_irqsave(&pm_qos_lock, flags);prev_value = pm_qos_get_value(c);if (value == PM_QOS_DEFAULT_VALUE)new_value = c->default_value;elsenew_value = value;pr_err("[%s:%d] %s called for %s, prev_value %d, new_value %d\n", __FILE__, __LINE__, __func__, "rebort", prev_value, new_value);switch (action) {case PM_QOS_REMOVE_REQ:pr_err("[%s:%d] %s called for %s\n", __FILE__, __LINE__, __func__, "rebort");plist_del(node, &c->list);break;case PM_QOS_UPDATE_REQ:pr_err("[%s:%d] %s called for %s\n", __FILE__, __LINE__, __func__, "rebort");/** To change the list, atomically remove, reinit with new value* and add, then see if the aggregate has changed.*/plist_del(node, &c->list);fallthrough;case PM_QOS_ADD_REQ:pr_err("[%s:%d] %s called for %s\n", __FILE__, __LINE__, __func__, "rebort");plist_node_init(node, new_value);plist_add(node, &c->list);break;default:/* no action */;}curr_value = pm_qos_get_value(c);pm_qos_set_value(c, curr_value);// plist_for_each_entry(node, &c->list, node_list) {// pr_err("[%s:%d] %s called for %s, Node value %d\n", // __FILE__, __LINE__, __func__, "rebort", node->prio);// }// plist_for_each_entry(iter, &c->list.node_list, node_list) {// pr_err("[%s:%d] %s called for %s, Node value %d\n", // __FILE__, __LINE__, __func__, "rebort", iter->prio);// }// plist_for_each_entry(si, &swap_active_head, list) {// if (!WARN_ON(!si->frontswap_map))// set_bit(si->type, a);// }plist_for_each_entry(req, &c->list, pnode) {pr_err("[%s:%d] %s called for %s, Node value %d\n", __FILE__, __LINE__, __func__, "rebort", req->pnode.prio);}pr_err("[%s:%d] %s called for %s, prev_value %d, new_value %d, curr_value %d\n", __FILE__, __LINE__, __func__, "rebort", prev_value, new_value, curr_value);spin_unlock_irqrestore(&pm_qos_lock, flags);trace_pm_qos_update_target(action, prev_value, curr_value);if (prev_value == curr_value)return 0;pr_err("[%s:%d] %s called for %s, prev_value %d, new_value %d, curr_value %d\n", __FILE__, __LINE__, __func__, "rebort", prev_value, new_value, curr_value);if (c->notifiers)blocking_notifier_call_chain(c->notifiers, curr_value, NULL);return 1;
}int freq_qos_apply(struct freq_qos_request *req,enum pm_qos_req_action action, s32 value)
{int ret;pr_err("freq_qos_apply rebort value %d, req->type %d\n", value, req->type);switch(req->type) {case FREQ_QOS_MIN:ret = pm_qos_update_target(&req->qos->min_freq, &req->pnode,action, value);break;case FREQ_QOS_MAX:ret = pm_qos_update_target(&req->qos->max_freq, &req->pnode,action, value);break;default:ret = -EINVAL;}return ret;
}int freq_qos_update_request(struct freq_qos_request *req, s32 new_value)
{pr_err("freq_qos_update_request rebort value %d\n", new_value);pr_err("freq_qos_update_request rebort req->pnode.prio %d\n", req->pnode.prio);pr_err("freq_qos_update_request rebort req->type %d\n", req->type);if (new_value == 691200) {dump_stack();}pr_err("[%s:%d] %s called for %s\n", __FILE__, __LINE__, __func__, "rebort");if (!req)return -EINVAL;pr_err("[%s:%d] %s called for %s\n", __FILE__, __LINE__, __func__, "rebort");if (WARN(!freq_qos_request_active(req),"%s() called for unknown object\n", __func__))return -EINVAL;pr_err("[%s:%d] %s called for %s\n", __FILE__, __LINE__, __func__, "rebort");if (req->pnode.prio == new_value)return 0;pr_err("[%s:%d] %s called for %s\n", __FILE__, __LINE__, __func__, "rebort");trace_android_vh_freq_qos_update_request(req, new_value);if (req->pnode.prio == new_value)return 0;pr_err("[%s:%d] %s called for %s\n", __FILE__, __LINE__, __func__, "rebort");return freq_qos_apply(req, PM_QOS_UPDATE_REQ, new_value);
}
EXPORT_SYMBOL_GPL(freq_qos_update_request);#define store_one(file_name, object) \
static ssize_t store_##file_name \
(struct cpufreq_policy *policy, const char *buf, size_t count) \
{ \unsigned long val; \int ret; \\ret = sscanf(buf, "%lu", &val); \if (ret != 1) \return -EINVAL; \pr_err("store___file_name rebort value %d, count %d\n", val, count); \ret = freq_qos_update_request(policy->object##_freq_req, val);\pr_err("store___file_name rebort ret %d\n", ret); \return ret >= 0 ? count : ret; \
}store_one(scaling_min_freq, min);
store_one(scaling_max_freq, max);
一步一步 加了一些log之后, 在开机之后抓到了一些kmsg 如下
[ 48.704656] freq_qos_update_request robert value 691200
[ 48.710749] freq_qos_update_request robert req->pnode.prio 1996800
[ 48.717886] freq_qos_update_request robert req->type 2
[ 48.735537] CPU: 2 PID: 1245 Comm: thermal-engine- Tainted: G O 5.10.218-debug-g1a7a265f4972-dirty #1
[ 48.735550] Hardware name: Qualcomm Technologies, Inc. NEO LUNA-V2 (DT)
[ 48.735559] Call trace:
[ 48.735577] dump_backtrace.cfi_jt+0x0/0x8
[ 48.735589] show_stack+0x1c/0x2c
[ 48.735601] dump_stack_lvl+0xf0/0x164
[ 48.735612] freq_qos_update_request+0x64/0x1ec
[ 48.735625] cpufreq_set_cur_state+0x88/0x108
[ 48.735636] cur_state_store+0xa4/0x18c
[ 48.735648] dev_attr_store+0x38/0x88
[ 48.735658] sysfs_kf_write+0xa0/0xfc
[ 48.735669] kernfs_fop_write_iter+0x134/0x200
[ 48.735680] vfs_write+0x2f4/0x368
[ 48.735689] ksys_write+0x7c/0xec
[ 48.735699] __arm64_sys_write+0x20/0x30
[ 48.735710] el0_svc_common.llvm.11541834636853004741+0xd8/0x20c
[ 48.735719] do_el0_svc+0x28/0x98
[ 48.735730] el0_svc+0x24/0x38
[ 48.735739] el0_sync_handler+0x88/0xec
[ 48.735749] el0_sync+0x1ac/0x1c0
[ 48.735760] [kernel/power/qos.c:609] freq_qos_update_request called for robert
[ 48.745510] [kernel/power/qos.c:613] freq_qos_update_request called for robert
[ 48.756987] [kernel/power/qos.c:618] freq_qos_update_request called for robert
[ 48.777088] [kernel/power/qos.c:621] freq_qos_update_request called for robert
[ 48.786550] [kernel/power/qos.c:626] freq_qos_update_request called for robert
[ 48.800133] freq_qos_apply robert value 691200, req->type 2
[ 48.831713] [kernel/power/qos.c:108] pm_qos_update_target called for robert, value 691200
[ 48.874713] [kernel/power/qos.c:117] pm_qos_update_target called for robert, prev_value 1996800, new_value 691200
[ 48.885909] [kernel/power/qos.c:125] pm_qos_update_target called for robert
[ 48.893702] [kernel/power/qos.c:134] pm_qos_update_target called for robert
[ 48.901492] [kernel/power/qos.c:164] pm_qos_update_target called for robert, Node value 691200
[ 48.901497] [kernel/power/qos.c:164] pm_qos_update_target called for robert, Node value 1996800
[ 48.901502] [kernel/power/qos.c:164] pm_qos_update_target called for robert, Node value 2147483647
[ 48.901508] [kernel/power/qos.c:167] pm_qos_update_target called for robert, prev_value 1996800, new_value 691200, curr_value 691200
[ 48.904122] [kernel/power/qos.c:175] pm_qos_update_target called for robert, prev_value 1996800, new_value 691200, curr_value 691200
看到
[ 48.735577] dump_backtrace.cfi_jt+0x0/0x8
[ 48.735589] show_stack+0x1c/0x2c
[ 48.735601] dump_stack_lvl+0xf0/0x164
[ 48.735612] freq_qos_update_request+0x64/0x1ec
[ 48.735625] cpufreq_set_cur_state+0x88/0x108
[ 48.735636] cur_state_store+0xa4/0x18c
看到这 cpufreq_set_cur_state应该知道了 是 thermal 压频了
16:09:10 suhui@server:~/workspace/leo/vendor/kernel_platform/msm-kernel $ grep cpufreq_set_cur_state . -nr
./drivers/acpi/processor_thermal.c:82:static int cpufreq_set_cur_state(unsigned int cpu, int state)
./drivers/acpi/processor_thermal.c:168:static int cpufreq_set_cur_state(unsigned int cpu, int state)
./drivers/acpi/processor_thermal.c:252: cpufreq_set_cur_state(pr->id, state);
./drivers/acpi/processor_thermal.c:254: cpufreq_set_cur_state(pr->id, max_pstate);
./drivers/thermal/cpu_cooling.c:310: * cpufreq_set_cur_state - callback function to set the current cooling state.
./drivers/thermal/cpu_cooling.c:319:static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
./drivers/thermal/cpu_cooling.c:487: .set_cur_state = cpufreq_set_cur_state,
./drivers/thermal/cpu_cooling.c:493: .set_cur_state = cpufreq_set_cur_state,
./drivers/thermal/cpufreq_cooling.c:415: * cpufreq_set_cur_state - callback function to set the current cooling state.
./drivers/thermal/cpufreq_cooling.c:424:static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
./drivers/thermal/cpufreq_cooling.c:462: .set_cur_state = cpufreq_set_cur_state,
16:09:12 suhui@server:~/workspace/leo/vendor/kernel_platform/msm-kernel $
那么手动去看下到底是哪个 thermal设备压频了
neo:/sys/class/thermal # ls
cooling_device0 cooling_device15 cooling_device21 cooling_device8 thermal_zone13 thermal_zone2 thermal_zone8
cooling_device1 cooling_device16 cooling_device22 cooling_device9 thermal_zone14 thermal_zone20 thermal_zone9
cooling_device10 cooling_device17 cooling_device3 thermal_zone0 thermal_zone15 thermal_zone3
cooling_device11 cooling_device18 cooling_device4 thermal_zone1 thermal_zone16 thermal_zone4
cooling_device12 cooling_device19 cooling_device5 thermal_zone10 thermal_zone17 thermal_zone5
cooling_device13 cooling_device2 cooling_device6 thermal_zone11 thermal_zone18 thermal_zone6
cooling_device14 cooling_device20 cooling_device7 thermal_zone12 thermal_zone19 thermal_zone7
neo:/sys/class/thermal # cat cooling_device*/type
thermal-cpufreq-0
cpu-hotplug0
pause-cpu1
thermal-pause-2
pause-cpu2
pause-cpu3
thermal-pause-4
thermal-pause-8
thermal-devfreq-0
gpu
cdsp_hw
cdsp
ddr-cdev
wsa
wsa2
qcom,cnss_cdev1
cpu-hotplug1
cpu-hotplug2
cpu-hotplug3
display-fps
cpu-cluster0
pause-cpu0
thermal-pause-1
neo:/sys/class/thermal #
可以直接解除 cooling_device0 的 thermal 压频
neo:/sys/class/thermal/cooling_device0 # cat /sys/devices/system/cpu/cpufreq/policy0/sca
scaling_available_frequencies scaling_cur_freq scaling_max_freq
scaling_available_governors scaling_driver scaling_min_freq
scaling_boost_frequencies scaling_governor scaling_setspeed
at /sys/devices/system/cpu/cpufreq/policy0/scaling_max_freq <
691200
neo:/sys/class/thermal/cooling_device0 #
neo:/sys/class/thermal/cooling_device0 # cat /sys/devices/system/cpu/cpufreq/policy0/scaling_max_freq
691200
neo:/sys/class/thermal/cooling_device0 # ls
cur_state max_state power stats subsystem type uevent
neo:/sys/class/thermal/cooling_device0 # cat *
5
5
cat: power: Is a directory
cat: stats: Is a directory
cat: subsystem: Is a directory
thermal-cpufreq-0
1|neo:/sys/class/thermal/cooling_device0 # echo 0 > cur_state
neo:/sys/class/thermal/cooling_device0 # cat /sys/devices/system/cpu/cpufreq/policy0/scaling_max_freq
1996800
neo:/sys/class/thermal/cooling_device0 #