Linux 用户态时间性能优化工具:perf/strace/gdb/valgrind/gprof
1. perf top -g或者top分析卡顿(cpu占用比较高的函数)
gdb 是 GNU 调试器,可以辅助分析程序的时间性能。gdb 并没有 info time 或 timer 这样的计时命令,但可以通过 info proc stat 查看进程的时间信息(如 utime、stime),再结合 bt 查看程序当前停在哪里。
#include <stdio.h>
#include <unistd.h>

/*
 * Busy loop used as the CPU-bound workload for the top / perf top examples.
 * It performs 1000 * 1000 * 1000000 assignments and produces no result.
 */
void loop(void)
{
    /*
     * volatile: sum is written but never read, so an optimizing build may
     * otherwise remove the loops and the demo would no longer burn CPU.
     */
    volatile int sum = 0;
    int i, j, k;

    for (i = 0; i < 1000; i++) {
        for (j = 0; j < 1000; j++) {
            for (k = 0; k < 1000000; k++) {
                sum = i + j + k;
            }
        }
    }
}

/* Print a start banner, run the busy loop, then print a finish banner. */
int main(void)
{
    printf("Starting program...\n");
    loop(); /* simulate a time-consuming operation */
    printf("Program finished.\n");
    return 0;
}
top分析
lark@ubuntu:~$ top
top - 23:04:14 up 42 min, 1 user, load average: 0.68, 0.20, 0.20
Tasks: 334 total, 2 running, 332 sleeping, 0 stopped, 0 zombie
%Cpu(s): 12.5 us, 0.0 sy, 0.0 ni, 87.4 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
MiB Mem : 1927.8 total, 108.6 free, 999.3 used, 819.8 buff/cache
MiB Swap: 2048.0 total, 2048.0 free, 0.0 used. 762.2 avail Mem

PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
2743 lark 20 0 2496 572 508 R 100.0 0.0 0:58.44 time
perf top -p pid 分析单个进程
root@ubuntu:/home/lark# perf top -p 2743 > perf.txt //分析查看perf.txt
PerfTop: 1952 irqs/sec kernel: 0.0% exact: 0.0% lost: 0/0 drop: 0/0 [4000Hz cpu-clock:pppH], (target_pid: 2743)
-------------------------------------------------------------------------------
100.00% time [.] loop
0.00% [kernel] [k] handle_softirqs
0.00% [kernel] [k] _raw_spin_unlock_irqrestore
perf 是 Linux 内置的性能分析工具,用于分析 CPU 使用情况、函数调用关系等。perf top -g 选项可以显示调用图,帮助开发者了解函数调用关系和性能瓶颈。
2. 在 gdb 中运行程序,并用 info proc stat 检查时间信息:
(gdb) info proc stat
process 2812
Process: 2812
Exec file: a.out
State: t
Parent process: 2024
Process group: 2812
Session id: 2024
TTY: 34816
TTY owner process group: 2812
Flags: 0x40400000
Minor faults (no memory page): 76
Minor faults, children: 0
Major faults (memory page faults): 0
Major faults, children: 0
utime: 2428
stime: 0
utime, children: 0
stime, children: 0
jiffies remaining in current time slice: 20
'nice' value: 0
jiffies until next timeout: 1
jiffies until next SIGALRM: 0
start time (jiffies since system boot): 446983
Virtual memory size: 2555904
Resident set size: 161
rlim: 18446744073709551615
Start of text: 0x5620d01db000
End of text: 0x5620d01db275
Start of stack: 0x7ffecee218b0
utime/stime 的单位是时钟滴答(clock tick)。如果 utime 和 stime 长时间都不变 → 进程很可能阻塞在系统调用、睡眠或锁等待上;如果 utime 持续快速增长 → 进程可能陷入用户态死循环。两种情况都可以用 bt 检查堆栈。
lark@ubuntu:~$ cat /proc/2812/status
Name: a.out
Umask: 0002
State: R (running)
Tgid: 2812
Ngid: 0
Pid: 2812
PPid: 2024
TracerPid: 2817
Uid: 1000 1000 1000 1000
Gid: 1000 1000 1000 1000
FDSize: 256
Groups: 4 24 27 30 46 120 133 134 1000
NStgid: 2812
NSpid: 2812
NSpgid: 2812
NSsid: 2024
VmPeak: 2496 kB
VmSize: 2496 kB
VmLck: 0 kB
VmPin: 0 kB
VmHWM: 644 kB
VmRSS: 644 kB
RssAnon: 76 kB
RssFile: 568 kB
RssShmem: 0 kB
VmData: 176 kB
VmStk: 132 kB
VmExe: 4 kB
VmLib: 1652 kB
VmPTE: 40 kB
VmSwap: 0 kB
HugetlbPages: 0 kB
CoreDumping: 0
THP_enabled: 1
Threads: 1
SigQ: 0/7336
SigPnd: 0000000000000000
ShdPnd: 0000000000000000
SigBlk: 0000000000000000
SigIgn: 0000000000000000
SigCgt: 0000000000000000
CapInh: 0000000000000000
CapPrm: 0000000000000000
CapEff: 0000000000000000
CapBnd: 000001ffffffffff
CapAmb: 0000000000000000
NoNewPrivs: 0
Seccomp: 0
Seccomp_filters: 0
Speculation_Store_Bypass: thread vulnerable
SpeculationIndirectBranch: conditional enabled
Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff
Cpus_allowed_list: 0-127
Mems_allowed: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001
Mems_allowed_list: 0
voluntary_ctxt_switches: 2
nonvoluntary_ctxt_switches: 569
3, strace -tt 和strace -c 定位查看函数运行时间,系统调用的次数、错误数和总时间
(1). strace -tt -T -f -e trace=<系统调用列表> ./program(或用 -p <pid> 跟踪已运行的进程);统计汇总用 strace -c ./program
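strace 是用于跟踪系统调用的工具。-tt 在每行前打印带微秒的时间戳,-T 显示每个系统调用的耗时,-f 同时跟踪子进程。strace -c 选项会生成一个系统调用的统计报告,包括每个系统调用的次数、错误数和总时间(默认统计的是内核态 CPU 时间,加 -w 才按墙钟时间统计,所以 sleep 这类阻塞调用在默认报告里耗时很小)。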
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <errno.h>
#include <sys/select.h>

#define PORT 8080
#define TIMEOUT_SEC 5

/*
 * Listen on TCP port PORT and wait at most TIMEOUT_SEC seconds for one
 * client, using select() on the listening socket.
 *
 * Returns 0 when a client connected or the wait timed out, and
 * EXIT_FAILURE when socket(), bind(), listen(), select() or accept() fails.
 */
int main(void)
{
    int server_fd, client_sock;
    struct sockaddr_in addr;
    fd_set readfds;
    struct timeval tv;
    int status = 0;

    /* Create the listening socket. */
    if ((server_fd = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
        perror("socket failed");
        exit(EXIT_FAILURE);
    }

    /* Zero the whole structure so no field is left uninitialized. */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = htonl(INADDR_ANY);
    addr.sin_port = htons(PORT);

    /* Bind to the port. */
    if (bind(server_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        perror("bind failed");
        close(server_fd);
        exit(EXIT_FAILURE);
    }

    /* Start listening with a backlog of 1. */
    if (listen(server_fd, 1) < 0) {
        perror("listen failed");
        close(server_fd);
        exit(EXIT_FAILURE);
    }

    printf("Waiting for client (timeout: %d sec)...\n", TIMEOUT_SEC);

    /* Prepare the descriptor set and the timeout for select(). */
    FD_ZERO(&readfds);
    FD_SET(server_fd, &readfds);
    tv.tv_sec = TIMEOUT_SEC;
    tv.tv_usec = 0;

    /* Wait for an incoming connection, giving up after the timeout. */
    int ret = select(server_fd + 1, &readfds, NULL, NULL, &tv);
    if (ret == -1) {
        perror("select error");
        status = EXIT_FAILURE;
    } else if (ret == 0) {
        printf("Timeout! No client connected.\n");
    } else {
        if ((client_sock = accept(server_fd, NULL, NULL)) < 0) {
            perror("accept failed");
            status = EXIT_FAILURE;
        } else {
            printf("Client connected!\n");
            close(client_sock);
        }
    }

    close(server_fd);
    return status;
}
使用 strace -tt -T -f -e trace=<系统调用列表> 进行跟踪:
lark@ubuntu:~$ gcc 1_strace.c -o 1_strace
lark@ubuntu:~$ strace -tt -T -f -e trace=network,select,poll,epoll_wait,epoll_ctl,epoll_pwait ./1_strace
23:22:18.212702 socket(AF_INET, SOCK_STREAM, IPPROTO_IP) = 3 <0.000309>
23:22:18.213439 bind(3, {sa_family=AF_INET, sin_port=htons(8080), sin_addr=inet_addr("0.0.0.0")}, 16) = 0 <0.000069>
23:22:18.213678 listen(3, 1) = 0 <0.000063>
Waiting for client (timeout: 5 sec)...
23:22:18.214434 select(4, [3], NULL, NULL, {tv_sec=5, tv_usec=0}) = 0 (Timeout) <5.005219>
Timeout! No client connected.
23:22:23.220836 +++ exited with 0 +++
(2). strace -c ./pid
#include <stdio.h>
#include <unistd.h>

/* First simulated slow step: calls sleep(2). */
void functiona(void)
{
    sleep(2); /* simulate a time-consuming operation */
}

/* Second simulated slow step: calls sleep(2). */
void functionb(void)
{
    sleep(2); /* simulate a time-consuming operation */
}

/* Print a start banner, run both slow steps, then print a finish banner. */
int main(void)
{
    printf("Starting program...\n");
    functiona();
    functionb();
    printf("Program finished.\n");
    return 0;
}
lark@ubuntu:~$ strace -c ./2_strace
Starting program...nProgram finished.n% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
59.83 0.000137 68 2 clock_nanosleep
23.14 0.000053 17 3 brk
17.03 0.000039 39 1 write
0.00 0.000000 0 1 read
0.00 0.000000 0 2 close
0.00 0.000000 0 3 fstat
0.00 0.000000 0 7 mmap
0.00 0.000000 0 3 mprotect
0.00 0.000000 0 1 munmap
0.00 0.000000 0 6 pread64
0.00 0.000000 0 1 1 access
0.00 0.000000 0 1 execve
0.00 0.000000 0 2 1 arch_prctl
0.00 0.000000 0 2 openat
------ ----------- ----------- --------- --------- ---------
4. valgrind callgrind
valgrind 是一个内存调试和性能分析工具。callgrind 是 valgrind 的一个工具,用于生成函数调用图和性能报告。
#include <stdio.h>
#include <unistd.h>

/* First simulated slow step: calls sleep(2). */
void functiona(void)
{
    sleep(2); /* simulate a time-consuming operation */
}

/* Second simulated slow step: calls sleep(2). */
void functionb(void)
{
    sleep(2); /* simulate a time-consuming operation */
}

/* Print a start banner, run both slow steps, then print a finish banner. */
int main(void)
{
    printf("Starting program...\n");
    functiona();
    functionb();
    printf("Program finished.\n");
    return 0;
}
使用 valgrind 分析:
gcc -o a.out main.c
valgrind --tool=callgrind ./a.out
5. gprof 编译加 -pg
gprof 是一个程序性能分析工具,需要在编译时使用 -pg 选项。它通过统计函数调用次数和时间来生成性能报告。
#include <stdio.h>
#include <unistd.h>

/* First simulated slow step: calls sleep(2). */
void functiona(void)
{
    sleep(2); /* simulate a time-consuming operation */
}

/* Second simulated slow step: calls sleep(2). */
void functionb(void)
{
    sleep(2); /* simulate a time-consuming operation */
}

/* Print a start banner, run both slow steps, then print a finish banner. */
int main(void)
{
    printf("Starting program...\n");
    functiona();
    functionb();
    printf("Program finished.\n");
    return 0;
}
gcc -pg -o a.out main.c
./a.out
gprof a.out gmon.out > profile.txt # 生成性能报告
cat profile.txt
总结
这些工具提供了不同的性能分析视角:
- gdb 适合调试和查看进程时间信息。
- strace 适合分析系统调用。
- perf 适合分析 CPU 使用情况和函数调用关系。
- valgrind 适合分析内存使用和函数调用图。
- gprof 适合分析函数调用次数和时间。
通过这些工具,开发者可以全面了解程序的性能瓶颈并进行优化。