Linux strace 调试定位系统问题
strace 的基本功能
strace 的主要功能包括:
- 跟踪系统调用:显示进程执行时调用的系统函数及其参数和返回值。
- 监控信号:记录进程接收到的信号。
- 性能分析:统计系统调用的执行时间和次数。
- 调试支持:帮助定位程序崩溃、性能瓶颈或其他异常行为。
strace 的常用参数
以下是 strace 常用参数及其功能:
- -p <PID>:跟踪指定进程 ID 的系统调用。
- -c:统计系统调用的执行时间、次数和错误次数。
- -e <事件>:指定要跟踪的事件类型,例如 -e trace=open,read,write。
- -f:跟踪由 fork() 创建的子进程。
- -t:在输出中显示时间戳(秒)。
- -tt:显示更精确的时间戳(包括微秒)。
- -T:显示每个系统调用花费的时间。
- -d:输出 strace 本身的调试信息。
1,查看某个系统调用的调用情况(多个系统调用名用逗号分隔,逗号后不能有空格):
strace -e trace=syscall1,syscall2 -p $pid
root@ubuntu:/home/lark# strace -e trace=epoll_wait -p 1051
strace: Process 1051 attached
epoll_wait(3,[], 1, 10000) = 0
epoll_wait(3,
[], 1, 10000) = 0
epoll_wait(3,
2,跟踪命令执行过程中的全部系统调用:strace ls
lark@ubuntu:~$ strace ls
execve("/usr/bin/ls", ["ls"], 0x7ffe76d6d020 /* 48 vars */) = 0
brk(NULL) = 0x55dca912f000
arch_prctl(0x3001 /* ARCH_??? */, 0x7fffb5cf4f70) = -1 EINVAL (Invalid argument)
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=89543, ...}) = 0
mmap(NULL, 89543, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f7aac2b7000
close(3) = 0
mprotect(0x7f7aac290000, 135168, PROT_NONE) = 0
pread64(3,"\4\0\0\0\24\0\0\0\3\0\0\0GNU\0\\\373\211m\324\17\220\252\214l\213\270V\0M\37"..., 68, 824) = 68
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f7aabfdc000
arch_prctl(ARCH_SET_FS, 0x7f7aabfdd400) = 0
mprotect(0x7f7aac280000, 16384, PROT_READ) = 0
munmap(0x7f7aac2b7000, 89543) = 0
set_tid_address(0x7f7aabfdd6d0) = 2021
set_robust_list(0x7f7aabfdd6e0, 24) = 0
rt_sigaction(SIGRT_1, {sa_handler=0x7f7aabfe4c90, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART|SA_SIGINFO, sa_restorer=0x7f7aabff2420}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
statfs("/sys/fs/selinux", 0x7fffb5cf4ec0) = -1 ENOENT (No such file or directory)
brk(NULL) = 0x55dca912f000
brk(0x55dca9150000) = 0x55dca9150000
openat(AT_FDCWD, "/proc/filesystems", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
read(3, "nodev\tsysfs\nnodev\ttmpfs\nnodev\tbd"..., 1024) = 387
read(3, "", 1024) = 0
ioctl(1, TIOCGWINSZ, {ws_row=45, ws_col=203, ws_xpixel=0, ws_ypixel=0}) = 0
getdents64(3, /* 24 entries */, 32768) = 768
close(3) = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(0x88, 0), ...}) = 0
write(1, "1.txt Desktop\tDocuments Downlo"..., 871.txt Desktop Documents Downloads Music Pictures Public share Templates Videos
) = 87
close(1) = 0
close(2) = 0
exit_group(0) = ?
+++ exited with 0 +++
3,监控信号:strace -p pid -e trace=signal -e signal=SIGSEGV
示例 1:监控进程接收到的所有信号
假设进程 ID 为 1234,可以使用以下命令:
strace -p 1234 -e trace=signal
其中 -e trace=signal 表示只跟踪与信号相关的系统调用(如 kill、rt_sigaction),进程收到的信号默认都会显示。
lark@ubuntu:~$ kill -9 3242
lark@ubuntu:~$ strace -p 3242 -e trace=signal
strace: Process 3242 attached
+++ killed by SIGKILL +++
参数 -e signal=SIGSEGV 可选,加上后只显示 SIGSEGV 信号;注意不能写成 -e trace=sigsegv,因为 sigsegv 不是系统调用名。
4,查看耗时 strace -T ls
lark@ubuntu:~$ strace -T ls
execve("/usr/bin/ls", ["ls"], 0x7ffe710124f8 /* 49 vars */) = 0 <0.000544>
brk(NULL) = 0x56352a5b2000 <0.000145>
arch_prctl(0x3001 /* ARCH_??? */, 0x7ffe72b0ca50) = -1 EINVAL (Invalid argument) <0.000143>
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) <0.000188>
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 <0.000167>
fstat(3, {st_mode=S_IFREG|0644, st_size=89543, ...}) = 0 <0.000144>
mmap(NULL, 89543, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f9c94839000 <0.000328>
close(3) = 0 <0.000112>
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libselinux.so.1", O_RDONLY|O_CLOEXEC) = 3 <0.000093>
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0@p\0\0\0\0\0\0"..., 832) = 832 <0.000059>
fstat(3, {st_mode=S_IFREG|0644, st_size=163200, ...}) = 0 <0.000096>
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f9c94837000 <0.000060>
mmap(NULL, 174600, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f9c9480c000 <0.000061>
查看 ls 每个系统调用发生的时刻:strace -tt ls,使用 -tt 显示微秒级时间戳
lark@ubuntu:~$ strace -tt ls
07:03:25.416052 execve("/usr/bin/ls", ["ls"], 0x7ffc6f804288 /* 49 vars */) = 0
07:03:25.416881 brk(NULL) = 0x56445be62000
07:03:25.417351 arch_prctl(0x3001 /* ARCH_??? */, 0x7ffccbd1e540) = -1 EINVAL (Invalid argument)
07:03:25.418487 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
07:03:25.418825 openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
07:03:25.419105 fstat(3, {st_mode=S_IFREG|0644, st_size=89543, ...}) = 0
07:03:25.419404 mmap(NULL, 89543, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f1ede6b8000
07:03:25.419930 close(3) = 0
07:03:25.420121 openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libselinux.so.1", O_RDONLY|O_CLOEXEC) = 3
07:03:25.420316 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0@p\0\0\0\0\0\0"..., 832) = 832
07:03:25.420479 fstat(3, {st_mode=S_IFREG|0644, st_size=163200, ...}) = 0
5,系统调用次数统计
strace 的 -c 参数能够对程序执行过程中的系统调用进行统计分析,输出以下信息:
- 调用次数:每个系统调用被调用的总次数。
- 错误次数:每个系统调用失败的次数。
- 总耗时:每个系统调用所消耗的总时间。
strace -c ./my_program
查看 my_program(一个 while(1) 循环程序,下例中可执行文件为 ./a.out)的系统调用次数
lark@ubuntu:~$ strace -c ./a.out
^Cstrace: Process 3382 detached
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
0.00 0.000000 0 1 read
0.00 0.000000 0 2 close
0.00 0.000000 0 2 fstat
0.00 0.000000 0 7 mmap
0.00 0.000000 0 3 mprotect
0.00 0.000000 0 1 munmap
0.00 0.000000 0 1 brk
0.00 0.000000 0 6 pread64
0.00 0.000000 0 1 1 access
0.00 0.000000 0 1 execve
0.00 0.000000 0 2 1 arch_prctl
0.00 0.000000 0 2 openat
------ ----------- ----------- --------- --------- ----------------
100.00 0.000000 29 2 total
6,strace 的 -d 参数用于在标准错误上输出 strace 自身的调试信息,帮助排查 strace 本身的工作过程;它不会改变正常的跟踪输出。正常的跟踪输出包括:
- 系统调用的名称和参数。
- 调用的返回值。
- 错误信息(如果有)。
- 时间戳(如果与 -tt 参数结合使用)。