Java资源持续监控(依赖 jps)
说明:按 Java 主类名(通过 jps 匹配)查找 Java 进程,记录其资源占用以及机器内存使用情况并写入文件;脚本每次执行只采样一次,需配合 watch 定时循环执行。
例如:每 60s 记录 Spark 执行器资源使用情况:
watch -n 60 "./watch_jps_resource.sh CoarseGrainedExecutorBackend logs"
结果:在 logs 目录中,为每个执行器生成以其 PID 命名的文件并记录其资源占用,同时将机器内存使用情况写入 free.log。
输出:控制台打印资源变化情况。
脚本内容:
#!/bin/sh
#
# Take one resource sample of the Java processes whose jps entry matches
# <process_name> and of the machine memory, writing the results under
# <output_directory>. Intended to be re-run periodically, e.g.:
#   watch -d "./watch_jps_resource.sh CoarseGrainedExecutorBackend logs"
#
# Arguments:
#   $1   process name to look for in the jps listing
#   $2   directory that receives the per-PID logs and free.log
#   $3.. optional ps options used instead of the default "-aux"

# Both the process name and the output directory are mandatory.
if [ "$#" -lt 2 ]; then
  echo "Usage: $0 <process_name> <output_directory> [ps_options]" >&2
  exit 1
fi

PROCESS_NAME=$1
OUTPUT_DIR=$2
# Drop the two consumed arguments; whatever remains is passed on to ps.
shift 2

# Nothing can be logged without the output directory, so stop if it
# cannot be created.
mkdir -p -- "$OUTPUT_DIR" || exit 1

# Find the PIDs of the matching Java processes
# Print one counter from /proc/<pid>/io, or "-" when the file is missing
# or unreadable.
#   $1 - PID
#   $2 - counter name without the trailing colon (e.g. read_bytes)
proc_io_bytes() {
  io_value=""
  if [ -r "/proc/$1/io" ]; then
    io_value=$(awk -v key="$2:" '$1 == key { print $2; exit }' "/proc/$1/io" 2>/dev/null)
  fi
  printf '%s\n' "${io_value:--}"
}

# Turn `ps aux`-style rows read from stdin into log rows.
#   $1 - timestamp, $2 - value for DISK_READ, $3 - value for DISK_WRITE
# Rows with fewer than 11 fields are dropped. Field 6 (RSS, in KB) is
# reported both as-is and converted to GB.
format_ps_row() {
  awk -v current_time="$1" -v read_bytes="$2" -v write_bytes="$3" '
    NF >= 11 {
      rss = $6
      mem_gb = sprintf("%.2f", rss / 1024 / 1024)
      printf "%-20s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n",
        current_time, $1, $2, $3, $4, mem_gb, rss, read_bytes, write_bytes
    }'
}

# PIDs are the first space-separated column of the matching jps lines.
pids=$(jps | grep -i -- "$PROCESS_NAME" | cut -d ' ' -f 1)

# $pids is a whitespace-separated list, so it is split on purpose.
for pid in $pids; do
  CURRENT_TIME=$(date +"%Y-%m-%d %H:%M:%S")
  OUTPUT_FILE="$OUTPUT_DIR/$pid"

  # First sample for this PID: start the file with the column header.
  if [ ! -f "$OUTPUT_FILE" ]; then
    echo "TIME USER PID CPU% MEM% MEM_GB MEM_KB DISK_READ DISK_WRITE" > "$OUTPUT_FILE"
  fi

  # ps aux fields 7 and 8 are TTY and STAT, not disk I/O, so the disk
  # columns are taken from the per-process I/O counters instead.
  disk_read=$(proc_io_bytes "$pid" read_bytes)
  disk_write=$(proc_io_bytes "$pid" write_bytes)

  # Extra script arguments, when given, replace the default "-aux".
  if [ "$#" -eq 0 ]; then
    ps -q "$pid" -aux --no-headers
  else
    ps -q "$pid" "$@" --no-headers
  fi | format_ps_row "$CURRENT_TIME" "$disk_read" "$disk_write" >> "$OUTPUT_FILE"
done

# Record machine memory (free -g) in free.log
# Write the column header into the memory log when the log is missing
# or empty.
#   $1 - path of the memory log
ensure_free_log_header() {
  # -s is false for a missing file as well as for a zero-length one.
  if [ ! -s "$1" ]; then
    echo "TIME TOTAL_GI USED_GI FREE_GI SHARED_GI BUFFERS_GI CACHE_GI SWAP_TOTAL-GI SWAP_USED-GI SWAP_FREE-GI" > "$1"
  fi
}

FREE_LOG="$OUTPUT_DIR/free.log"
ensure_free_log_header "$FREE_LOG"

# Capture free -g and flatten it into a single row
# Flatten `free` output read from stdin into one row: six values from
# the Mem line followed by three values from the Swap line.
# Lines are picked by their label rather than by position, because some
# free versions print an extra "-/+ buffers/cache:" line between Mem and
# Swap. Six Mem values are printed to match the six memory columns named
# in the free.log header.
format_free_row() {
  awk '
    $1 == "Mem:"  { printf "%-10s %-10s %-10s %-10s %-10s %-10s", $2, $3, $4, $5, $6, $7 }
    $1 == "Swap:" { printf " %-10s %-10s %-10s", $2, $3, $4 }
  '
}

CURRENT_TIME=$(date +"%Y-%m-%d %H:%M:%S")
# LC_ALL=C keeps the "Mem:"/"Swap:" labels untranslated.
FREE_OUTPUT=$(LC_ALL=C free -g | format_free_row)
echo "$CURRENT_TIME $FREE_OUTPUT" >> "$FREE_LOG"

# Print the latest samples to the console
# Print the most recent log row of each given PID.
#   $1   - directory holding the per-PID log files
#   $2.. - PIDs to report
# PIDs without a log file, or whose log is empty, are skipped.
print_process_summary() {
  summary_dir=$1
  shift
  for summary_pid in "$@"; do
    [ -f "$summary_dir/$summary_pid" ] || continue
    LAST_LINE=$(tail -n 1 "$summary_dir/$summary_pid")
    [ -n "$LAST_LINE" ] || continue
    echo "Process $summary_pid:"
    echo " TIME USER PID CPU% MEM% MEM_GB MEM_KB DISK_READ DISK_WRITE"
    echo " $LAST_LINE"
  done
}

echo "=== Resource Usage Summary ==="
echo "Time: $CURRENT_TIME"

# $pids is a whitespace-separated list, so it is split on purpose; an
# empty list simply prints nothing.
print_process_summary "$OUTPUT_DIR" $pids

# Print the memory status
# Print the most recent row of the memory log.
#   $1 - path of the memory log
# Prints nothing when the log does not exist or its last line is empty.
print_memory_summary() {
  [ -f "$1" ] || return 0
  LAST_FREE_LINE=$(tail -n 1 "$1")
  [ -n "$LAST_FREE_LINE" ] || return 0
  echo "Memory Status:"
  echo " TIME TOTAL_GI USED_GI FREE_GI SHARED_GI BUFFERS_GI CACHE_GI SWAP_TOTAL-GI SWAP_USED-GI SWAP_FREE-GI"
  echo " $LAST_FREE_LINE"
}

print_memory_summary "$FREE_LOG"
