clickhouse-client --query "SELECT query_id, query, elapsed, read_rows FROM system.processes"
查询时间<1s
查询时间>10s
CPU使用率高
线程数过多
clickhouse-client --query "SELECT * FROM system.processes"
并发查询<10
并发查询>50
CPU使用率高
压缩算法CPU密集
clickhouse-client --query "SELECT default_compression_codec, sum(data_compressed_bytes) FROM system.parts WHERE active GROUP BY default_compression_codec"
LZ4
ZSTD(高CPU)
CPU使用率高
后台合并任务
clickhouse-client --query "SELECT * FROM system.merges"
合并数<5
合并数>20
CPU使用率高
索引构建
clickhouse-client --query "SELECT * FROM system.mutations"
无进行中
有进行中
CPU使用率高
数据导入
iostat -x 1
%util<70%
%util>90%
📋 常用排查命令速查
1. 查看当前运行的查询
# List every query that is currently executing, longest-running first.
clickhouse-client --query "
    SELECT
        query_id,
        user,
        address,
        query,
        elapsed,
        read_rows,
        read_bytes,
        memory_usage,
        formatReadableSize(memory_usage) AS memory
    FROM system.processes
    ORDER BY elapsed DESC
"

# Per-query detail with human-readable sizes and read throughput
# (bytes read per second of elapsed time), one column per line.
clickhouse-client --query "
    SELECT
        query_id,
        query,
        elapsed,
        read_rows,
        formatReadableSize(read_bytes) AS read_bytes,
        formatReadableSize(memory_usage) AS memory,
        formatReadableSize(read_bytes/elapsed) AS read_speed
    FROM system.processes
    WHERE query != ''
    ORDER BY elapsed DESC
    FORMAT Vertical
"

# Long-running queries only (elapsed is in seconds; threshold is 10 s).
clickhouse-client --query "
    SELECT
        query_id,
        user,
        query,
        elapsed,
        read_rows
    FROM system.processes
    WHERE elapsed > 10
    ORDER BY elapsed DESC
"
2. 查看查询历史(慢查询)
# Top 100 slowest finished queries (duration above 1 second).
# Rows are ordered by duration, not by recency.
clickhouse-client --query "
    SELECT
        query_id,
        user,
        query,
        query_start_time,
        query_duration_ms,
        read_rows,
        read_bytes,
        memory_usage,
        formatReadableSize(memory_usage) AS memory
    FROM system.query_log
    WHERE type = 'QueryFinish'
      AND query_duration_ms > 1000
    ORDER BY query_duration_ms DESC
    LIMIT 100
    FORMAT Vertical
"

# Slow-query statistics for the last hour, bucketed by hour.
clickhouse-client --query "
    SELECT
        toStartOfHour(event_time) AS hour,
        count() AS slow_queries,
        avg(query_duration_ms) AS avg_duration_ms,
        max(query_duration_ms) AS max_duration_ms
    FROM system.query_log
    WHERE type = 'QueryFinish'
      AND event_time > now() - INTERVAL 1 HOUR
      AND query_duration_ms > 1000
    GROUP BY hour
    ORDER BY hour DESC
"
# Merges currently in progress, longest-running first.
# system.merges exposes num_parts / rows_read / total_size_bytes_compressed;
# it has no *_to_merge columns.
clickhouse-client --query "
    SELECT
        database,
        table,
        elapsed,
        progress,
        merge_type,
        merge_algorithm,
        num_parts,
        rows_read,
        total_size_bytes_compressed,
        formatReadableSize(total_size_bytes_compressed) AS total_size
    FROM system.merges
    ORDER BY elapsed DESC
"

# Mutations that have not finished yet, newest first, with the last failure (if any).
clickhouse-client --query "
    SELECT
        database,
        table,
        mutation_id,
        command,
        create_time,
        is_done,
        latest_failed_part,
        latest_fail_time,
        latest_fail_reason
    FROM system.mutations
    WHERE is_done=0
    ORDER BY create_time DESC
"

# Background task counters.
clickhouse-client --query "SELECT count() AS active_merges FROM system.merges"
clickhouse-client --query "SELECT count() AS active_mutations FROM system.mutations WHERE is_done=0"
5. 查看表和数据分区状态
# The 50 largest active data parts across all tables.
clickhouse-client --query "
    SELECT
        database,
        table,
        partition,
        name,
        rows,
        bytes_on_disk,
        formatReadableSize(bytes_on_disk) AS size,
        modification_time
    FROM system.parts
    WHERE active=1
    ORDER BY bytes_on_disk DESC
    LIMIT 50
"

# Compression overview per table and codec.
# system.parts has no format / compression_ratio columns: the codec comes from
# default_compression_codec and the ratio is uncompressed / compressed bytes.
clickhouse-client --query "
    SELECT
        database,
        table,
        default_compression_codec,
        count() AS parts_count,
        sum(rows) AS total_rows,
        sum(bytes_on_disk) AS total_bytes,
        formatReadableSize(sum(bytes_on_disk)) AS total_size,
        sum(data_uncompressed_bytes) / sum(data_compressed_bytes) AS avg_compression
    FROM system.parts
    WHERE active=1
    GROUP BY database, table, default_compression_codec
    ORDER BY total_bytes DESC
"

# Partition size distribution (finds oversized partitions).
clickhouse-client --query "
    SELECT
        database,
        table,
        partition,
        count() AS parts,
        sum(rows) AS rows,
        formatReadableSize(sum(bytes_on_disk)) AS size
    FROM system.parts
    WHERE active=1
    GROUP BY database, table, partition
    ORDER BY sum(bytes_on_disk) DESC
    LIMIT 20
"
6. 查看系统配置
# Concurrency-related settings.
# NOTE(review): the server-wide max_concurrent_queries limit is a server setting
# (config.xml); recent ClickHouse releases expose it in system.server_settings
# rather than system.settings -- confirm for your version.
clickhouse-client --query "
    SELECT name, value
    FROM system.settings
    WHERE name LIKE '%max_concurrent%'
"

# Thread and pool settings.
clickhouse-client --query "
    SELECT name, value
    FROM system.settings
    WHERE name LIKE '%thread%'
       OR name LIKE '%pool%'
"

# Query limit settings (query / memory / time related max_* settings).
clickhouse-client --query "
    SELECT name, value
    FROM system.settings
    WHERE name LIKE '%max_%'
      AND (name LIKE '%query%' OR name LIKE '%memory%' OR name LIKE '%time%')
"
# 1. Count the queries running right now.
clickhouse-client --query "
    SELECT count() AS concurrent_queries
    FROM system.processes
    WHERE query != ''
"

# 2. List the running queries, longest-running first.
clickhouse-client --query "
    SELECT query_id, user, query, elapsed
    FROM system.processes
    WHERE query != ''
    ORDER BY elapsed DESC
"

# 3. Temporarily lower the concurrency limit (edit config.xml):
#    <max_concurrent_queries>8</max_concurrent_queries>

# 4. Or kill a long-running query (replace xxx with the real query_id).
clickhouse-client --query "KILL QUERY WHERE query_id='xxx'"
问题2:慢查询导致CPU高
# 1. Find the slowest finished queries (longer than 5 seconds).
clickhouse-client --query "
    SELECT query_id, query, query_duration_ms, read_rows
    FROM system.query_log
    WHERE type = 'QueryFinish'
      AND query_duration_ms > 5000
    ORDER BY query_duration_ms DESC
    LIMIT 10
"

# 2. Inspect the execution plan (replace with the actual slow query).
clickhouse-client --query "EXPLAIN SELECT ..."

# 3. Optimise the query: add indexes, tighten WHERE conditions, read less data.

# 4. Enforce a 60-second timeout.
#    A standalone 'SET max_execution_time=60' only affects its own short-lived
#    client session, so pass the setting on the invocation that runs the query
#    (or set it in the user's settings profile to apply it everywhere).
clickhouse-client --max_execution_time=60 --query "SELECT ..."
问题3:后台合并任务导致CPU高
# 1. Show the merges in progress, longest-running first.
#    system.merges has no total_rows_to_merge column; num_parts, rows_read and
#    total_size_bytes_compressed describe the size and progress of the merge.
clickhouse-client --query "
    SELECT
        database,
        table,
        elapsed,
        progress,
        num_parts,
        rows_read,
        total_size_bytes_compressed
    FROM system.merges
    ORDER BY elapsed DESC
"

# 2. Tune the merge strategy (in config.xml):
#    <max_bytes_to_merge_at_max_space_in_pool>161061273600</max_bytes_to_merge_at_max_space_in_pool>
#    <background_pool_size>16</background_pool_size>

# 3. Wait for the merges to finish, or trigger a merge manually.
#    CAUTION: OPTIMIZE ... FINAL is itself CPU- and IO-heavy; run it off-peak,
#    not while merges are already saturating the CPU.
clickhouse-client --query "OPTIMIZE TABLE database.table FINAL"
问题4:Mutation任务导致CPU高
# 1. Show the mutations that have not finished yet.
clickhouse-client --query "
    SELECT database, table, mutation_id, command, create_time, is_done
    FROM system.mutations
    WHERE is_done=0
"

# 2. If a mutation is stuck it can be cancelled (use with care):
#    clickhouse-client --query "KILL MUTATION WHERE database='xxx' AND table='xxx' AND mutation_id='xxx'"

# 3. Avoid running large numbers of mutations during peak hours.
问题5:压缩算法CPU密集
# 1. Show which compression codecs the active parts use.
#    system.parts has no 'format' column; the codec is default_compression_codec.
clickhouse-client --query "
    SELECT
        default_compression_codec,
        count() AS parts,
        sum(bytes_on_disk) AS total_bytes
    FROM system.parts
    WHERE active=1
    GROUP BY default_compression_codec
"

# 2. If a CPU-heavy codec such as ZSTD is in use, consider switching to LZ4:
#    ALTER TABLE table_name MODIFY COLUMN column_name String CODEC(LZ4)

# 3. Or lower the compression level:
#    ALTER TABLE table_name MODIFY COLUMN column_name String CODEC(ZSTD(1))
问题6:数据导入导致CPU高
# 1. Show the INSERT queries that are currently running.
clickhouse-client --query "
    SELECT query_id, query, elapsed
    FROM system.processes
    WHERE query LIKE '%INSERT%'
"

# 2. Lower the insert parallelism:
#    SET max_insert_threads=4

# 3. Insert in batches instead of row by row:
#    INSERT INTO table VALUES (...), (...), (...)