DeepSeek辅助编写转换DuckDB explain_analyze_json 格式执行计划到postgresql_plan 的程序
explain analyze的json格式执行计划的标签命名与explain 的json计划不同,我让DeepSeek单独为它编写一个转换到postgresql格式的程序,输出第一层的children的children下所有信息,其余功能不变。
他给出的程序如下:
import json
import sys


def explain_analyze_json_to_postgresql_plan(json_str):
    """Convert a DuckDB EXPLAIN ANALYZE JSON plan into a PostgreSQL-style text plan.

    Args:
        json_str (str): the raw JSON text produced by
            ``EXPLAIN (ANALYZE, FORMAT JSON) ...`` in DuckDB.

    Returns:
        str: a PostgreSQL-style execution plan (one indented line per
        operator), or a Chinese error message when the JSON is malformed
        or does not have the expected EXPLAIN_ANALYZE wrapper structure.
    """
    try:
        plan_data = json.loads(json_str)
    except json.JSONDecodeError as e:
        return f"错误:JSON格式不正确 - {e}"

    def process_node(node, level=0):
        """Render one operator node and its subtree as indented text lines."""
        # Prefer the human-readable operator_name; fall back to operator_type.
        node_name = node.get("operator_name") or node.get("operator_type", "UNKNOWN")

        info_parts = []
        # Actual execution time in seconds; zero timings are suppressed.
        if "operator_timing" in node and node["operator_timing"] > 0:
            info_parts.append(f"time={node['operator_timing']:.6f}")
        # Actual number of rows this operator produced.
        if "operator_cardinality" in node:
            info_parts.append(f"rows={node['operator_cardinality']}")

        extra_info = node.get("extra_info", {})
        if "Projections" in extra_info:
            info_parts.append(f"Projections: {extra_info['Projections']}")
        if "Aggregates" in extra_info and extra_info["Aggregates"]:
            info_parts.append(f"Aggregates: {extra_info['Aggregates']}")
        # Optimizer estimate, shown next to the actual rows for comparison.
        if "Estimated Cardinality" in extra_info:
            info_parts.append(f"Estimated: {extra_info['Estimated Cardinality']}")
        # Rows physically scanned (table scans); zero values are suppressed.
        if "operator_rows_scanned" in node and node["operator_rows_scanned"] > 0:
            info_parts.append(f"scanned={node['operator_rows_scanned']}")

        # Two spaces of indentation per tree level.
        node_desc = " " * (level * 2) + node_name
        if info_parts:
            node_desc += f" ({', '.join(info_parts)})"

        result = [node_desc]
        for child in node.get("children", []):
            result.extend(process_node(child, level + 1))
        return result

    plan_lines = ["QUERY PLAN", "-" * 50]
    # The top-level object wraps the real plan in an EXPLAIN_ANALYZE node;
    # skip that wrapper and render its children.
    if "children" in plan_data and len(plan_data["children"]) > 0:
        explain_analyze_node = plan_data["children"][0]
        if "children" in explain_analyze_node and len(explain_analyze_node["children"]) > 0:
            for inner_node in explain_analyze_node["children"]:
                plan_lines.extend(process_node(inner_node))
        else:
            plan_lines.append("No execution plan found in the JSON data")
    else:
        plan_lines.append("Invalid JSON structure for EXPLAIN ANALYZE")
    return "\n".join(plan_lines)


def convert_explain_analyze_json_file(json_file_path, output_file_path=None):
    """Read an EXPLAIN ANALYZE JSON file and convert it to PostgreSQL format.

    Args:
        json_file_path (str): path of the input JSON file.
        output_file_path (str, optional): path of the output file; when
            omitted the converted plan is printed to the screen instead.

    Returns:
        str: the PostgreSQL-style execution plan, or a Chinese error
        message when the file is missing or processing fails.
    """
    try:
        with open(json_file_path, 'r', encoding='utf-8') as file:
            json_content = file.read()

        postgresql_plan = explain_analyze_json_to_postgresql_plan(json_content)

        if output_file_path:
            # Save to file and report where it went.
            with open(output_file_path, 'w', encoding='utf-8') as file:
                file.write(postgresql_plan)
            print(f"执行计划已保存到: {output_file_path}")
        else:
            # No output path given: print to the console.
            print(postgresql_plan)
        return postgresql_plan
    except FileNotFoundError:
        error_msg = f"错误:找不到文件 {json_file_path}"
        print(error_msg)
        return error_msg
    except Exception as e:
        # Best-effort boundary handler: report and return the message.
        error_msg = f"错误:处理文件时发生异常 - {e}"
        print(error_msg)
        return error_msg
if __name__ == "__main__":if len(sys.argv) < 2:print("使用方法: python script.py <explain_analyze_json文件路径> [pg计划输出文件路径]")sys.exit(1)json_file = sys.argv[1]output_file = sys.argv[2] if len(sys.argv) > 2 else Noneconvert_explain_analyze_json_file(json_file, output_file)
使用步骤
1.生成explain_analyze计划,注意生成json格式时,analyze要写在括号中
.mode list
.output planb.json
explain (analyze, format json) select count(*) from (values(1),(2))t(a);
.output
.exit
生成的json文件没有标题,如下所示:
{"query_name": "","blocked_thread_time": 0.0,"system_peak_buffer_memory": 0,"system_peak_temp_dir_size": 0,"cpu_time": 0.0,"extra_info": {},"cumulative_cardinality": 0,"cumulative_rows_scanned": 0,"result_set_size": 0,"latency": 0.0,"rows_returned": 0,"total_bytes_read": 0,"total_bytes_written": 0,"children": [{"cpu_time": 0.0,"extra_info": {},"cumulative_cardinality": 0,"operator_name": "EXPLAIN_ANALYZE","operator_type": "EXPLAIN_ANALYZE","operator_cardinality": 0,"cumulative_rows_scanned": 0,"operator_rows_scanned": 0,"operator_timing": 0.0,"result_set_size": 0,"total_bytes_read": 0,"total_bytes_written": 0,"children": [{"cpu_time": 0.0,"extra_info": {"Aggregates": "count_star()"},"cumulative_cardinality": 0,"operator_name": "UNGROUPED_AGGREGATE","operator_type": "UNGROUPED_AGGREGATE","operator_cardinality": 1,"cumulative_rows_scanned": 0,"operator_rows_scanned": 0,"operator_timing": 0.0000059,"result_set_size": 8,"total_bytes_read": 0,"total_bytes_written": 0,"children": [{"cpu_time": 0.0,"extra_info": {"Projections": "42","Estimated Cardinality": "2"},"cumulative_cardinality": 0,"operator_name": "PROJECTION","operator_type": "PROJECTION","operator_cardinality": 2,"cumulative_rows_scanned": 0,"operator_rows_scanned": 0,"operator_timing": 6e-7,"result_set_size": 8,"total_bytes_read": 0,"total_bytes_written": 0,"children": [{"cpu_time": 0.0,"extra_info": {"Estimated Cardinality": "2"},"cumulative_cardinality": 0,"operator_name": "COLUMN_DATA_SCAN","operator_type": "COLUMN_DATA_SCAN","operator_cardinality": 2,"cumulative_rows_scanned": 0,"operator_rows_scanned": 0,"operator_timing": 0.0000024,"result_set_size": 8,"total_bytes_read": 0,"total_bytes_written": 0,"children": []}]}]}]}]
}
2.转换
C:\d>python convplana.py planb.json planbpg.txt
执行计划已保存到: planbpg.txt
保存的计划内容如下
QUERY PLAN
--------------------------------------------------
UNGROUPED_AGGREGATE (time=0.000006, rows=1, Aggregates: count_star())
  PROJECTION (time=0.000001, rows=2, Projections: 42, Estimated: 2)
    COLUMN_DATA_SCAN (time=0.000002, rows=2, Estimated: 2)
将其与如下explain计划比较,
QUERY PLAN
--------------------------------------------------
UNGROUPED_AGGREGATE (Aggregates: count_star())
  PROJECTION (Projections: 4, 2, Estimated Rows: 2)
    COLUMN_DATA_SCAN (Estimated Rows: 2)
可见explain_analyze计划提供了实际执行时间和实际读取行数,更准确。略作修改还能提取到更多json中的信息。
