import pandas as pd
import json
import os
def excel_to_json_append(excel_path, json_path):
    """Append question/answer rows from an Excel sheet to a JSON list file.

    The ``question`` and ``answer`` columns of ``excel_path`` are read with
    pandas, every cell is converted with ``str()``, and the resulting
    ``{"question": ..., "answer": ...}`` records are appended to the list
    stored in ``json_path``. The file is then rewritten as UTF-8 JSON
    (non-ASCII characters kept as-is, two-space indent). Duplicate records
    are kept; no de-duplication is performed.

    Args:
        excel_path: Path of the Excel workbook; it is expected to provide
            ``question`` and ``answer`` columns.
        json_path: Path of the JSON file to extend. It is created when it
            does not exist. When it exists but cannot be parsed as JSON, a
            warning is printed and the file is overwritten with only the
            new records.

    Raises:
        TypeError: If ``json_path`` holds valid JSON whose top-level value
            is not a list. Nothing is written in that case.
    """
    # 1. Read the Excel data into a list of dicts.
    df = pd.read_excel(excel_path, usecols=["question", "answer"])
    # Iterate the two columns directly rather than with ``iterrows()``:
    # ``iterrows()`` builds one Series per row, which upcasts mixed numeric
    # columns (an integer question next to a float answer would be written
    # as "1.0" instead of "1").
    # NOTE(review): blank cells presumably arrive as NaN and are therefore
    # stored as the string "nan" -- confirm whether they should be skipped.
    new_data = [
        {"question": str(question), "answer": str(answer)}
        for question, answer in zip(df["question"], df["answer"])
    ]

    # 2. Load the existing JSON data (if the file exists).
    existing_data = []
    if os.path.exists(json_path):
        try:
            with open(json_path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except json.JSONDecodeError:
            print("警告:JSON文件内容格式异常,将覆盖写入")
        else:
            # Appending only makes sense for a top-level list. Fail with a
            # clear message before the file is touched instead of letting
            # the concatenation below raise an opaque TypeError.
            if not isinstance(loaded, list):
                raise TypeError(
                    f"{json_path} 的顶层结构必须是列表,"
                    f"实际为 {type(loaded).__name__}"
                )
            existing_data = loaded

    # 3. Merge old and new records; existing entries keep their order and
    #    come first.
    combined_data = existing_data + new_data

    # 4. Write the updated JSON file.
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(combined_data, f, ensure_ascii=False, indent=2)
    print(f"成功追加{len(new_data)}条数据到{json_path}")
# Example usage. Guarded so the conversion runs only when this file is
# executed as a script; importing the module no longer reads "input.xlsx"
# or rewrites "train_qa.json" as a side effect.
if __name__ == "__main__":
    excel_to_json_append("input.xlsx", "train_qa.json")