当前位置：首页 > news >正文

Excel文件有两列，循环读取这两列并赋值到字典列表。每个字典有两个key，分别为question和answer。将最终结果追加到JSON文件。

news 2025/7/19 8:22:41

import pandas as pd
import json
import os

def _cell_to_text(value):
    """Return a cell value as text, mapping a missing cell to an empty string."""
    # Empty Excel cells are loaded as NaN; str(NaN) would store the literal
    # text "nan" in the output instead of an empty value.
    return "" if pd.isna(value) else str(value)


def _load_existing_records(json_path):
    """Return the list already stored in ``json_path``, or ``[]`` if unusable.

    A missing file yields ``[]``. A file that is not valid JSON prints a
    warning and also yields ``[]`` (so it will be overwritten). A file that
    is valid JSON but whose top-level value is not a list raises ``TypeError``
    so that it is never silently overwritten or concatenated incorrectly.
    """
    if not os.path.exists(json_path):
        return []
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            existing = json.load(f)
    except json.JSONDecodeError:
        print("警告：JSON文件内容格式异常，将覆盖写入")
        return []
    if not isinstance(existing, list):
        raise TypeError(
            f"JSON文件顶层必须是列表，实际为{type(existing).__name__}：{json_path}"
        )
    return existing


def excel_to_json_append(excel_path, json_path):
    """Append the question/answer rows of an Excel file to a JSON list file.

    The Excel file is read with ``pd.read_excel`` restricted to the columns
    ``"question"`` and ``"answer"``. Each row becomes a dict
    ``{"question": <text>, "answer": <text>}``; missing cells become ``""``.
    The new records are placed after whatever list ``json_path`` already
    holds and the whole list is written back as UTF-8, indented JSON.

    No de-duplication is performed: running this twice on the same Excel
    file appends the same rows twice.

    Args:
        excel_path: Path of the Excel file to read.
        json_path: Path of the JSON file to extend (created if absent).

    Raises:
        TypeError: If ``json_path`` holds valid JSON whose top-level value
            is not a list.
    """
    # 1. Read the two Excel columns into a list of dicts.
    df = pd.read_excel(excel_path, usecols=["question", "answer"])
    # Iterate column-wise instead of with iterrows(): iterrows() builds one
    # Series per row with a common dtype, which can turn an integer cell
    # into a float (1 -> "1.0") when the other column is numeric.
    new_data = [
        {"question": _cell_to_text(question), "answer": _cell_to_text(answer)}
        for question, answer in zip(df["question"], df["answer"])
    ]

    # 2. Load the records already present in the JSON file, if any.
    existing_data = _load_existing_records(json_path)

    # 3. Old records first, new records appended after them.
    combined_data = existing_data + new_data

    # 4. Write the merged list back; ensure_ascii=False keeps non-ASCII
    #    text readable in the file instead of \uXXXX escapes.
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(combined_data, f, ensure_ascii=False, indent=2)

    print(f"成功追加{len(new_data)}条数据到{json_path}")

# Example usage: append the rows of input.xlsx to train_qa.json.
excel_to_json_append(excel_path="input.xlsx", json_path="train_qa.json")

查看全文

http://www.dtcms.com/a/77283.html