大模型系列-dify
大模型系列-dify
- 1. 批量添加分段
1. 批量添加分段
通过代码的方式,把 Excel 文件(问题整理.xlsx)里面的 Q/A 内容批量插入到已有的知识库中。
Excel 文件包含"问题"和"答案"两列,格式如下:
问题1 答案1
问题2 答案2
import requests
import json
import pandas as pd  # pip install pandas openpyxl

# Base address of the Dify console. "ip:port" is a placeholder from the
# original script and must be replaced with the real host before running.
DIFY_BASE_URL = "http://ip:port"
# Knowledge base (dataset) that receives the new segments.
DATASET_ID = "075ecd79-b1a5-4d2e-a466-2012c0dda14c"
# Document inside the dataset that segments are appended to by default.
DEFAULT_DOCUMENT_ID = "9e1538d6-1e14-4543-b49f-e890154b0737"
# NOTE(review): a bearer token is hard-coded in source. It looks like a console
# session JWT (presumably short-lived) -- move it to configuration / an
# environment variable and rotate it; do not commit real credentials.
CONSOLE_TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyX2lkIjoiMzhmMzE1M2YtZDhiOS00MTgxLWI0ZmItZjU2MzdkMGU0Mzk5IiwiZXhwIjoxNzYwNDI2NjU1LCJpc3MiOiJTRUxGX0hPU1RFRCIsInN1YiI6IkNvbnNvbGUgQVBJIFBhc3Nwb3J0In0.F62P8qvUvC_bAxc_9KjQNcFN3ucyiB3Bi_fZL_Q_KKA"
# Seconds to wait for the server before giving up on a single request.
REQUEST_TIMEOUT = 30


def build_segment_url(document_id, dataset_id=DATASET_ID, base_url=DIFY_BASE_URL):
    """Return the console endpoint that adds a segment to *document_id*."""
    return (
        f"{base_url}/console/api/datasets/{dataset_id}"
        f"/documents/{document_id}/segment"
    )


def build_segment_payload(document_id, question, answer):
    """Return the JSON body for one question/answer segment.

    Only ``document_id``, ``content`` and ``answer`` vary per call. The other
    fields are the fixed values the original script sent.
    NOTE(review): those fixed ids, hashes and timestamps look like they were
    copied from an existing segment; presumably the server ignores or
    overwrites them -- confirm against the Dify API before relying on them.
    """
    return {
        "id": "5d640c23-1495-4751-aea7-554fc588df31",
        "position": 5,
        "document_id": document_id,
        "content": question,
        "sign_content": "c",
        "answer": answer,
        "word_count": 2,
        "tokens": 1,
        "keywords": None,
        "index_node_id": "f029fc4b-6014-4373-a89a-6faa4f700f78",
        "index_node_hash": "2f7cfb4fa6fef769c6daff15772f3b1193b19b20d7a1534ca6d2542658ec69e5",
        "hit_count": 0,
        "enabled": True,
        "disabled_at": None,
        "disabled_by": None,
        "status": "completed",
        "created_by": "38f3153f-d8b9-4181-b4fb-f5637d0e4399",
        "created_at": 1760422745,
        "updated_at": 1760422745,
        "updated_by": None,
        "indexing_at": 1760422745,
        "completed_at": 1760422745,
        "error": None,
        "stopped_at": None,
        "child_chunks": [],
    }


def send_post_request(document_id=DEFAULT_DOCUMENT_ID, question="你好", answer="你好!"):
    """POST one question/answer pair as a new segment of a document.

    Args:
        document_id: Target document. It is used both in the request URL and
            in the payload, so the two can no longer disagree.
        question: Text stored as the segment content.
        answer: Text stored as the segment answer.

    Returns:
        The decoded JSON response on success, or ``None`` when the request
        fails, the server answers with an error status, or the body is not
        valid JSON.
    """
    url = build_segment_url(document_id)
    headers = {
        # There must be a single space between "Bearer" and the token.
        "Authorization": f"Bearer {CONSOLE_TOKEN}",
        "Content-Type": "application/json",
    }
    data = build_segment_payload(document_id, question, answer)

    try:
        # A timeout keeps one stalled request from hanging the whole batch.
        response = requests.post(
            url, headers=headers, json=data, timeout=REQUEST_TIMEOUT
        )
        response.raise_for_status()
        result = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers non-JSON bodies on requests versions whose
        # JSON decode error is not a RequestException subclass.
        print(f"请求失败: {e}")
        return None

    print("请求成功!")
    print("响应状态码:", response.status_code)
    print("响应内容:", json.dumps(result, indent=2, ensure_ascii=False))
    return result


def batch_insert(
    file_path='/home/geekplusa/ai/projects/data/问题整理.xlsx',
    sheet_name='内部知识库',
):
    """Read question/answer rows from an Excel sheet and upload each one.

    The first column is taken as the question and the second as the answer.
    Rows with an empty question or answer are skipped instead of being sent.

    NOTE(review): ``pd.read_excel`` treats the first sheet row as a header by
    default, so that row is not uploaded -- confirm the sheet has a header.

    Args:
        file_path: Path of the ``.xlsx`` workbook to read.
        sheet_name: Name of the worksheet holding the Q/A rows.
    """
    df = pd.read_excel(file_path, sheet_name=sheet_name)

    for index, row in df.iterrows():
        question = row.iloc[0]  # first column: question
        answer = row.iloc[1]  # second column: answer

        # Empty cells are read as NaN, which cannot be encoded as JSON.
        if pd.isna(question) or pd.isna(answer):
            print(f"跳过第 {index} 行: 问题或答案为空")
            continue

        # Numeric cells come back as numpy scalars, which the JSON encoder
        # rejects; plain strings are left unchanged by str().
        question = str(question)
        answer = str(answer)

        send_post_request(question=question, answer=answer)
        print(f"问题: {question}, 答案: {answer}")


if __name__ == "__main__":
    batch_insert()