import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Author's note (translated from the original): results are satisfactory.


class SelfLearningQASystem:
    """Question-answering system driven by sentence-embedding similarity.

    Question/answer pairs are read from an Excel sheet that has ``question``
    and ``answer`` columns. Every stored question is embedded with a
    SentenceTransformer model; ``ask`` returns the answer belonging to the
    stored question whose embedding has the highest cosine similarity to the
    user's question. ``add_qa_pair`` extends the knowledge base at runtime
    and can persist the new pair back to an Excel file.
    """

    # Path of the pre-trained multilingual sentence-embedding model that is
    # loaded when the caller does not supply ``model_path``.
    DEFAULT_MODEL_PATH = r'D:\modelproject\paraphrase-multilingual-MiniLM-L12-v2'

    # Minimum cosine similarity for a stored question to count as a match.
    DEFAULT_THRESHOLD = 0.5

    # Reply returned when no stored question is similar enough.
    NO_MATCH_MESSAGE = "抱歉,我暂时无法回答这个问题。您可以更新我的知识库。"

    def __init__(self, excel_path, model_path=DEFAULT_MODEL_PATH):
        """Load the knowledge base and embed every stored question.

        Args:
            excel_path: Excel file with ``question`` and ``answer`` columns.
            model_path: Model name or path handed to ``SentenceTransformer``.

        Raises:
            KeyError: If the ``question`` or ``answer`` column is missing.
        """
        self.df = pd.read_excel(excel_path)

        # Empty cells are read as NaN and ``astype(str)`` would turn them
        # into the literal text "nan", so incomplete rows are left out of
        # the in-memory knowledge base (``self.df`` keeps the raw sheet).
        complete_rows = self.df.dropna(subset=['question', 'answer'])
        self.questions = complete_rows['question'].astype(str).tolist()
        self.answers = complete_rows['answer'].astype(str).tolist()

        self.model = SentenceTransformer(model_path)

        # Skip the model call entirely when there is nothing to embed.
        if self.questions:
            self.question_vectors = self.model.encode(self.questions)
        else:
            self.question_vectors = np.empty((0, 0))

    def ask(self, user_question, threshold=DEFAULT_THRESHOLD):
        """Return the answer of the most similar stored question.

        Args:
            user_question: The question text to look up.
            threshold: Minimum cosine similarity required for a match.

        Returns:
            The stored answer of the best-matching question, or
            ``NO_MATCH_MESSAGE`` when the knowledge base is empty or the best
            similarity is below ``threshold``.
        """
        if not self.questions:
            return self.NO_MATCH_MESSAGE

        # cosine_similarity needs 2-D inputs; promote 1-D vectors to one row.
        user_vector = np.atleast_2d(self.model.encode(user_question))
        question_vectors = np.atleast_2d(self.question_vectors)

        similarities = cosine_similarity(user_vector, question_vectors)
        most_similar_idx = int(similarities[0].argmax())
        max_similarity = similarities[0, most_similar_idx]
        print("问题相似度", max_similarity)

        if max_similarity < threshold:
            return self.NO_MATCH_MESSAGE
        return self.answers[most_similar_idx]

    def add_qa_pair(self, new_question, new_answer, excel_path=None):
        """Add a question/answer pair to the knowledge base.

        Only the new question is embedded; its vector is appended to the
        existing embedding matrix instead of re-encoding every stored
        question on each call.

        Args:
            new_question: Question text to learn.
            new_answer: Answer to return for that question.
            excel_path: Optional Excel file to append the pair to. The file
                is created when it does not exist yet.
        """
        self.questions.append(new_question)
        self.answers.append(new_answer)

        new_vector = np.atleast_2d(self.model.encode([new_question]))
        known_vectors = np.asarray(self.question_vectors)
        if known_vectors.size == 0:
            # First entry: there is no existing matrix to stack onto.
            self.question_vectors = new_vector
        else:
            self.question_vectors = np.vstack(
                [np.atleast_2d(known_vectors), new_vector]
            )

        if not excel_path:
            return

        new_data = pd.DataFrame({
            'question': [new_question],
            'answer': [new_answer],
        })
        try:
            existing = pd.read_excel(excel_path)
        except FileNotFoundError:
            updated = new_data
        else:
            updated = pd.concat([existing, new_data], ignore_index=True)
        updated.to_excel(excel_path, index=False)


def main():
    """Run the interactive console loop against ``input.xlsx``."""
    inputfile = "input.xlsx"
    qa_system = SelfLearningQASystem(inputfile)

    # Seed the demo pair only when it is missing; adding it unconditionally
    # would append one more duplicate row to the Excel file on every launch.
    if "今天周几" not in qa_system.questions:
        qa_system.add_qa_pair("今天周几", "星期三", inputfile)

    print("=== 欢迎使用自学习问答系统,输入 '退出' 结束对话 ===")
    while True:
        try:
            user_input = input("\n你:")
        except EOFError:
            # End of input (closed stdin / Ctrl-D) is treated as a quit request.
            user_input = 'exit'

        if user_input.strip() in ('退出', 'exit', 'quit'):
            print("系统:再见!")
            break

        answer = qa_system.ask(user_input)
        print(f"系统:{answer}")


if __name__ == "__main__":
    main()