langchain4j中使用milvus向量数据库做RAG增加索引
安装milvus向量数据库
官方网址 https://milvus.io/zh
使用docker安装milvus
mkdir -p /data/docker/milvus
cd /data/docker/milvus
wget https://raw.githubusercontent.com/milvus-io/milvus/master/scripts/standalone_embed.sh#在docker中启动milvus
sh standalone_embed.sh start
docker ps -a#停止、删除
sh standalone_embed.sh stop
sh standalone_embed.sh delete
浏览器访问
http://192.168.2.205:9091/webui/
在langchain中使用milvus
- 在
pom.xml
中引入依赖
<dependency><groupId>dev.langchain4j</groupId><artifactId>langchain4j-milvus</artifactId><version>${langchain4j.version}</version></dependency>
- 设置配置信息
milvus:host: 192.168.1.131port: 19530langchain4j:community:dashscope:chat-model:api-key: ${dashscope.key}model-name: qwen-maxembedding-model:api-key: ${dashscope.key}model-name: text-embedding-v3streaming-chat-model:api-key: ${dashscope.key}model-name: qwen-plus
- 配置向量库
@Configuration
@Slf4j
public class EmbeddingStoreConfig {@Autowiredprivate EmbeddingModel embeddingModel;@Value("${milvus.host}")private String host;@Value("${milvus.port}")private Integer port;@Beanpublic EmbeddingStore embeddingStore() {log.info("==========开始创建Milvus的Collection");MilvusEmbeddingStore store = MilvusEmbeddingStore.builder().host(host).port(port).collectionName("langchain_01").dimension(1536).indexType(IndexType.FLAT).metricType(MetricType.COSINE)
// .username("username")
// .password("password").consistencyLevel(ConsistencyLevelEnum.EVENTUALLY).autoFlushOnInsert(true).idFieldName("id").textFieldName("text").metadataFieldName("metadata").vectorFieldName("vector").build();log.info("==========创建Milvus的Collection完成");return store;}}
- 使用向量库存储数据
@SpringBootTest
public class EmbeddingTest {@Autowiredprivate EmbeddingModel embeddingModel;@Autowiredprivate EmbeddingStore embeddingStore;@Testpublic void testEmbeddingModel() {Response<Embedding> embed = embeddingModel.embed("你好");System.out.println("向量维度:" + embed.content().vector().length);System.out.println("向量输出:" + embed.toString());}/*** 将文本转换成向量,然后存储到pinecone中* <p>* 参考:* https://docs.langchain4j.dev/tutorials/embedding-stores*/@Testpublic void testPineconeEmbeded() {//将文本转换成向量TextSegment segment1 = TextSegment.from("我喜欢羽毛球");Embedding embedding1 = embeddingModel.embed(segment1).content();//存入向量数据库embeddingStore.add(embedding1, segment1);TextSegment segment2 = TextSegment.from("今天天气很好");Embedding embedding2 = embeddingModel.embed(segment2).content();embeddingStore.add(embedding2, segment2);}/*** 相似度匹配*/@Testpublic void embeddingSearch() {//提问,并将问题转成向量数据Embedding queryEmbedding = embeddingModel.embed("你最喜欢的运动是什么?").content();//创建搜索请求对象EmbeddingSearchRequest searchRequest = EmbeddingSearchRequest.builder().queryEmbedding(queryEmbedding).maxResults(1) //匹配最相似的一条记录//.minScore(0.8).build();//根据搜索请求 searchRequest 在向量存储中进行相似度搜索EmbeddingSearchResult<TextSegment> searchResult =embeddingStore.search(searchRequest);//searchResult.matches():获取搜索结果中的匹配项列表。//.get(0):从匹配项列表中获取第一个匹配项EmbeddingMatch<TextSegment> embeddingMatch = searchResult.matches().get(0);//获取匹配项的相似度得分System.out.println(embeddingMatch.score()); // 0.8144288515898701//返回文本结果System.out.println(embeddingMatch.embedded().text());}@Testpublic void testUploadKnowledgeLibrary() {//使用FileSystemDocumentLoader读取指定目录下的知识库文档//并使用默认的文档解析器对文档进行解析Document document1 = FileSystemDocumentLoader.loadDocument("D:/knowledge/文档1.md");Document document2 = FileSystemDocumentLoader.loadDocument("D:/knowledge/文档2.md");Document document3 = FileSystemDocumentLoader.loadDocument("D:/knowledge/文档3.md");List<Document> documents = Arrays.asList(document1, document2, document3);//文本向量化并存入向量数据库:将每个片段进行向量化,得到一个嵌入向量EmbeddingStoreIngestor.builder().embeddingStore(embeddingStore).embeddingModel(embeddingModel).build().ingest(documents);}
}
- 配置Agent属性
@Configuration
public class AgentConfig {@Autowiredprivate MongoChatMemoryStore mongoChatMemoryStore;@Autowiredprivate EmbeddingStore embeddingStore;@Autowiredprivate EmbeddingModel embeddingModel;@Beanpublic ChatMemoryProvider chatMemoryProviderXiaozhi() {return memoryId -> MessageWindowChatMemory.builder().id(memoryId).maxMessages(20).chatMemoryStore(mongoChatMemoryStore).build();}@BeanContentRetriever contentRetriever() {// 创建一个 EmbeddingStoreContentRetriever 对象,用于从嵌入存储中检索内容return EmbeddingStoreContentRetriever.builder()// 设置用于生成嵌入向量的嵌入模型.embeddingModel(embeddingModel)// 指定要使用的嵌入存储.embeddingStore(embeddingStore)// 设置最大检索结果数量,这里表示最多返回 1 条匹配结果.maxResults(1)// 设置最小得分阈值,只有得分大于等于 0.8 的结果才会被返回.minScore(0.8)// 构建最终的 EmbeddingStoreContentRetriever 实例.build();}
}
- 配置AIService
@AiService(wiringMode = EXPLICIT,
// chatModel = "qwenChatModel",streamingChatModel = "qwenStreamingChatModel",chatMemoryProvider = "chatMemoryProviderXiaozhi",tools = "appointmentTools",contentRetriever = "contentRetriever")
public interface Agent {@SystemMessage(fromResource = "zhaozhi-prompt-template.txt")Flux<String> chat(@MemoryId Long memoryId, @UserMessage String userMessage);
}
- Controller类
@RestController
@RequestMapping("/agent")
public class AgentController {@Autowiredprivate Agent agent;@Operation(summary = "对话")@PostMapping(value = "/chat", produces = "text/stream;charset=utf-8")public Flux<String> chat(@RequestBody ChatForm chatForm) {return agent.chat(chatForm.getMemoryId(), chatForm.getMessage());}
}
源码地址:
https://gitee.com/galen.zhang/langchain-ai-demo/java-ai-langchain4j