SpringBoot 集成 LangChain4j RAG PostgreSQL 搜索
SpringBoot 集成 LangChain4j RAG PostgreSQL 搜索
- 1 依赖
- 2 配置
- 3 代码
- 1 RagConf
- 2 RagController
- 4 测试
LangChain4j 提供了一系列官方封装的 ONNX 模型包,全部可离线使用。
模型名 | artifactId | 语言 | 向量维度 | 特点 |
---|---|---|---|---|
all-MiniLM-L6-v2 | langchain4j-embeddings-all-minilm-l6-v2 | 英文 | 384 | 轻量、快速、英文检索推荐 |
bge-small-en-v1.5-q | langchain4j-embeddings-bge-small-en-v15-q | 英文 | 384 | 比MiniLM准确度更高(特别是长句) |
bge-small-zh-v1.5-q | langchain4j-embeddings-bge-small-zh-v15-q | 中文 | 512 | 中文优化模型,性能优异 |
bge-large-zh-v1.5-q | langchain4j-embeddings-bge-large-zh-v15-q | 中文 | 1024 | 高精度中文语义模型 |
e5-small-v2 | langchain4j-embeddings-e5-small-v2 | 英文 | 384 | 优化了通用信息检索性能(英文) |
gte-small | langchain4j-embeddings-gte-small | 英文 | 384 | 平衡速度与精度的通用英文模型 |
gte-base | langchain4j-embeddings-gte-base | 英文 | 768 | 准确度更高,速度略慢 |
gte-large | langchain4j-embeddings-gte-large | 英文 | 1024 | 高精度版本(对语义检索更强) |
bge-m3 | langchain4j-embeddings-bge-m3 | 多语言 | 1024 | 多语言语义统一空间 |
1 依赖
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>3.5.6</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>

    <groupId>com.xu</groupId>
    <artifactId>lang-chain-pg</artifactId>
    <version>1.0.0</version>
    <name>lang-chain-pg</name>
    <description>Demo project for Spring Boot</description>

    <properties>
        <java.version>25</java.version>
    </properties>

    <dependencyManagement>
        <dependencies>
            <!-- LangChain4j BOM: manages the versions of the LangChain4j modules declared below without a version -->
            <dependency>
                <groupId>dev.langchain4j</groupId>
                <artifactId>langchain4j-bom</artifactId>
                <version>1.6.0</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <dependencies>
        <!-- Spring Boot web starter -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>

        <!-- LangChain4j Spring Boot starter -->
        <dependency>
            <groupId>dev.langchain4j</groupId>
            <artifactId>langchain4j-spring-boot-starter</artifactId>
        </dependency>

        <!-- LangChain4j Spring Boot starter for OpenAI -->
        <dependency>
            <groupId>dev.langchain4j</groupId>
            <artifactId>langchain4j-open-ai-spring-boot-starter</artifactId>
        </dependency>

        <!-- LangChain4j support for the PostgreSQL vector extension (pgvector) -->
        <dependency>
            <groupId>dev.langchain4j</groupId>
            <artifactId>langchain4j-pgvector</artifactId>
        </dependency>

        <!-- Local (in-process ONNX) embedding model -->
        <!-- NOTE(review): this version is pinned explicitly and differs from the 1.6.0 BOM above;
             confirm the two release lines are compatible -->
        <dependency>
            <groupId>dev.langchain4j</groupId>
            <artifactId>langchain4j-embeddings-bge-small-zh-v15-q</artifactId>
            <version>1.7.1-beta14</version>
        </dependency>

        <!-- Document parsing -->
        <dependency>
            <groupId>dev.langchain4j</groupId>
            <artifactId>langchain4j-document-parser-apache-tika</artifactId>
        </dependency>

        <!-- Spring Boot developer tools: hot reload and automatic restart to speed up development -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <scope>runtime</scope>
            <optional>true</optional>
        </dependency>

        <!-- Lombok: boilerplate-reducing annotations -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>

        <!-- Spring Boot test starter -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Registers Lombok as an annotation processor for compilation -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <annotationProcessorPaths>
                        <path>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok</artifactId>
                        </path>
                    </annotationProcessorPaths>
                </configuration>
            </plugin>
            <!-- Excludes Lombok from the repackaged application artifact -->
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <configuration>
                    <excludes>
                        <exclude>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok</artifactId>
                        </exclude>
                    </excludes>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
2 配置
# Application name reported by Spring Boot (property: spring.application.name)
spring:
  application:
    name: lang-chain-rag
3 代码
1 RagConf
package com.xu.conf;import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.embedding.onnx.bgesmallzhv15q.BgeSmallZhV15QuantizedEmbeddingModelFactory;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;@Configuration
public class RagConf {@Beanpublic EmbeddingModel embeddingModel() {//return new AllMiniLmL6V2EmbeddingModelFactory().create();// 使用 BGE-small-zh-v1.5-quantized 模型return new BgeSmallZhV15QuantizedEmbeddingModelFactory().create();}@Beanpublic EmbeddingStore<TextSegment> embeddingStore() {return PgVectorEmbeddingStore.builder().dropTableFirst(true).createTable(true).table("embedding").user("postgres").host("localhost").password("123456").database("postgres").port(5432).dimension(512).build();}}
2 RagController
package com.xu.controller;import dev.langchain4j.data.document.DocumentSplitter;
import dev.langchain4j.data.document.loader.FileSystemDocumentLoader;
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
import dev.langchain4j.data.document.splitter.DocumentByWordSplitter;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.EmbeddingStoreIngestor;
import lombok.AllArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.util.CollectionUtils;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;import java.util.HashMap;
import java.util.Map;
import java.util.stream.Collectors;@RestController
@AllArgsConstructor
@RequestMapping("/rag")
public class RagController {private final EmbeddingModel embeddingModel;private final EmbeddingStore<TextSegment> embeddingStore;/*** RAG数据入库** @return 数量*/@GetMapping("/save")public Object chat() {// 1️⃣ 使用 Tika 解析 docx/pdf 等文件var documents = FileSystemDocumentLoader.loadDocuments("D:\\SourceCode\\简历",new ApacheTikaDocumentParser());// 2️⃣ 定义自定义的文本拆分器 chunkSize=100 表示每段最大 500 tokens,overlap=30 表示重叠 30 tokensvar splitter = new DocumentByWordSplitter(100, 30);// 3️⃣ 构建带自定义拆分器的 ingestorvar ingestor = EmbeddingStoreIngestor.builder().documentSplitter(splitter).embeddingStore(embeddingStore).build();// 4️⃣ 执行嵌入生成与存储ingestor.ingest(documents);return documents.size();}/*** RAG数据查询** @param query 查询* @return 结果*/@GetMapping("/search")public Object search(@RequestParam String query) {var searchRequest = EmbeddingSearchRequest.builder().queryEmbedding(embeddingModel.embed(TextSegment.from(query)).content()).maxResults(5).minScore(0.6).build();var matches = embeddingStore.search(searchRequest).matches();var results = matches.stream().map(match -> {Map<String, Object> map = new HashMap<>();map.put("embeddingScore", match.score());map.put("embeddingId", match.embeddingId());return map;}).collect(Collectors.toList());return ResponseEntity.ok(results);}}