当前位置: 首页 > news >正文

Elasticsearch高效文章搜索实践

功能

image-20250610211130171

创建索引和映射

image-20250610213446050

使用postman添加映射和查询

image-20250610213733416

查询所有的文章信息,批量导入到es索引库中

image-20250610214358841
server:
  port: 9999
spring:
  application:
    name: es-article
  datasource:
    driver-class-name: com.mysql.jdbc.Driver
    url: jdbc:mysql://localhost:3306/leadnews_article?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC
    username: root
    password: root
# Location of the XML files backing the Mapper interfaces; required when a Mapper interface declares custom methods
mybatis-plus:
  mapper-locations: classpath*:mapper/*.xml
  # Package scan path for type aliases; classes in this package are registered under an alias
  type-aliases-package: com.heima.model.article.pojos
# Custom elasticsearch connection settings
elasticsearch:
  host: 192.168.200.130
  port: 9200

导入到es索引库

/**
 * Loads the articles returned by {@code apArticleMapper.loadArticleList()} and indexes them
 * into the {@code app_info_article} index with a single bulk request.
 *
 * <p>Note: when the data set is large, the import should be paged instead of being sent as
 * one bulk request.
 *
 * @throws Exception if the query or the bulk call fails
 * @throws IllegalStateException if Elasticsearch reports a failure for any item of the bulk
 */
@Test
public void init() throws Exception {
    // Fetch every article that qualifies for indexing.
    List<SearchArticleVo> searchArticleVos = apArticleMapper.loadArticleList();

    // The index name is set once on the bulk request, so the individual
    // IndexRequests below do not name an index themselves.
    BulkRequest bulkRequest = new BulkRequest("app_info_article");
    for (SearchArticleVo searchArticleVo : searchArticleVos) {
        IndexRequest indexRequest = new IndexRequest()
                .id(searchArticleVo.getId().toString())
                .source(JSON.toJSONString(searchArticleVo), XContentType.JSON);
        bulkRequest.add(indexRequest);
    }

    // A bulk call can succeed at the transport level while individual items fail;
    // surface those failures instead of silently dropping documents.
    org.elasticsearch.action.bulk.BulkResponse bulkResponse =
            restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
    if (bulkResponse.hasFailures()) {
        throw new IllegalStateException(bulkResponse.buildFailureMessage());
    }
}

需求说明

image-20250610221421008

搜索接口定义

image-20250610222047807

UserSearchDto

image-20250610222355123

实现步骤

image-20250612193842072

image-20250612193949445

image-20250612194110538

文章搜索服务实现

/*** es文章分页检索** @param dto* @return*/@Overridepublic ResponseResult search(UserSearchDto dto) throws IOException {//1.检查参数if(dto == null || StringUtils.isBlank(dto.getSearchWords())){return ResponseResult.errorResult(AppHttpCodeEnum.PARAM_INVALID);}ApUser user = AppThreadLocalUtil.getUser();//异步调用 保存搜索记录if(user != null && dto.getFromIndex() == 0){apUserSearchService.insert(dto.getSearchWords(), user.getId());}//2.设置查询条件SearchRequest searchRequest = new SearchRequest("app_info_article");SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();//布尔查询BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();//关键字的分词之后查询QueryStringQueryBuilder queryStringQueryBuilder = QueryBuilders.queryStringQuery(dto.getSearchWords()).field("title").field("content").defaultOperator(Operator.OR);boolQueryBuilder.must(queryStringQueryBuilder);//查询小于mindate的数据RangeQueryBuilder rangeQueryBuilder = QueryBuilders.rangeQuery("publishTime").lt(dto.getMinBehotTime().getTime());boolQueryBuilder.filter(rangeQueryBuilder);//分页查询searchSourceBuilder.from(0);searchSourceBuilder.size(dto.getPageSize());//按照发布时间倒序查询searchSourceBuilder.sort("publishTime", SortOrder.DESC);//设置高亮  titleHighlightBuilder highlightBuilder = new HighlightBuilder();highlightBuilder.field("title");highlightBuilder.preTags("<font style='color: red; font-size: inherit;'>");highlightBuilder.postTags("</font>");searchSourceBuilder.highlighter(highlightBuilder);searchSourceBuilder.query(boolQueryBuilder);searchRequest.source(searchSourceBuilder);SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);//3.结果封装返回List<Map> list = new ArrayList<>();SearchHit[] hits = searchResponse.getHits().getHits();for (SearchHit hit : hits) {String json = hit.getSourceAsString();Map map = JSON.parseObject(json, Map.class);//处理高亮if(hit.getHighlightFields() != null && hit.getHighlightFields().size() > 0){Text[] titles = hit.getHighlightFields().get("title").getFragments();String title = 
StringUtils.join(titles);//高亮标题map.put("h_title",title);}else {//原始标题map.put("h_title",map.get("title"));}list.add(map);}return ResponseResult.okResult(list);}

新增文章同步添加索引

/**
 * Builds a {@code SearchArticleVo} for the article and sends it, serialized as JSON, to the
 * {@code ArticleConstants.ARTICLE_ES_SYNC_TOPIC} Kafka topic.
 *
 * @param apArticle article whose properties are copied onto the search document
 * @param content   value stored as the document's content
 * @param path      value stored as the document's static URL
 */
private void createArticleEsIndex(ApArticle apArticle, String content, String path) {
    SearchArticleVo searchDocument = new SearchArticleVo();
    BeanUtils.copyProperties(apArticle, searchDocument);
    searchDocument.setStaticUrl(path);
    searchDocument.setContent(content);

    String payload = JSON.toJSONString(searchDocument);
    kafkaTemplate.send(ArticleConstants.ARTICLE_ES_SYNC_TOPIC, payload);
}

同步文章数据

@Component
@Slf4j
public class SyncArticleListener {@Autowiredprivate RestHighLevelClient  restHighLevelClient;/*** 同步文章数据* @param message*/@KafkaListener(topics = ArticleConstants.ARTICLE_ES_SYNC_TOPIC)public void onMessage(String message) {if (StringUtils.isNotBlank(message)) {SearchArticleVo searchArticleVo = JSON.parseObject(message, SearchArticleVo.class);IndexRequest indexRequest = new IndexRequest("app_info_article");indexRequest.id(searchArticleVo.getId().toString());indexRequest.source(message, XContentType.JSON);try {restHighLevelClient.index(indexRequest, RequestOptions.DEFAULT);} catch (IOException e) {log.error("sync es error = {}", e.getMessage(), e);}}}
}

搜索记录

需求说明

image-20250613221520518

数据存储说明

image-20250613221753764

保存搜索记录-实现思路

image-20250613230006106

image-20250613230334610

image-20250613230414766

image-20250613230621594

用户搜索服务实现

保存用户搜索历史记录

/*** 保存用户搜索历史记录** @param keyword* @param userId*/@Override@Asyncpublic void insert(String keyword, Integer userId) {//1.查询当前用户的搜索关键词Query query = Query.query(Criteria.where("userId").is(userId).and("keyword").is(keyword));ApUserSearch apUserSearch = mongoTemplate.findOne(query, ApUserSearch.class);//2.存在 更新创建时间if (apUserSearch != null) {apUserSearch.setCreatedTime(new Date());mongoTemplate.save(apUserSearch);return;}//3.不存在,判断当前历史记录总数量是否超过10apUserSearch = new ApUserSearch();apUserSearch.setUserId(userId);apUserSearch.setKeyword(keyword);apUserSearch.setCreatedTime(new Date());Query query1 = Query.query(Criteria.where("userId").is(userId));query1.with(Sort.by(Sort.Direction.DESC, "createdTime"));List<ApUserSearch> apUserSearchList = mongoTemplate.find(query1, ApUserSearch.class);if (apUserSearchList == null || apUserSearchList.size() < 10) {mongoTemplate.save(apUserSearch);} else {ApUserSearch lastUserSearch = apUserSearchList.get(apUserSearchList.size() - 1);mongoTemplate.findAndReplace(Query.query(Criteria.where("id").is(lastUserSearch.getId())), apUserSearch);}}

查询用户搜索历史记录

/*** 查询用户搜索历史记录* @return*/@Overridepublic ResponseResult findUserSearch() {// 获取当前用户ApUser user = AppThreadLocalUtil.getUser();if (user == null) {return ResponseResult.errorResult(AppHttpCodeEnum.NEED_LOGIN);}// 根据当前用户查询数据,按照时间倒序List<ApUserSearch> apUserSearches = mongoTemplate.find(Query.query(Criteria.where("userId").is(user.getId())).with(Sort.by(Sort.Direction.DESC, "createdTime")), ApUserSearch.class);return ResponseResult.okResult(apUserSearches);}

删除用户搜索历史记录

/*** 删除用户搜索历史记录* @param dto* @return*/@Overridepublic ResponseResult delUserSearch(HistorySearchDto dto) {// 检查参数if (dto.getId() == null) {return ResponseResult.errorResult(AppHttpCodeEnum.PARAM_INVALID);}// 判断是否登录ApUser user = AppThreadLocalUtil.getUser();if (user == null) {return ResponseResult.errorResult(AppHttpCodeEnum.NEED_LOGIN);}// 删除mongoTemplate.remove(Query.query(Criteria.where("userId").is(user.getId()).and("id").is(dto.getId())), ApUserSearch.class);return ResponseResult.okResult(AppHttpCodeEnum.SUCCESS);}

image-20250614173542679

image-20250614173634456

关键字联想词服务实现

image-20250614175924606

image-20250614180314561

image-20250614180446769

联想词查询

/**
 * Looks up associate (suggestion) words that contain the given search words.
 *
 * <p>The page size is capped at 20. The search words are matched literally as a substring
 * of the {@code associateWords} field.
 *
 * @param dto search parameters; must be non-null with non-blank search words
 * @return {@code PARAM_INVALID} for bad input, otherwise an OK result wrapping the matching
 *         {@code ApAssociateWords} documents
 */
@Override
public ResponseResult search(UserSearchDto dto) {
    // Validate parameters.
    if (dto == null || StringUtils.isBlank(dto.getSearchWords())) {
        return ResponseResult.errorResult(AppHttpCodeEnum.PARAM_INVALID);
    }

    // Cap the page size.
    if (dto.getPageSize() > 20) {
        dto.setPageSize(20);
    }

    // Quote the user input so it is matched literally. Prefixing it with a bare backslash
    // would turn a leading letter into a regex escape (e.g. "d..." -> \d, "b..." -> \b)
    // and leave the remaining characters interpreted as regex syntax.
    String containsSearchWords = ".*?" + java.util.regex.Pattern.quote(dto.getSearchWords()) + ".*";
    Query query = Query.query(Criteria.where("associateWords").regex(containsSearchWords));
    query.limit(dto.getPageSize());

    List<ApAssociateWords> apAssociateWords = mongoTemplate.find(query, ApAssociateWords.class);
    return ResponseResult.okResult(apAssociateWords);
}

相关文章:

  • RLHF调参实战手册:实用Trick、现象排查与解决思路(持续更新)
  • 【CSS-14】深入解析CSS定位:从基础到高级应用
  • 数据库期末
  • 19 - SAFM模块
  • 共享项目中使用Wpf和Winform——c# CAD二次开发
  • 拓扑推理:把邻接矩阵和节点特征形式数据集转换为可以训练CNN等序列模型的数据集
  • 展开说说Android之Glide详解_源码解析
  • YOLOV8模型优化-选择性视角类别整合模块(SPCI):遥感目标检测的注意力增强模型详解
  • [论文阅读] 人工智能 | Gen-n-Val:利用代理技术革新计算机视觉数据生成
  • U盘不识别 个别U盘不识别
  • 《人工智能时代与人类价值》读书简要笔记
  • 超强人工智能解决方案套件InfiniSynapse:精准的业务理解、对各种数据源进行全模态联合智能分析--部署安装@Ubuntu22.04 @Docker
  • Burn 开源程序是下一代深度学习框架,在灵活性、效率和可移植性方面毫不妥协
  • Day 49 训练
  • 【0.4 漫画计算机网络基础】
  • 基于Python学习《Head First设计模式》第十四章 剩下的模式
  • SparkUI依赖问题解决方法
  • pyspark非安装使用graphframes
  • 【生活系列】金刚经
  • Spark DAG、Stage 划分与 Task 调度底层原理深度剖析
  • java做网站用的是什么/软文范例大全
  • wordpress编辑器哪个好用吗/推广关键词优化公司
  • 海外公司网站 国内做备案/平台开发
  • 网站怎么做分页/什么是交换链接
  • dw成品网站成品视频教学/google搜索中文入口
  • 陕西手机网站建设公司哪家好/百度知道app官方下载