基于用户的协同过滤算法实现小说推荐算法
推荐算法实现
1. 修改 NovelRecommendService 接口
// NovelRecommendService.java
import java.util.List;
import java.util.Map;

/**
 * Novel recommendation service built on user-based collaborative filtering.
 */
public interface NovelRecommendService {

    /**
     * Recommends novels for a user with the user-based collaborative filtering algorithm.
     *
     * @param userId ID of the user to recommend for
     * @param topN   number of recommendations wanted
     * @return the recommended novels
     */
    List<BookInfo> recommendForUser(Long userId, int topN);

    /**
     * Precomputes the user similarity matrix; intended to speed up later recommendations.
     */
    void precomputeUserSimilarity();

    /**
     * Returns the behavior vector of a user, the input of the collaborative filtering
     * computation.
     *
     * @param userId ID of the user
     * @return the behavior vector: key is the novel ID, value is the rating/weight
     */
    Map<Long, Double> getUserBehaviorVector(Long userId);
}
2. 实现基于用户协同过滤的推荐服务
// NovelRecommendServiceImpl.java
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.stereotype.Service;import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;@Service
@RequiredArgsConstructor
@Slf4j
public class NovelRecommendServiceImpl implements NovelRecommendService {private final UserBookshelfMapper userBookshelfMapper;private final UserBrowseMapper userBrowseMapper;private final BookInfoMapper bookInfoMapper;private final UserInfoMapper userInfoMapper;private final RedisTemplate<String, Object> redisTemplate;// 相似用户数量阈值private static final int SIMILAR_USER_COUNT = 50;// 推荐候选集大小private static final int CANDIDATE_SIZE = 100;@Overridepublic List<BookInfo> recommendForUser(Long userId, int topN) {try {// 1. 获取目标用户行为向量Map<Long, Double> targetUserVector = getUserBehaviorVector(userId);// 2. 处理冷启动情况if (targetUserVector.isEmpty()) {return getColdStartRecommendations(topN);}// 3. 获取相似用户List<Long> similarUsers = getSimilarUsers(userId, targetUserVector);// 4. 基于相似用户生成推荐return generateRecommendations(userId, targetUserVector, similarUsers, topN);} catch (Exception e) {log.error("协同过滤推荐失败,用户ID: {}", userId, e);return getColdStartRecommendations(topN);}}@Overridepublic Map<Long, Double> getUserBehaviorVector(Long userId) {String cacheKey = "user:behavior:" + userId;// 尝试从缓存获取@SuppressWarnings("unchecked")Map<Long, Double> cachedVector = (Map<Long, Double>) redisTemplate.opsForValue().get(cacheKey);if (cachedVector != null) {return cachedVector;}Map<Long, Double> behaviorVector = new HashMap<>();// 1. 获取书架数据(高权重)QueryWrapper<UserBookshelf> bookshelfWrapper = new QueryWrapper<>();bookshelfWrapper.eq("user_id", userId);List<UserBookshelf> bookshelfList = userBookshelfMapper.selectList(bookshelfWrapper);for (UserBookshelf bookshelf : bookshelfList) {behaviorVector.put(bookshelf.getBookId(), 5.0); // 书架权重}// 2. 
获取浏览数据(根据时长和次数加权)QueryWrapper<UserBrowse> browseWrapper = new QueryWrapper<>();browseWrapper.eq("user_id", userId);List<UserBrowse> browseList = userBrowseMapper.selectList(browseWrapper);Map<Long, List<UserBrowse>> browseGroup = browseList.stream().collect(Collectors.groupingBy(UserBrowse::getBookId));for (Map.Entry<Long, List<UserBrowse>> entry : browseGroup.entrySet()) {Long bookId = entry.getKey();List<UserBrowse> browses = entry.getValue();// 计算浏览权重:基础权重 + 次数加权double baseWeight = browses.stream().mapToDouble(browse -> Math.min(browse.getDuration() / 60.0, 5.0)) // 最大5分.sum();double countWeight = Math.log(browses.size() + 1); // 次数对数加权double finalWeight = Math.min(baseWeight * countWeight, 3.0); // 最大3分// 如果该书已经在书架中,则不重复计算if (!behaviorVector.containsKey(bookId)) {behaviorVector.put(bookId, finalWeight);}}// 缓存用户行为向量(1小时过期)redisTemplate.opsForValue().set(cacheKey, behaviorVector, 1, TimeUnit.HOURS);return behaviorVector;}/*** 获取相似用户列表*/private List<Long> getSimilarUsers(Long targetUserId, Map<Long, Double> targetVector) {String cacheKey = "user:similar:" + targetUserId;// 尝试从缓存获取@SuppressWarnings("unchecked")List<Long> cachedSimilarUsers = (List<Long>) redisTemplate.opsForValue().get(cacheKey);if (cachedSimilarUsers != null) {return cachedSimilarUsers;}// 获取所有用户ID(排除目标用户)QueryWrapper<UserInfo> userWrapper = new QueryWrapper<>();userWrapper.select("id");List<UserInfo> allUsers = userInfoMapper.selectList(userWrapper);List<Long> allUserIds = allUsers.stream().map(UserInfo::getId).filter(id -> !id.equals(targetUserId)).collect(Collectors.toList());// 计算相似度Map<Long, Double> similarityMap = new HashMap<>();for (Long userId : allUserIds) {Map<Long, Double> userVector = getUserBehaviorVector(userId);double similarity = calculateCosineSimilarity(targetVector, userVector);// 只保留相似度大于阈值的用户if (similarity > 0.1) {similarityMap.put(userId, similarity);}}// 按相似度排序,取前N个List<Long> similarUsers = similarityMap.entrySet().stream().sorted(Map.Entry.<Long, 
Double>comparingByValue().reversed()).limit(SIMILAR_USER_COUNT).map(Map.Entry::getKey).collect(Collectors.toList());// 缓存相似用户列表(30分钟过期)redisTemplate.opsForValue().set(cacheKey, similarUsers, 30, TimeUnit.MINUTES);return similarUsers;}/*** 生成推荐列表*/private List<BookInfo> generateRecommendations(Long userId, Map<Long, Double> targetVector, List<Long> similarUsers, int topN) {// 获取用户已交互的小说ID(用于过滤)Set<Long> interactedBooks = new HashSet<>(targetVector.keySet());// 收集相似用户喜欢的小说Map<Long, Double> recommendationScores = new HashMap<>();for (Long similarUserId : similarUsers) {Map<Long, Double> similarUserVector = getUserBehaviorVector(similarUserId);double similarity = calculateCosineSimilarity(targetVector, similarUserVector);// 为相似用户喜欢的小说计算推荐分数for (Map.Entry<Long, Double> entry : similarUserVector.entrySet()) {Long bookId = entry.getKey();Double rating = entry.getValue();// 过滤已交互的小说if (!interactedBooks.contains(bookId)) {double score = rating * similarity;recommendationScores.merge(bookId, score, Double::sum);}}}// 按推荐分数排序,取前topNreturn recommendationScores.entrySet().stream().sorted(Map.Entry.<Long, Double>comparingByValue().reversed()).limit(Math.min(topN * 2, CANDIDATE_SIZE)) // 取更多候选,后续可进一步过滤.map(Map.Entry::getKey).map(bookId -> {try {return bookInfoMapper.selectById(bookId);} catch (Exception e) {return null;}}).filter(Objects::nonNull).limit(topN).collect(Collectors.toList());}/*** 计算余弦相似度*/private double calculateCosineSimilarity(Map<Long, Double> vectorA, Map<Long, Double> vectorB) {// 找到共同的键Set<Long> commonKeys = new HashSet<>(vectorA.keySet());commonKeys.retainAll(vectorB.keySet());if (commonKeys.isEmpty()) {return 0.0;}// 计算点积double dotProduct = commonKeys.stream().mapToDouble(key -> vectorA.get(key) * vectorB.get(key)).sum();// 计算向量模长double magnitudeA = Math.sqrt(vectorA.values().stream().mapToDouble(v -> v * v).sum());double magnitudeB = Math.sqrt(vectorB.values().stream().mapToDouble(v -> v * v).sum());if (magnitudeA == 0 || magnitudeB == 0) {return 
0.0;}return dotProduct / (magnitudeA * magnitudeB);}/*** 冷启动推荐*/private List<BookInfo> getColdStartRecommendations(int topN) {String cacheKey = "recommend:coldstart";// 尝试从缓存获取@SuppressWarnings("unchecked")List<BookInfo> cachedRecommendations = (List<BookInfo>) redisTemplate.opsForValue().get(cacheKey);if (cachedRecommendations != null) {return cachedRecommendations.stream().limit(topN).collect(Collectors.toList());}// 推荐热门小说QueryWrapper<BookInfo> queryWrapper = new QueryWrapper<>();queryWrapper.orderByDesc("visit_count").last("LIMIT " + (topN * 3)); // 获取更多候选List<BookInfo> hotBooks = bookInfoMapper.selectList(queryWrapper);// 缓存热门推荐(1小时过期)redisTemplate.opsForValue().set(cacheKey, hotBooks, 1, TimeUnit.HOURS);return hotBooks.stream().limit(topN).collect(Collectors.toList());}@Overridepublic void precomputeUserSimilarity() {log.info("开始预计算用户相似度矩阵");// 获取所有用户QueryWrapper<UserInfo> userWrapper = new QueryWrapper<>();userWrapper.select("id");List<UserInfo> allUsers = userInfoMapper.selectList(userWrapper);int totalUsers = allUsers.size();int processed = 0;for (UserInfo user : allUsers) {try {Long userId = user.getId();Map<Long, Double> userVector = getUserBehaviorVector(userId);getSimilarUsers(userId, userVector); // 这会自动缓存相似用户processed++;if (processed % 100 == 0) {log.info("预计算进度: {}/{}", processed, totalUsers);}} catch (Exception e) {log.error("预计算用户 {} 相似度失败", user.getId(), e);}}log.info("用户相似度矩阵预计算完成");}
}
3. 添加缓存管理器以提高性能
// RecommendationCacheManager.java
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.stereotype.Component;import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;@Component
@RequiredArgsConstructor
@Slf4j
public class RecommendationCacheManager {

    /** Key prefix of cached user behavior vectors. */
    private static final String BEHAVIOR_KEY_PREFIX = "recommend:user:behavior:";

    /** Key prefix of cached similar-user lists. */
    private static final String SIMILAR_KEY_PREFIX = "recommend:user:similar:";

    /** Key prefix of cached recommendation results. */
    private static final String RESULT_KEY_PREFIX = "recommend:result:";

    private final RedisTemplate<String, Object> redisTemplate;

    /**
     * Stores the behavior vector of a user for one hour.
     *
     * @param userId         ID of the user
     * @param behaviorVector vector to store
     */
    public void cacheUserBehaviorVector(Long userId, Map<Long, Double> behaviorVector) {
        write(BEHAVIOR_KEY_PREFIX + userId, behaviorVector, 1, TimeUnit.HOURS);
    }

    /**
     * Reads the cached behavior vector of a user.
     *
     * @param userId ID of the user
     * @return the value stored under the user's key, cast without a runtime type check
     */
    @SuppressWarnings("unchecked")
    public Map<Long, Double> getUserBehaviorVector(Long userId) {
        return (Map<Long, Double>) read(BEHAVIOR_KEY_PREFIX + userId);
    }

    /**
     * Stores the similar-user list of a user for 30 minutes.
     *
     * @param userId       ID of the user
     * @param similarUsers IDs of the similar users
     */
    public void cacheUserSimilarity(Long userId, List<Long> similarUsers) {
        write(SIMILAR_KEY_PREFIX + userId, similarUsers, 30, TimeUnit.MINUTES);
    }

    /**
     * Reads the cached similar-user list of a user.
     *
     * @param userId ID of the user
     * @return the value stored under the user's key, cast without a runtime type check
     */
    @SuppressWarnings("unchecked")
    public List<Long> getSimilarUsers(Long userId) {
        return (List<Long>) read(SIMILAR_KEY_PREFIX + userId);
    }

    /**
     * Stores the recommendation result of a user for 10 minutes.
     *
     * @param userId          ID of the user
     * @param recommendations recommendation list to store
     */
    public void cacheRecommendations(Long userId, List<?> recommendations) {
        write(RESULT_KEY_PREFIX + userId, recommendations, 10, TimeUnit.MINUTES);
    }

    /**
     * Reads the cached recommendation result of a user.
     *
     * @param userId ID of the user
     * @return the value stored under the user's key, cast to a list
     */
    @SuppressWarnings("unchecked")
    public List<?> getRecommendations(Long userId) {
        return (List<?>) read(RESULT_KEY_PREFIX + userId);
    }

    /** Writes one value with the given time-to-live. */
    private void write(String key, Object value, long timeout, TimeUnit unit) {
        redisTemplate.opsForValue().set(key, value, timeout, unit);
    }

    /** Reads the value stored under one key. */
    private Object read(String key) {
        return redisTemplate.opsForValue().get(key);
    }
}
性能优化要点
- 缓存机制:
  - 用户行为向量缓存1小时
  - 用户相似度缓存30分钟
  - 推荐结果缓存10分钟
  - 热门推荐缓存1小时
 
- 计算优化:
  - 限制相似用户数量(前50个)
  - 限制候选推荐数量
  - 使用余弦相似度算法
  - 分批处理大量数据
 
- 冷启动处理:
  - 新用户推荐热门小说
  - 行为数据不足时使用基础推荐
 
- 预计算机制:
  - 支持离线预计算相似度矩阵
  - 定时任务更新推荐数据
 
这个实现方案兼顾了推荐准确性与系统性能,通过多层缓存和合理的算法优化,能够满足高性能推荐需求。
