[免费]基于Python的协同过滤电影推荐系统(Django+Vue+sqlite+爬虫)【论文+源码+SQL脚本】
大家好,我是python222_小锋老师,看到一个不错的基于Python的Django+Vue图书借阅推荐系统,分享下哈。
项目视频演示
https://www.bilibili.com/video/BV1EMaRzEEGn/
项目介绍
从信息匮乏到信息过载,人们的生产生活遇到了巨大的挑战。社会各行业均以数据作为研究对象,与此同时也累积了大量具有潜在价值的数据,现今已步入大数据时代。我们进入各种搜索引擎网站亦或是APP[5],在输入关键信息后检索想要得的信息。尽管我们可以通过搜索引擎来获取我们想要的信息,但是对于很多人来说,却并不明确自己需要什么具体的信息。例如当人们进入超市或商店进行购物时,并不太明确自己一定就做好了买什么的准备,而是去了货架、储物柜才知道。通过分析自身的喜好和需求,最终决定买什么。我们身处于一个信息爆炸的时代,让用户在有限的时间和无限的海量信息中去找所需要的信息,就像是大海捞针,这就是大家经常所称的信息过载现象。推荐系统能够为客户提供推荐服务,现在越来越多的科研工作者投入到此领域当中。如何能让这些海量信息自己主动“找上门”,从以前的主动寻找所需物品,变为被动的去接受。因此,这样也就形成了推荐系统的雏形。
系统展示
部分代码
"""movierecomend URL ConfigurationThe `urlpatterns` list routes URLs to views. For more information please see:https://docs.djangoproject.com/en/2.0/topics/http/urls/
Examples:
Function views1. Add an import: from my_app import views2. Add a URL to urlpatterns: path('', views.home, name='home')
Class-based views1. Add an import: from other_app.views import Home2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
Including another URLconf1. Import the include() function: from django.urls import include, path2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.conf import settings
from django.conf.urls.static import static
from django.contrib import admin
from django.urls import path, includefrom movie import ajax_views as views
# 用于前后端连接
urlpatterns = [path("", include("index.urls")),path("admin/", admin.site.urls),path("api/", include([path("login/", views.login, name="login"),path("register/", views.register, name="register"),path("user/", views.get_user, name="get_user"),path("recent_movies/", views.recent_movies),path("movies/", views.movies),path("search_movies/", views.search_movies),path("user_recommend/", views.user_recommend,name="user_recommend"), # 用户推荐path("all_tags/", views.all_tags, name="all_tags"),path("movie/<int:movie_id>/", views.movie, name="movie"),path("item_recommend/", views.item_recommend,name="item_recommend"), # 物品推荐path("score/<int:movie_id>/", views.score, name="score"),path("collect/<int:movie_id>/", views.collect, name="collect"),path("decollect/<int:movie_id>/", views.decollect, name="decollect"),path("comment/<int:movie_id>/", views.make_comment, name="comment"),path("personal/", views.personal),path("mycollect/", views.mycollect, name="mycollect"),path("my_comments/", views.my_comments, name="my_comments"),path("my_rate/", views.my_rate, name="my_rate"),path("delete_comment/<int:comment_id>",views.delete_comment, name="delete_comment"),path("delete_rate/<int:rate_id>", views.delete_rate, name="delete_rate"),path('choose_tags/', views.choose_tags)])),
] + static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) + static(settings.STATIC_URL, document_root=settings.STATICFILES_DIRS)admin.site.site_header = '推荐系统后台管理'
admin.site.index_title = '首页-推荐系统'
admin.site.site_title = '推荐系统'
# -*-coding:utf-8-*-
import osos.environ["DJANGO_SETTINGS_MODULE"] = "movie.settings"
import djangodjango.setup()
from movie.models import *
from math import sqrt, pow
import operator
from django.db.models import Subquery,Q,Count# from django.shortcuts import render,render_to_response
class UserCf:# 获得初始化数据def __init__(self, all_user):self.all_user = all_user# 通过用户名获得列表,仅调试使用def getItems(self, username1, username2):return self.all_user[username1], self.all_user[username2]# 计算两个用户的皮尔逊相关系数def pearson(self, user1, user2): # 数据格式为:物品id,浏览sum_xy = 0.0 # user1,user2 每项打分的的累加n = 0 # 公共浏览次数sum_x = 0.0 # user1 的打分总和sum_y = 0.0 # user2 的打分总和sumX2 = 0.0 # user1每项打分平方的累加sumY2 = 0.0 # user2每项打分平方的累加for movie1, score1 in user1.items():if movie1 in user2.keys(): # 计算公共的浏览次数n += 1sum_xy += score1 * user2[movie1]sum_x += score1sum_y += user2[movie1]sumX2 += pow(score1, 2)sumY2 += pow(user2[movie1], 2)if n == 0:# print("p氏距离为0")return 0molecule = sum_xy - (sum_x * sum_y) / n # 分子denominator = sqrt((sumX2 - pow(sum_x, 2) / n) * (sumY2 - pow(sum_y, 2) / n)) # 分母if denominator == 0:return 0r = molecule / denominatorreturn r# 计算与当前用户的距离,获得最临近的用户def nearest_user(self, current_user, n=1):distances = {}# 用户,相似度# 遍历整个数据集for user, rate_set in self.all_user.items():# 非当前的用户if user != current_user:distance = self.pearson(self.all_user[current_user], self.all_user[user])# 计算两个用户的相似度distances[user] = distanceclosest_distance = sorted(distances.items(), key=operator.itemgetter(1), reverse=True)# 最相似的N个用户print("closest user:", closest_distance[:n])return closest_distance[:n]# 给用户推荐电影def recommend(self, username, n=3):recommend = {}nearest_user = self.nearest_user(username, n)for user, score in dict(nearest_user).items(): # 最相近的n个用户for movies, scores in self.all_user[user].items(): # 推荐的用户的电影列表if movies not in self.all_user[username].keys(): # 当前username没有看过if movies not in recommend.keys(): # 添加到推荐列表中recommend[movies] = scores*score# 对推荐的结果按照电影# 浏览次数排序return sorted(recommend.items(), key=operator.itemgetter(1), reverse=True)# 基于用户的推荐
def recommend_by_user_id(user_id):user_prefer = UserTagPrefer.objects.filter(user_id=user_id).order_by('-score').values_list('tag_id', flat=True)current_user = User.objects.get(id=user_id)# 如果当前用户没有打分 则看是否选择过标签,选过的话,就从标签中找# 没有的话,就按照浏览度推荐15个if current_user.rate_set.count() == 0:if len(user_prefer) != 0:movie_list = Movie.objects.filter(tags__in=user_prefer)[:15]else:movie_list = Movie.objects.order_by("-num")[:15]return movie_list# 选取评分最多的10个用户users_rate = Rate.objects.values('user').annotate(mark_num=Count('user')).order_by('-mark_num')user_ids = [user_rate['user'] for user_rate in users_rate]user_ids.append(user_id)users = User.objects.filter(id__in=user_ids)#users 为评分最多的10个用户all_user = {}for user in users:rates = user.rate_set.all()#查出10名用户的数据rate = {}# 用户有给电影打分 在rate和all_user中进行设置if rates:for i in rates:rate.setdefault(str(i.movie.id), i.mark)#填充电影数据all_user.setdefault(user.username, rate)else:# 用户没有为电影打过分,设为0all_user.setdefault(user.username, {})user_cf = UserCf(all_user=all_user)recommend_list = [each[0] for each in user_cf.recommend(current_user.username, 15)]movie_list = list(Movie.objects.filter(id__in=recommend_list).order_by("-num")[:15])other_length = 15 - len(movie_list)if other_length > 0:fix_list = Movie.objects.filter(~Q(rate__user_id=user_id)).order_by('-collect')for fix in fix_list:if fix not in movie_list:movie_list.append(fix)if len(movie_list) >= 15:breakreturn movie_list# 计算相似度
def similarity(movie1_id, movie2_id):movie1_set = Rate.objects.filter(movie_id=movie1_id)# movie1的打分用户数movie1_sum = movie1_set.count()# movie_2的打分用户数movie2_sum = Rate.objects.filter(movie_id=movie2_id).count()# 两者的交集common = Rate.objects.filter(user_id__in=Subquery(movie1_set.values('user_id')), movie=movie2_id).values('user_id').count()# 没有人给当前电影打分if movie1_sum == 0 or movie2_sum == 0:return 0similar_value = common / sqrt(movie1_sum * movie2_sum)#余弦计算相似度return similar_valueimport sys
sys.stdout.reconfigure(encoding='utf-8')
#基于物品
def recommend_by_item_id(user_id, k=15):# 前三的tag,用户评分前三的电影user_prefer = UserTagPrefer.objects.filter(user_id=user_id).order_by('-score').values_list('tag_id', flat=True)user_prefer = list(user_prefer)[:3]current_user = User.objects.get(id=user_id)# 如果当前用户没有打分 则看是否选择过标签,选过的话,就从标签中找# 没有的话,就按照浏览度推荐15个if current_user.rate_set.count() == 0:if len(user_prefer) != 0:movie_list = Movie.objects.filter(tags__in=user_prefer)[:15]else:movie_list = Movie.objects.order_by("-num")[:15]print('from here')return movie_list# most_tags = Tags.objects.annotate(tags_sum=Count('name')).order_by('-tags_sum').filter(movie__rate__user_id=user_id).order_by('-tags_sum')# 选用户最喜欢的标签中的电影,用户没看过的30部,对这30部电影,计算距离最近un_watched = Movie.objects.filter(~Q(rate__user_id=user_id), tags__in=user_prefer).order_by('?')[:30] # 看过的电影watched = Rate.objects.filter(user_id=user_id).values_list('movie_id', 'mark')distances = []names = []# 在未看过的电影中找到for un_watched_movie in un_watched:for watched_movie in watched:if un_watched_movie not in names:names.append(un_watched_movie)distances.append((similarity(un_watched_movie.id, watched_movie[0]) * watched_movie[1], un_watched_movie))#加入相似的电影distances.sort(key=lambda x: x[0], reverse=True)# print('this is distances', distances[:15])recommend_list = []for mark, movie in distances:if len(recommend_list) >= k:breakif movie not in recommend_list:recommend_list.append(movie)# print('this is recommend list', recommend_list)# 如果得不到有效数量的推荐 按照未看过的电影中的热度进行填充# print('recommend list', recommend_list)return recommend_listif __name__ == '__main__':similarity(2003, 2008)recommend_by_item_id(1)
源码下载
链接:https://pan.baidu.com/s/1y1ntpfJhI-Zc-SOVDDUUTg
提取码:1234