基于协同过滤的电影推荐系统代码
步骤
1、初始化数据
获取movies和ratings
转换成数据 userDict,表示某个用户对所有电影的评分集合,并将评分除以 5 进行归一化
转换成数据 ItemUser,表示对某部电影评过分的所有用户集合
2、计算所有⽤户与userId的相似度
找出所有观看电影与 userId 有交集的用户
对这些⽤户循环计算与userId的相似度
获取 A 用户与 userId 所评分电影的并集。格式为:{'电影ID': [userId 的评分, A 用户的评分]},没有评分记为 0。计算 A 用户与 userId 的余弦相似度,值越大越相似
3、根据相似度⽣成推荐电影列表
4、输出推荐列表和准确率
#!/usr/bin/python3
# -*- coding: utf-8 -*-
from numpy import*
import time
from texttable import Texttable
class CF:
    """User-based collaborative-filtering movie recommender.

    Ratings are normalised to [0, 1] (divided by 5), users are compared with
    cosine similarity over the union of the movies they rated, and the movies
    rated by the ``k`` most similar users are ranked by summed similarity.

    Args:
        movies: Rows describing movies; ``row[0]`` is the movie ID. The table
            header assumes three columns (ID, name, release).
        ratings: Rows of ratings; ``row[0]`` is the user ID, ``row[1]`` the
            movie ID and ``row[2]`` the rating on a 0-5 scale.
        k: Number of nearest neighbours to keep.
        n: Number of movies to recommend. ``recommendByUser`` overwrites it
            with the number of movies the target user rated.
    """

    def __init__(self, movies, ratings, k=5, n=10):
        self.movies = movies
        self.ratings = ratings
        # Number of neighbours.
        self.k = k
        # Number of recommendations.
        self.n = n
        # Ratings per user: {user_id: [(movie_id, normalised_rating), ...]}.
        self.userDict = {}
        # Users per movie: {movie_id: [user_id, ...]}.
        self.ItemUser = {}
        # Nearest neighbours as [similarity, user_id], best first.
        self.neighbors = []
        # Recommendations as [score, movie_id], best first.
        self.recommandList = []
        # Precision of the most recent recommendation run, in [0, 1].
        self.precision = 0.0

    def recommendByUser(self, userId):
        """Run the full pipeline for ``userId`` and return the precision.

        Fills ``userDict``/``ItemUser``, ``neighbors`` and ``recommandList``.

        Raises:
            KeyError: If ``userId`` has no ratings.
        """
        self.formatRate()
        # Recommend as many movies as the user rated, so that the precision
        # compares two lists of comparable size.
        self.n = len(self.userDict[userId])
        self.getNearestNeighbor(userId)
        self.getrecommandList(userId)
        self.precision = self.getPrecision(userId)
        return self.precision

    def getrecommandList(self, userId):
        """Rank the movies rated by the current neighbours.

        A movie's score is the sum of the similarities of every neighbour
        that rated it. The ``n`` best ``[score, movie_id]`` pairs are stored
        in ``self.recommandList``. ``userId`` is unused; it is kept for
        interface compatibility.
        """
        recommandDict = {}
        for dist, neighborId in self.neighbors:
            for movieId, _rating in self.userDict[neighborId]:
                recommandDict[movieId] = recommandDict.get(movieId, 0) + dist
        ranked = [[score, movieId] for movieId, score in recommandDict.items()]
        # Descending by score; ties fall back to the movie ID.
        ranked.sort(reverse=True)
        self.recommandList = ranked[:self.n]

    def formatRate(self):
        """Rebuild ``userDict`` and ``ItemUser`` from ``self.ratings``."""
        self.userDict = {}
        self.ItemUser = {}
        for row in self.ratings:
            userId, movieId = row[0], row[1]
            # The highest rating is 5, so dividing by 5 normalises to [0, 1].
            normalised = float(row[2]) / 5
            self.userDict.setdefault(userId, []).append((movieId, normalised))
            self.ItemUser.setdefault(movieId, []).append(userId)

    def getNearestNeighbor(self, userId):
        """Store the ``k`` users most similar to ``userId`` in ``neighbors``.

        Only users who rated at least one movie in common with ``userId`` are
        considered.
        """
        # A set makes de-duplication O(1) per user instead of a list scan.
        candidates = set()
        for movieId, _rating in self.userDict[userId]:
            candidates.update(self.ItemUser[movieId])
        candidates.discard(userId)
        scored = [[self.getCost(userId, other), other] for other in candidates]
        # list.sort is ascending by default; reverse=True puts the most
        # similar users first.
        scored.sort(reverse=True)
        self.neighbors = scored[:self.k]

    def formatuserDict(self, userId, l):
        """Return ``{movie_id: [rating of userId, rating of l]}``.

        The keys are the union of the movies rated by either user; a missing
        rating is recorded as 0.
        """
        user = {}
        for movieId, rating in self.userDict[userId]:
            user[movieId] = [rating, 0]
        for movieId, rating in self.userDict[l]:
            if movieId in user:
                # Rated by both users.
                user[movieId][1] = rating
            else:
                # Rated only by ``l``.
                user[movieId] = [0, rating]
        return user

    def getCost(self, userId, l):
        """Return the cosine similarity between users ``userId`` and ``l``.

        Returns 0 when the users have no commonly rated movie with non-zero
        ratings, which also avoids dividing by zero.
        """
        user = self.formatuserDict(userId, l)
        x = 0.0  # squared norm of userId's rating vector
        y = 0.0  # squared norm of l's rating vector
        z = 0.0  # dot product of the two vectors
        for first, second in user.values():
            first = float(first)
            second = float(second)
            x += first * first
            y += second * second
            z += first * second
        if z == 0.0:
            return 0
        return z / sqrt(x * y)

    def getPrecision(self, userId):
        """Return the share of overlap between rated and recommended movies.

        The number of movies present in both lists is divided by the length
        of the shorter list. Returns 0.0 when either list is empty.
        """
        rated = [entry[0] for entry in self.userDict[userId]]
        recommended = [entry[1] for entry in self.recommandList]
        if len(rated) >= len(recommended):
            shorter, longer = recommended, rated
        else:
            shorter, longer = rated, recommended
        if not shorter:
            return 0.0
        lookup = set(longer)
        count = 0.0
        for movieId in shorter:
            if movieId in lookup:
                count += 1.0
        return count / len(shorter)

    def showTable(self):
        """Print the recommendation list as a text table.

        Each row shows the movie's columns followed by the neighbours that
        rated it. Rows from ``self.movies`` are copied, never modified.
        """
        neighbors_id = set(entry[1] for entry in self.neighbors)
        # Index the movies once; the first row wins for a duplicated ID.
        moviesById = {}
        for row in self.movies:
            moviesById.setdefault(row[0], row)
        table = Texttable()
        table.set_deco(Texttable.HEADER)
        table.set_cols_dtype(["t", "t", "t", "t"])
        table.set_cols_align(["l", "l", "l", "l"])
        rows = [[u"movie ID", u"Name", u"release", u"from userID"]]
        for _score, movieId in self.recommandList:
            fromID = [
                user for user in self.ItemUser[movieId] if user in neighbors_id
            ]
            # Unknown movies get empty name/release cells so the row still
            # has four columns.
            movie = list(moviesById.get(movieId, [movieId, u"", u""]))
            movie.append(fromID)
            rows.append(movie)
        table.add_rows(rows)
        print(table.draw())
# 获取数据
def readFile(filename, encoding="iso-8859-15"):
    """Read a ``::``-separated data file into a list of field lists.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding of the file. The default is iso-8859-15;
            pass ``"utf-8"`` if the default does not decode the file.

    Returns:
        One list of string fields per non-blank line, in file order.
    """
    data = []
    # The context manager closes the file even if decoding fails midway.
    with open(filename, "r", encoding=encoding) as files:
        for line in files:
            line = line.strip()
            # Blank lines carry no record; skipping them avoids [''] rows.
            if line:
                data.append(line.split("::"))
    return data
# Driver: load the data files, build the recommender and print the results.
start = time.process_time()
movies = readFile("C:/Users/SPLab03/Desktop/ml-1m/movies.dat")
ratings = readFile("C:/Users/SPLab03/Desktop/ml-1m/ratings.dat")
demo = CF(movies, ratings, k=20)
# NOTE(review): the source shows no call that computes the recommendations
# before printing them. The user ID below is a placeholder - confirm.
demo.recommendByUser("100")
print("推荐列表为:")
demo.showTable()
print("处理的数据为%d条"%(len(demo.ratings)))
# NOTE(review): the precision printout is disabled in the source and its
# expression is truncated ("st * 100"); restore it once the attribute
# holding the precision is confirmed.
end = time.process_time()
print("耗费时间: %f s"%(end - start))

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。