Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
greyhatguy007
GitHub Repository: greyhatguy007/Machine-Learning-Specialization-Coursera
Path: blob/main/C3 - Unsupervised Learning, Recommenders, Reinforcement Learning/week2/C3W2/C3W2A1/recsys_utils.py
3567 views
1
import numpy as np
2
import pandas as pd
3
from numpy import loadtxt
4
5
def normalizeRatings(Y, R):
    """
    Mean-normalize the ratings of each movie (each row of Y).

    Only entries flagged as real ratings (R(i,j)=1) contribute to a row's
    mean, and the mean is subtracted only from those same entries.
    A row without any ratings gets a mean of 0 and is left unchanged.

    Returns the tuple (Ynorm, Ymean), where Ymean is a column vector
    holding one mean rating per row.
    """
    # Multiplying by R zeroes out the unrated entries before summing.
    rated_totals = np.sum(Y*R, axis=1)
    rated_counts = np.sum(R, axis=1)

    # The tiny constant keeps the division defined for rows with no ratings.
    per_row_mean = rated_totals / (rated_counts + 1e-12)
    Ymean = per_row_mean.reshape(-1, 1)

    # Shift only the rated entries; unrated ones keep their original value.
    mean_on_rated = np.multiply(Ymean, R)
    Ynorm = Y - mean_on_rated
    return Ynorm, Ymean
16
17
def load_precalc_params_small():
    """
    Load the pre-computed parameters for the small movie data set.

    Reads three comma-delimited files from ./data:
      small_movies_X.csv, small_movies_W.csv and small_movies_b.csv.

    Returns the tuple (X, W, b, num_movies, num_features, num_users) where
    b is reshaped to a single row, (num_movies, num_features) is the shape
    of X and num_users is the number of rows of W.
    """
    # Each file is opened in a `with` block so the handle is closed as soon
    # as it has been parsed, even if loadtxt raises.
    with open('./data/small_movies_X.csv', 'rb') as x_file:
        X = loadtxt(x_file, delimiter=",")

    with open('./data/small_movies_W.csv', 'rb') as w_file:
        W = loadtxt(w_file, delimiter=",")

    with open('./data/small_movies_b.csv', 'rb') as b_file:
        b = loadtxt(b_file, delimiter=",")

    # Force b into a single-row 2-D array regardless of how loadtxt shaped it.
    b = b.reshape(1, -1)

    num_movies, num_features = X.shape
    num_users, _ = W.shape
    return (X, W, b, num_movies, num_features, num_users)
31
32
def load_ratings_small():
    """
    Load the ratings of the small movie data set.

    Reads two comma-delimited files from ./data:
      small_movies_Y.csv and small_movies_R.csv.

    Returns the tuple (Y, R) of arrays parsed from those files.
    """
    # `with` guarantees each handle is closed once parsed, even on error.
    with open('./data/small_movies_Y.csv', 'rb') as y_file:
        Y = loadtxt(y_file, delimiter=",")

    with open('./data/small_movies_R.csv', 'rb') as r_file:
        R = loadtxt(r_file, delimiter=",")

    return (Y, R)
39
40
def load_Movie_List_pd():
    """
    Read ./data/small_movie_list.csv and return (mlist, df).

    df is the table indexed by the file's first column, with the movies in
    the order they appear in the Y matrix; mlist is the "title" column of
    df as a plain Python list.
    """
    movie_table = pd.read_csv(
        './data/small_movie_list.csv',
        header=0,
        index_col=0,
        delimiter=',',
        quotechar='"',
    )
    titles = movie_table["title"].to_list()
    return titles, movie_table
45
46
47
48
49
50
51