Path: blob/main/C3 - Unsupervised Learning, Recommenders, Reinforcement Learning/week2/C3W2/C3W2A1/recsys_utils.py
3567 views
import numpy as np
import pandas as pd
from numpy import loadtxt


def _load_csv(path):
    """Read a comma-delimited numeric file into a NumPy array.

    The file is opened in a ``with`` block so the handle is closed as soon
    as parsing finishes, instead of being left for the garbage collector.
    """
    with open(path, 'rb') as handle:
        return loadtxt(handle, delimiter=",")


def normalizeRatings(Y, R):
    """
    Preprocess data by subtracting the mean rating of every movie (every row).

    Only real ratings, i.e. entries where R(i,j)=1, contribute to the mean and
    are shifted by it; entries where R(i,j)=0 are returned unchanged.

    Args:
        Y: 2-D array of ratings, one row per movie.
        R: 2-D array of the same shape as Y, 1 where a rating exists, else 0.

    Returns:
        (Ynorm, Ymean): Ynorm is Y with each row's mean removed from its rated
        entries, so every movie has a rating of 0 on average. Ymean is a
        column vector (one row per movie) holding the per-movie mean rating.
    """
    # The 1e-12 term keeps the division finite for a movie with no ratings;
    # such a row gets a mean of 0.
    rated_sum = np.sum(Y * R, axis=1)
    rated_count = np.sum(R, axis=1) + 1e-12
    Ymean = (rated_sum / rated_count).reshape(-1, 1)

    # Multiplying by R restricts the shift to entries that were actually rated.
    Ynorm = Y - np.multiply(Ymean, R)
    return (Ynorm, Ymean)


def load_precalc_params_small():
    """
    Load the pre-computed parameter matrices of the small movie data set.

    Reads X, W and b from the CSV files under ``./data``.

    Returns:
        (X, W, b, num_movies, num_features, num_users) where b is reshaped to
        a single row, (num_movies, num_features) is the shape of X and
        num_users is the number of rows of W.
    """
    X = _load_csv('./data/small_movies_X.csv')
    W = _load_csv('./data/small_movies_W.csv')
    b = _load_csv('./data/small_movies_b.csv')
    b = b.reshape(1, -1)

    num_movies, num_features = X.shape
    num_users, _ = W.shape
    return (X, W, b, num_movies, num_features, num_users)


def load_ratings_small():
    """
    Load the ratings matrix Y and the indicator matrix R of the small data set.

    Returns:
        (Y, R) as NumPy arrays read from the CSV files under ``./data``.
    """
    Y = _load_csv('./data/small_movies_Y.csv')
    R = _load_csv('./data/small_movies_R.csv')
    return (Y, R)


def load_Movie_List_pd():
    """
    Load the movie list of the small data set.

    Returns:
        (mlist, df): df is the DataFrame read from
        ``./data/small_movie_list.csv`` using its first column as the index
        (intended to list the movies in the same order as the rows of the Y
        matrix); mlist is the "title" column as a Python list.
    """
    df = pd.read_csv('./data/small_movie_list.csv', header=0, index_col=0,
                     delimiter=',', quotechar='"')
    mlist = df["title"].to_list()
    return (mlist, df)