CoCalc -- recsys

GitHub Repository: greyhatguy007/Machine-Learning-Specialization-Coursera
Path: blob/main/C3 - Unsupervised Learning, Recommenders, Reinforcement Learning/week2/C3W2/C3W2A1/recsys_utils.py
3567 views

1
import numpy as np
2
import pandas as pd
3
from numpy import loadtxt
4

5
def normalizeRatings(Y, R):
    """
    Mean-normalize a ratings matrix row by row, using only real ratings.

    Each row of Y is treated as one movie. The mean of a row is computed
    over the entries flagged in R (R(i,j)=1) only, and that mean is then
    subtracted from those same entries. Entries with R(i,j)=0 are left
    exactly as they are in Y.

    Args:
      Y : 2-D array of ratings (one row per movie).
      R : 2-D array with the same shape as Y; 1 where a rating exists,
          0 otherwise.

    Returns:
      (Ynorm, Ymean) where Ynorm has the shape of Y and Ymean is a column
      vector (one entry per row of Y) holding the per-row mean rating.
      A row with no ratings at all gets a mean of 0.
    """
    # The small epsilon keeps the division finite for rows without any rating.
    rated_sum = np.sum(Y * R, axis=1)
    rated_count = np.sum(R, axis=1) + 1e-12
    Ymean = (rated_sum / rated_count).reshape(-1, 1)

    # Multiplying by R restricts the subtraction to entries that were rated.
    Ynorm = Y - np.multiply(Ymean, R)
    return (Ynorm, Ymean)
16

17
def load_precalc_params_small():
    """
    Load the pre-computed X, W and b arrays for the small movie data set.

    Reads three comma-delimited files relative to the current working
    directory:
      ./data/small_movies_X.csv
      ./data/small_movies_W.csv
      ./data/small_movies_b.csv

    Returns:
      (X, W, b, num_movies, num_features, num_users) where
        X, W         : arrays as stored in their files (expected to be 2-D),
        b            : contents of the b file reshaped to a single row,
        num_movies   : number of rows of X,
        num_features : number of columns of X,
        num_users    : number of rows of W.
    """
    # Context managers make sure every file handle is closed after parsing,
    # even if loadtxt raises.
    with open('./data/small_movies_X.csv', 'rb') as file:
        X = loadtxt(file, delimiter=",")

    with open('./data/small_movies_W.csv', 'rb') as file:
        W = loadtxt(file, delimiter=",")

    with open('./data/small_movies_b.csv', 'rb') as file:
        b = loadtxt(file, delimiter=",")
    b = b.reshape(1, -1)

    num_movies, num_features = X.shape
    num_users, _ = W.shape
    return (X, W, b, num_movies, num_features, num_users)
31
    
32
def load_ratings_small():
    """
    Load the Y and R arrays for the small movie data set.

    Reads two comma-delimited files relative to the current working
    directory:
      ./data/small_movies_Y.csv
      ./data/small_movies_R.csv

    Returns:
      (Y, R) : the two arrays exactly as parsed from their files.
    """
    # Context managers make sure both file handles are closed after parsing,
    # even if loadtxt raises.
    with open('./data/small_movies_Y.csv', 'rb') as file:
        Y = loadtxt(file, delimiter=",")

    with open('./data/small_movies_R.csv', 'rb') as file:
        R = loadtxt(file, delimiter=",")
    return (Y, R)
39

40
def load_Movie_List_pd():
    """
    Load the movie list for the small data set.

    Reads ./data/small_movie_list.csv (relative to the current working
    directory), taking the first row as the header and the first column as
    the DataFrame index.

    Returns:
      (mlist, df) where
        mlist : Python list of the values in the "title" column, in file order,
        df    : the full DataFrame read from the file.
    """
    # quotechar='"' lets quoted fields (e.g. titles) contain commas.
    df = pd.read_csv(
        './data/small_movie_list.csv',
        header=0,
        index_col=0,
        delimiter=',',
        quotechar='"',
    )
    mlist = df["title"].to_list()
    return (mlist, df)
45

46

47

48

49

50

51

Product

Resources

Company