GitHub Repository: ethen8181/machine-learning
Path: blob/master/linear_regression/linear_regession_code/gradient_descent.R
library(dplyr)

# gradient descent for linear regression

# [GradientDescent] :
# @data : A data.frame containing both the input variables and the output variable.
# @target : A character string giving the name of the column that serves as the output variable.
# @learning_rate : Learning rate for the gradient descent algorithm.
# @iteration : Halting criterion : maximum number of iterations allowed for training the algorithm.
# @epsilon : Halting criterion : if the difference between the trained parameters of two consecutive iterations is smaller than this value, the algorithm halts.
# @normalize : Boolean value indicating whether to perform z-score normalization on the input variables. Defaults to TRUE.
# @method : Specify either "batch" or "stochastic" for the gradient descent method. Use "batch" for now, this will be explained later.

GradientDescent <- function( data, target, learning_rate, iteration,
                             epsilon = .001, normalize = TRUE, method )
{
    # separate the input and output variables
    input  <- data %>% select( -one_of(target) ) %>% as.matrix()
    output <- data %>% select( one_of(target) ) %>% as.matrix()

    # normalize the input variables if specified
    # record the mean and standard deviation
    if(normalize)
    {
        input <- scale(input)
        input_mean <- attr( input, "scaled:center" )
        input_sd <- attr( input, "scaled:scale" )
    }

    # implementation trick : after normalizing the original input columns,
    # add a new column of all 1s as the first column, this serves as x0 (the intercept term)
    input <- cbind( theta0 = 1, input )

    # theta_new : initialize the theta values as all 1s
    # theta_old : initialize to a different value so that the absolute difference
    #             between the two is larger than epsilon on the first iteration
    theta_new <- matrix( 1, ncol = ncol(input) )
    theta_old <- matrix( 2, ncol = ncol(input) )

    # cost function : squared error summed over the training samples, divided by ( 2 * number of samples )
    costs <- function( input, output, theta )
    {
        sum( ( input %*% t(theta) - output )^2 ) / ( 2 * nrow(output) )
    }

    # records the theta and cost value of each iteration for visualization ; add the initial guess
    theta_trace <- vector( mode = "list", length = iteration )
    theta_trace[[1]] <- theta_new
    costs_trace <- numeric( length = iteration )
    costs_trace[1] <- costs( input, output, theta_new )

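    # note (not part of the original script) : for reference, with m training samples,
    # X the input matrix (first column all 1s) and y the output column, the cost above is
    #     J(theta) = sum( ( X %*% t(theta) - y )^2 ) / ( 2 * m )
    # and its gradient, which the derivative functions below compute, is
    #     dJ / dtheta = t( t(X) %*% ( X %*% t(theta) - y ) ) / m
    # stochastic gradient descent approximates this gradient with a single training sample per update
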
    # first derivative of the cost function (the gradient)
    if( method == "batch" ) # batch gradient descent, using all training samples per update
    {
        derivative <- function( input, output, theta, step )
        {
            error <- ( input %*% t(theta) ) - output
            descent <- ( t(input) %*% error ) / nrow(output)
            return( t(descent) )
        }
    }else # stochastic gradient descent, using one training sample per update
    {
        derivative <- function( input, output, theta, step )
        {
            # cycle through the training samples, one row per update ;
            # as.numeric drops the 1 x 1 matrix returned by %*% to a plain number
            # so that it can be multiplied element-wise with the input row
            r <- step %% nrow(input) + 1
            error <- as.numeric( input[ r, ] %*% t(theta) - output[ r, ] )
            descent <- input[ r, ] * error
            return(descent)
        }
    }

    # keep updating as long as any of the theta differences is still larger than epsilon
    # and the maximum number of iterations allowed has not been reached
    step <- 1
    while( any( abs(theta_new - theta_old) > epsilon ) & step <= iteration )
    {
        step <- step + 1

        # gradient descent update
        theta_old <- theta_new
        theta_new <- theta_old - learning_rate * derivative( input, output, theta_old, step )

        # record keeping
        theta_trace[[step]] <- theta_new
        costs_trace[step] <- costs( input, output, theta_new )
    }

    # trim the trace records in case the algorithm halted before reaching
    # the maximum number of iterations allowed
    costs_trace <- costs_trace[ 1:step ]
    theta_trace <- theta_trace[ 1:step ]

    # returns the mean and standard deviation used to normalize each input column
    # ( NULL when normalize = FALSE ) and the cost, theta record
    costs <- data.frame( costs = costs_trace )
    theta <- data.frame( do.call( rbind, theta_trace ), row.names = NULL )
    norm  <- if(normalize) data.frame( input_mean = input_mean, input_sd = input_sd ) else NULL

    return( list( costs = costs, theta = theta, norm = norm ) )
}
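
# example usage (not part of the original script) : a minimal sketch fitting mpg
# against the remaining columns of the built-in mtcars dataset with batch gradient
# descent ; the dataset, learning rate and iteration count are illustrative choices only
model <- GradientDescent( data = mtcars, target = "mpg",
                          learning_rate = 0.05, iteration = 500, method = "batch" )

# the last row of model$theta holds the final coefficients (on the normalized scale)
# and model$costs can be used to check that the cost keeps decreasing
tail( model$theta, 1 )
plot( model$costs$costs, type = "l", xlab = "iteration", ylab = "cost" )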