CoCalc -- r_script.R

GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_grupal/WG1/Solution/r_script.R
⁴⁵⁹⁶ views
1

2
#######################################
3

4
" Homework 1 - solution  "
5
" @author: Roberto Mendoza "
6
" @date: 12/09/2020 "
7

8
#######################################
9

10
"1. IF statement and Loop"
11

12
vector <- sample( seq(500) , 20) # seq(500) valores entre 0 y 500, 20 observaciones
13

14
length(vector)
15

16
for (i in seq(20)){
17
  
18
  x <- vector[i]
19
  
20
  if (x<=100) { y = x^(0.5) 
21
  } else if (x>100 & x <=300) { y = x - 5 
22
  } else if (x>300) { y = 50 }
23
  
24
  vector[i] <- y
25
  
26
}
27

28

29
"2. IF statement and Loop, escalar function"
30

31

32
## Escalar a vector 
33

34
vector2 <- sample( seq(500) , 100) # seq(500) valores entre 0 y 500, 100 observaciones
35

36
length(vector2)
37

38
for (i in seq(100)){
39
  
40
  x <- vector2[i]
41
  max <- max(vector2)
42
  min <- min(vector2)
43
  
44
  y  <- (x - min)/(max-min)
45
  
46
  vector2[i] <- y
47
  
48
}
49

50
vector2
51

52
## Escalar Matrix
53

54
set.seed(756)
55

56
total <- 100*50 # total elements
57
  
58
elements <- sample(total) # random number
59

60
M <- matrix(elements , nrow = 100, ncol = 50) # reshape matrix
61

62
dim(M)
63

64
for (i in seq(dim(M)[2] )){
65
  
66
  max <- max(M[,i])
67
  min <- min(M[,i])
68
  
69
  
70
  for (j in seq(dim(M)[1] )){
71
  
72
    x <- M[j,i]
73

74
    y  <- (x - min)/(max-min)
75
  
76
    M[j,i] <- y
77
  }
78
}
79

80
print(M)
81

82
"3. OLS and Loop"
83

84

85

86
set.seed(756)
87

88
x1 <- runif(10000)
89
x2 <- runif(10000)
90
x3 <- runif(10000)
91
x4 <- runif(10000)
92
x4 <- runif(10000)
93
x5 <- rnorm(10000)
94
e <- rnorm(1000)
95

96
# Poblacional regression (Data Generating Process GDP)
97

98
Y <- 1 + 0.8*x1 + 1.2*x2 + 0.5*x3 + 1.5*x4 + 0.5*x5 + e
99

100

101
#M1 <- matrix(0,8,2)
102

103
X <- cbind(matrix(1,10000), x1,x2,x3,x5) # omitiendo la cuarta variable
104

105

106
size <- c(10,50,80,120,200,500,800,1000,5000)
107

108
beta <- matrix(0,length(size),5)
109
sd <- matrix(0,length(size),5)
110

111
Reg <- function(X,Y){
112
  
113
  beta <- solve(t(X) %*% X) %*% (t(X) %*% Y)
114
  y_est <- X %*% beta  ## Y estimado 
115
  n <- dim(X)[1]  # filas
116
  k <- dim(X)[2] - 1  # varaibles sin contar el intercepto}
117
  df <- n- k ## grados de libertad
118
  sigma <- sum(sapply(Y - y_est , function(x) x ^ 2))/ df 
119
  
120
  Var <- sigma*solve(t(X) %*% X)
121
  sd <- sapply(diag(Var) , sqrt) #
122
  
123
  
124
  return(list(beta,sd))
125
  
126
} 
127

128
j <- 0
129

130
for (i in size) {
131
  j <- j + 1
132
  filter <- sample( seq(10000) , i)
133
  X1 <- X[filter,] # filter filas de todas las columnas
134
  Y1 <- Y[filter]  # filter oservaciones del vector Y
135
  
136
  beta[j,] <- matrix ( unlist( Reg(X1,Y1)[1]), 1, 5)
137
  sd[j,] <- matrix ( unlist( Reg(X1,Y1)[2] ), 1, 5)
138
}
139

140
table <- data.frame(sample_size= size,  
141
                    beta_inter = beta[,1], sd_inter = sd[,1], 
142
                    beta_x1 = beta[,2], sd_x1= sd[,2], 
143
                    beta_x2 = beta[,3], sd_x2 = sd[,3], 
144
                    beta_x3 = beta[,4], sd_x3 = sd[,4], 
145
                    beta_x5= beta[,5], sd_x5 = sd[,5]
146
                    )
147

148
table
149

150

151

152
"4. Fucntion OLS"
153

154
x1 <- rnorm(800)
155
x2 <- rnorm(800)
156
x3 <- rnorm(800)
157
x4 <- rnorm(800)
158
x5 <- rnorm(800)
159
x6 <- rnorm(800)
160
x7 <- rnorm(800)
161

162
e <- rnorm(800)
163

164

165
Y <- 1 + 0.8*x1 + 1.2*x2 + 0.5*x3 + 1.5*x4 + 0.2*x5+0.5*x6+2*x7+e
166
X <- cbind(matrix(1,800), x1,x2,x3,x4,x5,x6,x7)
167

168

169
ols <- function(M, Y , intercepto = TRUE, Robust.sd = FALSE){
170
  
171
  
172
  if (intercepto){
173
    
174
    beta <- solve(t(M) %*% M) %*% (t(M) %*% Y)
175
    
176
    y_est <- M %*% beta  ## Y estimado 
177
    n <- dim(M)[1]  # filas
178
    k <- dim(M)[2] - 1  # varaibles sin contar el intercepto}
179
    df <- n- k ## grados de libertad
180
    
181
    if (Robust.sd==F){
182
    
183
        sigma <- sum(sapply(Y - y_est , function(x) x ^ 2))/ df 
184
        Var <- sigma*solve(t(M) %*% M)
185
        sd <- sapply(diag(Var) , sqrt) ## raíz cuadrado a los datos de la diagonal principal de Var
186
        
187
        t.est <- abs(beta/sd)
188
        pvalue <-2*pt(t.est, df = df, lower.tail = FALSE) ## pt : t - student densidad
189
        
190
        lower_bound <- beta-1.96*sd
191
        upper_bound <- beta+1.96*sd
192
        
193
        SCR <- sum(sapply(Y - y_est , function(x) x ^ 2))
194
        SCT <- sum(sapply(Y - mean(y_est) , function(x) x ^ 2))
195
        
196
        R2 <- 1-SCR/SCT
197
        
198
        rmse <- sqrt(SCR/n)
199
        
200
        table <- data.frame(OLS = beta,  
201
                            standar.error = sd, P.value = pvalue, Lower.bound=lower_bound ,
202
                            Upper.bound = upper_bound)
203
        
204
        fit_var <- c(R2,rmse)
205
    }else{
206
      
207
      
208
      Matrix_robust <- diag(sapply(Y - y_est , function(x) x ^ 2),n)
209
      
210
      
211
      
212
      
213
      Var <- solve(t(M) %*% M) %*% t(M) %*% Matrix_robust %*% t(M) %*% solve(t(M) %*% M)
214
      sd <- sapply(diag(Var) , sqrt) ## raíz cuadrado a los datos de la diagonal principal de Var
215
      
216
      t.est <- abs(beta/sd)
217
      pvalue <-2*pt(t.est, df = df, lower.tail = FALSE) ## pt : t - student densidad
218
      
219
      lower_bound <- beta-1.96*sd
220
      upper_bound <- beta+1.96*sd
221
      
222
      SCR <- sum(sapply(Y - y_est , function(x) x ^ 2))
223
      SCT <- sum(sapply(Y - mean(y_est) , function(x) x ^ 2))
224
      
225
      R2 <- 1-SCR/SCT
226
      
227
      rmse <- sqrt(SCR/n)
228
      
229
      table <- data.frame(OLS = beta,  
230
                          standar.error = sd, P.value = pvalue, Lower.bound=lower_bound ,
231
                          Upper.bound = upper_bound)
232
      
233
      fit_var <- c(R2,rmse)
234
      
235
    }
236
  
237
    
238
  }else {
239
    
240
    M <- M[,2:ncol(M)]
241
    
242
    beta <- solve(t(M) %*% M) %*% (t(M) %*% Y)
243
    
244
    y_est <- M %*% beta  ## Y estimado 
245
    n <- dim(M)[1]  # filas
246
    k <- dim(M)[2] - 1  # varaibles sin contar el intercepto}
247
    df <- n- k ## grados de libertad
248
    
249
    if (Robust.sd==F){
250
      
251
      sigma <- sum(sapply(Y - y_est , function(x) x ^ 2))/ df 
252
      Var <- sigma*solve(t(M) %*% M)
253
      sd <- sapply(diag(Var) , sqrt) ## raíz cuadrado a los datos de la diagonal principal de Var
254
      
255
      t.est <- abs(beta/sd)
256
      pvalue <-2*pt(t.est, df = df, lower.tail = FALSE) ## pt : t - student densidad
257
      
258
      lower_bound <- beta-1.96*sd
259
      upper_bound <- beta+1.96*sd
260
      
261
      SCR <- sum(sapply(Y - y_est , function(x) x ^ 2))
262
      SCT <- sum(sapply(Y - mean(y_est) , function(x) x ^ 2))
263
      
264
      R2 <- 1-SCR/SCT
265
      
266
      rmse <- sqrt(SCR/n)
267
      
268
      table <- data.frame(OLS = beta,  
269
                          standar.error = sd, P.value = pvalue, Lower.bound=lower_bound ,
270
                          Upper.bound = upper_bound)
271
      
272
      fit_var <- c(R2,rmse)
273
    }else{
274
      
275
      
276
      Matrix_robust <- diag(sapply(Y - y_est , function(x) x ^ 2),n)
277
      
278
      
279
      Var <- solve(t(M) %*% M) %*% t(M) %*% Matrix_robust %*% M %*% solve(t(M) %*% M)
280
      sd <- sapply(diag(Var) , sqrt) ## raíz cuadrado a los datos de la diagonal principal de Var
281
      
282
      t.est <- abs(beta/sd)
283
      pvalue <-2*pt(t.est, df = df, lower.tail = FALSE) ## pt : t - student densidad
284
      
285
      lower_bound <- beta-1.96*sd
286
      upper_bound <- beta+1.96*sd
287
      
288
      SCR <- sum(sapply(Y - y_est , function(x) x ^ 2))
289
      SCT <- sum(sapply(Y - mean(y_est) , function(x) x ^ 2))
290
      
291
      R2 <- 1-SCR/SCT
292
      
293
      rmse <- sqrt(SCR/n)
294
      
295
      table <- data.frame(OLS = beta,  
296
                          standar.error = sd, P.value = pvalue, Lower.bound=lower_bound ,
297
                          Upper.bound = upper_bound)
298
      
299
      fit_var <- c(R2,rmse)
300
      
301
    }
302
    
303
  }
304
  
305
  return(list(table,fit_var))
306
  
307
  }
308
  
309
ols(X,Y)
310

311
ols(X,Y, intercepto = F, Robust.sd = T)
312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327
Product

Resources

Company