CoCalc -- Grupo1_Pregunta1_OLS

GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_final/grupo1/Grupo1_Pregunta1_OLS_R.R
⁴⁶⁸⁴ views
1
install.packages("rlang")
2
install.packages("Metrics")
3
install.packages("xtable")
4
install.packages("tidyverse")
5

6
library(ggplot2)
7
library(Metrics)
8
library(haven)
9
library(dplyr)
10
library(stargazer)
11
library(plm)
12
library(lmtest)
13
library(sandwich)
14
library(estimatr)
15
library(lfe)
16
library(caret)
17
library(texreg)
18
library(mfx)
19
library(fastDummies)
20
library(lubridate) 
21
library(tidyverse)
22

23
librarian::shelf(plm,lfe,caret,texreg,mfx,fastDummies,estimatr,dplyr,stargazer,tidyverse,sandwich,lmtest,xtable,haven)
24

25
#Seteamos el directorio
26
user <- Sys.getenv("USERNAME")  # username
27

28
setwd( paste0("C:/Users/",user,"/Documents/GitHub/1ECO35_2022_2") ) # set directorio
29

30

31
DATA <- read_dta("Trabajo_final/datos/mss_repdata.dta")
32
DATA <-as.data.frame(DATA)
33

34
#Me quedo con las variables necesarias para realizar la tabla de estad�sticos
35
dat_2 <- DATA %>% dplyr::select(NDVI_g,tot_100,trade_pGDP,pop_den_rur,land_crop,va_agr,va_ind_manf) %>% as.data.frame()
36

37
#Tabla de estad�sticas de variables seleccionadas
38
list_var <- c("Tasa de variaci�n del �ndice de vegetaci�n", "T�rminos de intercambio",
39
              "Porcentaje de las exportaciones recpecto al PBI", "Densidad poblacional rural",
40
              "Porcentaje de tierra cultivable en uso", "Valor agregado del sector agricultura respecto PBI",
41
              "Valor agregado del sector manufacturero respecto PBI")
42

43
stargazer(dat_2, title = "Descriptive Statistics", digits = 2, covariate.labels = list_var,  
44
          summary.stat = c("mean", "sd", "n"),min.max = F,
45
           notes.append = FALSE, notes.align = 'l')
46

47
#Se crean dummies para cada pa�s
48

49
DATA  <- dummy_cols(DATA, select_columns = 'ccode')
50

51
index <- grep("ccode_", colnames(DATA))
52

53
# Se crea la variable a�o a partir de 1981 en adelante
54
DATA$time_year <- DATA$year - 1980
55

56
list_vars_mod <- names(DATA)[index]
57
list_vars_mod[1]
58

59
#Creamos la variable a�o x pa�s.
60

61
i = 1
62

63
while(i < 42){
64
  
65
  var <- paste0(list_vars_mod[i],"_","time")
66
  DATA[var]  <- DATA[list_vars_mod[i]]*DATA["time_year"]
67
  
68
  i = i + 1
69
}
70

71

72
# Modelo (1)
73

74

75
index_country_time <- grep("_time$", colnames(DATA))
76
country_time_trend <-names(DATA)[index_country_time]
77
index_country <- grep("^ccode_\\d+$", colnames(DATA))
78

79
country_fe <-names(DATA)[index_country]
80

81

82
model1_formula <- as.formula(
83
  paste("any_prio",
84
        "~",
85
        paste("GPCP_g","GPCP_g_l", paste(country_time_trend, collapse = "+"),
86
              paste(country_fe, collapse = "+")
87
              , sep="+")
88
  )
89
)
90

91

92
ols_model1 <- lm_robust(model1_formula, data = DATA,
93
                        clusters = ccode, se_type = "stata")
94

95
summary(ols_model1)
96
tidy(ols_model1)
97
glance(ols_model1)
98

99
#Usamos LM y luego coestest para hallar los errores standar robusto"
100

101

102
m1 <- lm(model1_formula, data = DATA)
103

104
lm_rmse1 <- round(rmse(m1$fitted.values, DATA$any_prio) ,2 )
105

106
robust_model1 <- coeftest(m1 ,
107
                          vcov = vcovCL,
108
                          type = "HC1",
109
                          cluster = ~ ccode)
110

111

112

113
sd_robust_model1 <- robust_model1[,2]
114

115

116

117
# Modelo (2)
118

119
model2_formula <- as.formula(
120
  paste("war_prio",
121
        "~",
122
        paste("GPCP_g","GPCP_g_l", paste(country_time_trend, collapse = "+"),
123
              paste(country_fe, collapse = "+")
124
              , sep="+")
125
  )
126
)
127

128

129
ols_model2 <- lm_robust(model2_formula, data = DATA,
130
                        clusters = ccode, se_type = "stata")
131

132
summary(ols_model2)
133

134
#Usando LM y luego coestest para los error standar robusto"
135

136

137
m2 <- lm(model2_formula, data = DATA)
138

139
lm_rmse2 <- round(rmse(m2$fitted.values, DATA$war_prio ) ,2 )
140

141
robust_model2 <- coeftest(m2 ,
142
                          vcov = vcovCL,
143
                          type = "HC1",
144
                          cluster = ~ ccode)
145

146

147

148
sd_robust_model2 <- robust_model2[,2]
149

150
#Exportando la tabla
151

152

153
stargazer( m1, m2,
154
           se=list(sd_robust_model1, sd_robust_model2),
155
           dep.var.labels = c(""),
156
           title = "Rainfall and Civil Conflict (Reduced-Form)",
157
           keep = c("GPCP_g","GPCP_g_l"),
158
           covariate.labels=c("Growth in rainfall, t","Growth in rainfall, t-1"),
159
           align = T, no.space = T,
160
           add.lines=list(c("Country fixed effects","yes","yes"),
161
                          c("Country-specific time trends","yes","yes"),
162
                          c("Root mean square error",lm_rmse1,lm_rmse2)),
163
           keep.stat = c("rsq","n"),
164
           notes.append = FALSE, notes.align = "l",
165
           notes ="Huber robust standard errors are in parentheses. Regression disturbance terms are clustered at the country level.A country-specific year time trend is included in all specifications (coefficient estimates not reported). * Significantly different from zero at 90 percent confidence. ** Significantly different from zero at 95 percent confidence. *** Significantly different from zero at 99 percent confidence.", style = "qje"
166
)
167

168

169

170
##### Coef-plot
171

172
# Primero calculamos los intervalos de confianza para ambos modelos
173

174
#Modelos 1. c.i : confidence interval 
175

176
model1_lower = coefci(m1, df = Inf, 
177
       vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,1]
178

179
model1_upper = coefci(m1, df = Inf, 
180
                      vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,2]
181

182
#Modelo 2. c.i : confidence interval 
183

184
model2_lower = coefci(m2, df = Inf, 
185
       vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,1]
186

187
model2_upper = coefci(m2, df = Inf, 
188
                      vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,2]
189

190

191
#Extraemos los coeficientes de la variable gpcp_g de ambos modelos
192

193
model1_coef<- ols_model1$coefficients[2]
194

195
model2_coef<- ols_model2$coefficients[2]
196

197
# Creamos una tabla de resultados que recopile los coeficientes y los intervalos de confianza
198

199
table<- matrix(0, 2, 3)
200

201
table[1,1]<- model1_coef
202
table[1,2]<- model1_lower
203
table[1,3]<- model1_upper
204

205
table[2,1]<- model2_coef
206
table[2,2]<- model2_lower
207
table[2,3]<- model2_upper
208

209
#Le damos nombres a las filas y columnas
210
colnames(table)<- c("Estimate","lower_bound","upper_bound")
211
rownames(table)<- c("OLS 1", "OLS 2")
212

213

214
# Lo convertimos a dataframe para poder ploterarlo
215
tab <- as.data.frame(table)
216

217
#Ploteamos el gr�fico
218

219
theme_set(theme_bw(20)) # tema con fondo blanco y cuadro con bordes en negro
220

221
options(repr.plot.width = 8, repr.plot.height =8)  # tama�o del gr�fico
222

223

224
tab  %>% ggplot(aes(x=rownames(tab), y=Estimate)) +
225
  geom_point(size=2, color = 'red') +
226
  geom_errorbar(aes(ymin=lower_bound, ymax=upper_bound) , width = 0.1,color="blue", size = 0.9) +
227
  labs(x="", y="") + ggtitle("Stimated Coefficient of growth in rainfall (95% CI)")  +
228
  theme(text=element_text(size =8), plot.title = element_text(hjust = 0.5)) +
229
  geom_hline(yintercept=0, linetype="dashed", color = "black", size=1) +
230
  scale_x_discrete(limits = c("OLS 1","OLS 2")) + # order x - axis
231
  theme(panel.grid.major = element_blank(), # borramos las cuadr�culas en el fondo
232
        panel.grid.minor = element_blank())
233

234
# Guardamos la imagen del coefplot en la ruta especificada
235

236
ggsave("Trabajo_final/grupo1/plots/Coef_plot.png"
237
       , height = 8  # alto
238
       , width = 12  # ancho
239
       , dpi = 320   # resoluci�n (calidad de la imagen)
240
)
241

242

243

244
Product

Resources

Company