Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Lab10/R_script.ipynb
2714 views
Kernel: R
library('dplyr') # filter dataset library(ggplot2) # plots library(sandwich) # linear models library(lmtest) # linear models library(xtable) library(haven) # for read Stara files, spps and others library("stringr") # library to RE library("readxl")
data <- read_dta("../../../data/Pesos/peso.dta")
head(data)
# type of variables as.list(sapply(data, class))
# Dummy variable data <- data %>% mutate(Dummy = ifelse(cigs > 0 , 1 , ifelse(!is.na(cigs),0, NA) ) ) %>% mutate(Dummy1 = case_when(Dummy == 0 ~ "No smoking", # create this variables for plot section Dummy == 1 ~ "Smoking")) attach(data)
The following objects are masked from data (pos = 3): bwght, bwghtlbs, cigprice, cigs, cigtax, Dummy, Dummy1, faminc, fatheduc, lbwght, lfaminc, male, motheduc, packs, parity, white The following objects are masked from data (pos = 4): bwght, bwghtlbs, cigprice, cigs, cigtax, Dummy, faminc, fatheduc, lbwght, lfaminc, male, motheduc, packs, parity, white The following objects are masked from data (pos = 5): bwght, bwghtlbs, cigprice, cigs, cigtax, Dummy, faminc, fatheduc, lbwght, lfaminc, male, motheduc, packs, parity, white
theme_set(theme_bw(20)) data %>% ggplot(aes(x=bwghtlbs, fill = Dummy1)) + geom_histogram( color="black", alpha=0.6, position = 'identity', size=0.6) + scale_fill_manual(values=c("#69b3a2", "#404080"), name=NULL) + # for legend section labs(x = " ", y = "Absolute frequency", title = "Smoking status and newborn weight (lbs)", size = 15) + theme(text=element_text(size =15), plot.title = element_text(hjust = 0.5)) + theme(legend.position = c(0.85,0.9)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + # for legend position scale_x_continuous(limits = c(0,18), expand = c(0, 0)) + scale_y_continuous(limits = c(0,300), expand = c(0, 0))
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`. Warning message: "Removed 4 rows containing missing values (geom_bar)."
Image in a Jupyter notebook
theme_set(theme_bw(20)) data %>% ggplot(aes(x=bwghtlbs, fill = Dummy1)) + geom_histogram( aes(y = ..density..), color="black", alpha=0.3, position = 'identity', size=0.6) + scale_fill_manual(values=c("blue", "red"), name=NULL) + # for legend section labs(x = " ", y = "Relative frequency", title = "Smoking status and newborn weight (lbs)", size = 15) + theme(text=element_text(size =15), plot.title = element_text(hjust = 0.5)) + theme(legend.position = c(0.85,0.9)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + # drop square lines scale_x_continuous(limits = c(0,18), expand = c(0, 0)) + scale_y_continuous(limits = c(0,0.4), expand = c(0, 0))
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`. Warning message: "Removed 4 rows containing missing values (geom_bar)."
Image in a Jupyter notebook
theme_set(theme_bw(20)) data %>% ggplot(aes(x=bwghtlbs, fill = Dummy1 , colour=Dummy1)) + geom_density(alpha=0.3, color = "black", size=0.6) + ggtitle("Smoking status and newborn weight (lbs)") + theme(text=element_text(size =15), plot.title = element_text(hjust = 0.5)) + labs(x = "", y = "Kernel Density") + scale_fill_manual(values=c("blue", "red"), name=NULL) + theme(legend.position = c(0.85,0.9)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + # drop square lines scale_x_continuous(limits = c(0,18), expand = c(0, 0)) + scale_y_continuous(limits = c(0,0.4), expand = c(0, 0))
Image in a Jupyter notebook
# First Model model1 <- lm(lbwght ~ Dummy) model1.tab <- coeftest(model1, vcov=vcovHC(model1, type='HC')) model1_coef <- model1.tab[2,1] model1_coef_se = model1.tab[2,2] # HC0: standar error robust aginst heterocedasticity model1_lower = coefci(model1, df = Inf, vcov. = vcovHC, type = "HC")[2,1] model1_upper = coefci(model1, df = Inf, vcov. = vcovHC, type = "HC")[2,2] # Second Model model2 <- lm(lbwght ~ Dummy + motheduc) model2.tab <- coeftest(model2, vcov=vcovHC(model2, type='HC')) model2_coef <- model2.tab[2,1] model2_coef_se = model2.tab[2,2] model2_lower = coefci(model2, df = Inf, vcov. = vcovHC, type = "HC")[2,1] model2_upper = coefci(model2, df = Inf, vcov. = vcovHC, type = "HC")[2,2] # Third Model model3 <- lm(lbwght ~ Dummy + motheduc + lfaminc + white + Dummy:(motheduc + lfaminc + white)) model3.tab <- coeftest(model3, vcov=vcovHC(model3, type='HC')) model3_coef <- model3.tab[2,1] model3_coef_se = model3.tab[2,2] model3_lower = coefci(model3, df = Inf, vcov. = vcovHC, type = "HC")[2,1] model3_upper = coefci(model3, df = Inf, vcov. = vcovHC, type = "HC")[2,2]
table<- matrix(0, 3, 4) table[1,1]<- model1_coef table[1,2]<- model1_coef_se table[2,1]<- model2_coef table[2,2]<- model2_coef_se table[3,1]<- model3_coef table[3,2]<- model3_coef_se table[1,3]<- model1_lower table[1,4]<- model1_upper table[2,3]<- model2_lower table[2,4]<- model2_upper table[3,3]<- model3_lower table[3,4]<- model3_upper colnames(table)<- c("Estimate","se","lower_bound","upper_bound") rownames(table)<- c("OLS baseline", "OLS with controls", "OLS interactive model") tab<- xtable(table) tab
theme_set(theme_bw(20)) options(repr.plot.width = 8, repr.plot.height =8) # plot size ggplot(tab, aes(x=rownames(tab), y=Estimate)) + geom_point(size=4, color = 'red') + geom_errorbar(aes(ymin=lower_bound, ymax=upper_bound) , width = 0.1,color="blue", size = 0.9) + labs(x="", y="") + ggtitle("Smoking Coefficient (95% CI)") + theme(text=element_text(size =15), plot.title = element_text(hjust = 0.5)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + geom_hline(yintercept=0, linetype="dashed", color = "black", size=1) + scale_x_discrete(limits = c("OLS baseline","OLS with controls", "OLS interactive model")) # order x - axis
Image in a Jupyter notebook
data <- read_excel("../../../data/Centro_salud/Centro_salud_mental.xls")
head(data)
# equivalents expression to regex using apply data$Ins <- apply(data['Institución_ruc'], 1, function(x) gsub("[0-9]", '', x)) data$Ins <- apply(data['Institución_ruc'], 1, function(x) gsub("\\d", '', x)) # drop digist data$Ruc <- apply(data['Institución_ruc'], 1, function(x) gsub("[^0-9]", '', x)) data$Ruc <- apply(data['Institución_ruc'], 1, function(x) gsub("\\D", '', x)) # No drop digist
data