Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Lab10/script_coef_plot_py.ipynb
2714 views
Kernel: Python 3 (ipykernel)
import pandas as pd import numpy as np #plots library import matplotlib.pyplot as plt import seaborn as sns # import linear models library import statsmodels.api as sm import statsmodels.formula.api as smf
#!pip install statsmodels # instalar stsatmodels
data = pd.read_stata("../data/Pesos/peso.dta")
data
# Dummy variable data['Dummy'] = np.where(data['cigs'] > 0 , 1, 0)
fig = plt.subplots(figsize=(8,5)) sns.histplot(data = data, x = 'bwghtlbs', alpha = 0.6, hue = 'Dummy', # crear dos hsitogramas a aprtir de los valores de la Dummy palette=['white','blue'], edgecolor="0.1", # borde de las barras linewidth=1, # ancho del borde binwidth=0.5 # ancho de la barra ) # Modificar la leyenda plt.legend(labels=['Smoking','No smoking'], title = "", frameon=True, bbox_to_anchor=(1.0, 0.98)) plt.title('Smoking status and newborn weight (lbs)', size=11) plt.ylabel('Absolute frequency') plt.xlabel('')
Text(0.5, 0, '')
Image in a Jupyter notebook
fig = plt.subplots(figsize=(8,5)) # Frcuencia relativa sns.histplot(data = data[data.Dummy == 0], # muejres no fumadoras x = 'bwghtlbs', alpha = 0.6, edgecolor="0.1", linewidth=1, stat = "percent", # frecuencia realtiva binwidth=0.5 ) sns.histplot(data = data[data.Dummy == 1], # muejres fumadoras x = 'bwghtlbs', alpha = 0.2, edgecolor="0.1", linewidth=1, stat = "percent", binwidth=0.5, color = 'red' ) plt.legend(labels=['No smoking','Smoking'], title = "", frameon=True, bbox_to_anchor=(1.0, 0.98)) plt.title('Smoking status and newborn weight (lbs)', size=11) plt.ylabel('Relative frequency') plt.xlabel('')
Text(0.5, 0, '')
Image in a Jupyter notebook
fig = plt.subplots(figsize=(8,5)) #kdeplot : función de densidad sns.kdeplot(data = data[data.Dummy == 0], x = 'bwghtlbs', alpha = 0.6, edgecolor="0.1", linewidth=1, fill=True ) sns.kdeplot(data = data[data.Dummy == 1], x = 'bwghtlbs', alpha = 0.2, edgecolor="0.1", linewidth=1, fill=True, color = 'red' ) plt.legend(labels=['No smoking','Smoking'], title = "", frameon=True, bbox_to_anchor=(1.0, 0.98)) plt.title('Smoking status and newborn weight (lbs)', size=11) plt.ylabel('Kernel Density') plt.xlabel('')
Text(0.5, 0, '')
Image in a Jupyter notebook
smf.ols('lbwght ~ Dummy', data).fit(cov_type = 'HC1') # HC0: white , HC1: Hubert - White model1 = smf.ols('lbwght ~ Dummy', data).fit(cov_type = 'HC1').summary() print(model1)
OLS Regression Results ============================================================================== Dep. Variable: lbwght R-squared: 0.021 Model: OLS Adj. R-squared: 0.020 Method: Least Squares F-statistic: 31.28 Date: Fri, 11 Nov 2022 Prob (F-statistic): 2.68e-08 Time: 21:17:36 Log-Likelihood: 346.06 No. Observations: 1388 AIC: -688.1 Df Residuals: 1386 BIC: -677.7 Df Model: 1 Covariance Type: HC1 ============================================================================== coef std err z P>|z| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 4.7718 0.006 862.764 0.000 4.761 4.783 Dummy -0.0769 0.014 -5.593 0.000 -0.104 -0.050 ============================================================================== Omnibus: 611.550 Durbin-Watson: 1.928 Prob(Omnibus): 0.000 Jarque-Bera (JB): 5930.961 Skew: -1.791 Prob(JB): 0.00 Kurtosis: 12.472 Cond. No. 2.85 ============================================================================== Notes: [1] Standard Errors are heteroscedasticity robust (HC1)
smf.ols('lbwght ~ Dummy', data).fit(cov_type = 'HC1').summary2().tables[1]
# First Model model1 = smf.ols('lbwght ~ Dummy', data).fit(cov_type = 'HC1').summary2().tables[1] model1_coef = model1.iloc[1,0] # fila posición 1 y columan posición 0 model1_coef_se = model1.iloc[1,1] # fila posición 1 y columan posición 1 # HC1: standar error robust aginst heterocedasticity model1_lower = model1.iloc[1,4] model1_upper = model1.iloc[1,5]
# Second Model model2 = smf.ols('lbwght ~ Dummy + motheduc', data).fit(cov_type = 'HC1').summary2().tables[1] model2_coef = model2.iloc[1,0] model2_coef_se = model2.iloc[1,1] model2_lower = model2.iloc[1,4] model2_upper = model2.iloc[1,5] # Third Model model3 = smf.ols('lbwght ~ Dummy + motheduc + lfaminc + white + Dummy:(motheduc + lfaminc + white)', data).fit(cov_type = 'HC1').summary2().tables[1] model3_coef = model3.iloc[1,0] model3_coef_se = model3.iloc[1,1] model3_lower = model3.iloc[1,4] model3_upper = model3.iloc[1,5]
table = np.zeros( ( 3, 4 ) ) table[0,0] = model1_coef table[0,1] = model1_coef_se table[0,2] = model1_lower table[0,3] = model1_upper table[1,0] = model2_coef table[1,1] = model2_coef_se table[1,2] = model2_lower table[1,3] = model2_upper table[2,0] = model3_coef table[2,1] = model3_coef_se table[2,2] = model3_lower table[2,3] = model3_upper table_pandas = pd.DataFrame( table, columns = [ "Estimate","Std. Error","Lower_bound" , "Upper_bound"]) table_pandas.index = [ "OLS baseline","OLS with controls", "OLS interactive model"] table_pandas.reset_index(inplace = True) table_pandas.rename(columns = {"index" : "Model"}, inplace = True) table_pandas.round(8)
fig, ax = plt.subplots(figsize=(7, 6)) ax.scatter(x=table_pandas['Model'], marker='o', s=20, # s: modificar tamaño del point y=table_pandas['Estimate'], color = "black") eb1 = plt.errorbar(x=table_pandas['Model'], y=table_pandas['Estimate'], yerr = 0.5*(table_pandas['Upper_bound']- table_pandas['Lower_bound']), color = 'black', ls='', capsize = 4) # ls='': no une los puntos rojos # yerr genera el gráfico del intervalo de confianza plt.axhline(y=0, color = 'black').set_linestyle('--') # linea horizontal # Set title & labels plt.title('Smoking Coefficient (95% CI)',fontsize=12)
Text(0.5, 1.0, 'Smoking Coefficient (95% CI)')
Image in a Jupyter notebook