Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_final/grupo1/Grupo1_Pregunta1_OLS_Python.ipynb
2714 views
Kernel: Python 3 (ipykernel)

Pregunta 1

# Import libraries
import pandas as pd
import numpy as np
import re
from tqdm import tqdm  # track the elapsed time of a loop
import os

# Linear-model libraries
import statsmodels.api as sm  # linear regression that uses all the columns of the data set
import statsmodels.formula.api as smf  # linear regression specified through a formula
from stargazer.stargazer import Stargazer
from sklearn import datasets, linear_model  # models
from sklearn.metrics import mean_squared_error, r2_score
import econtools.metrics as mt
from linearmodels.iv import IV2SLS  # IV regression

import warnings
warnings.filterwarnings('ignore')  # suppress warning messages

# Export tables to LaTeX
from pystout import pystout

# Plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns

# Set the working directory to the project's data folder
user = os.getlogin()  # username
os.chdir(f"C:/Users/{user}/Documents/GitHub/1ECO35_2022_2/Trabajo_final/datos")
# Load the data set.
# convert_categoricals=False keeps the stored codes instead of reading the value labels.
data_path = r"../datos/mss_repdata.dta"
repdata = pd.read_stata(data_path, convert_categoricals=False)

# Show the data set
repdata

# Summary statistics of the data set
repdata.describe()

# Variable information, to learn the type of each column
repdata.info()
repdata.dtypes
<class 'pandas.core.frame.DataFrame'> Int64Index: 743 entries, 0 to 742 Columns: 200 entries, ccode to soc dtypes: datetime64[ns](1), float32(104), float64(78), int32(9), int8(5), object(3) memory usage: 813.4+ KB
ccode float64 year datetime64[ns] country_name object country_code object GPCP float32 ... fh_pol float64 S float32 W float32 WoverS float32 soc float32 Length: 200, dtype: object
# Subtract the base year from each calendar year so the time variable takes small values
calendar_years = pd.DatetimeIndex(repdata['year']).year
repdata['time_year'] = calendar_years - 1978
repdata['time_year']
0 3 1 4 2 5 3 6 4 7 .. 738 17 739 18 740 19 741 20 742 21 Name: time_year, Length: 743, dtype: int64
# ccode (country code) is stored as a float, so cast it to integer before
# building the indicator columns; each column name gets the "ccode" prefix.
# dummy_na=False: do not create an indicator for missing values.
# One dummy per country captures omitted variables that are constant within a country.
country_codes = repdata["ccode"].astype(int)
dummys = pd.get_dummies(country_codes, prefix="ccode", dummy_na=False)
dummys.columns
Index(['ccode_404', 'ccode_420', 'ccode_432', 'ccode_433', 'ccode_434', 'ccode_435', 'ccode_436', 'ccode_437', 'ccode_438', 'ccode_439', 'ccode_450', 'ccode_451', 'ccode_452', 'ccode_461', 'ccode_471', 'ccode_475', 'ccode_481', 'ccode_482', 'ccode_483', 'ccode_484', 'ccode_490', 'ccode_500', 'ccode_501', 'ccode_510', 'ccode_516', 'ccode_517', 'ccode_520', 'ccode_522', 'ccode_530', 'ccode_540', 'ccode_541', 'ccode_551', 'ccode_552', 'ccode_553', 'ccode_560', 'ccode_565', 'ccode_570', 'ccode_571', 'ccode_572', 'ccode_580', 'ccode_625'], dtype='object')
# Show the indicator matrix
dummys
# Number of country indicator columns
dummys.shape[1]
41
# Append the country indicators to the data set side by side (column-wise)
repdata = pd.concat([repdata, dummys], axis="columns")
repdata
# Country-specific time trends: each country dummy multiplied by the time variable.
# They capture omitted variables that vary over time within each country.
# Iterating over dummys.columns (instead of a counter up to a hard-coded 41)
# keeps the loop correct if the set of countries changes.
for dummy_name in dummys.columns:
    trend_name = f"{dummy_name}_time"  # e.g. ccode_404 -> ccode_404_time
    repdata[trend_name] = repdata[dummy_name] * repdata["time_year"]
# Inspect country code and time variable for the first 40 observations
repdata.loc[:, ['ccode', 'time_year']].head(40)
# Variables used for the descriptive statistics
summary_vars = ["NDVI_g", "tot_100", "trade_pGDP", "pop_den_rur",
                "land_crop", "va_agr", "va_ind_manf"]
tab1 = repdata.loc[:, summary_vars]
# Show the selected columns
tab1

# Statistics of interest: mean, standard deviation and number of observations
wanted_stats = ["mean", "std", "count"]
sum_tab = tab1.describe().loc[wanted_stats]
sum_tab

# Same table, transposed (.T) so that each variable is a row
sum_tab = tab1.describe().loc[wanted_stats].T
sum_tab

# Show the column names
tab1.columns
Index(['NDVI_g', 'tot_100', 'trade_pGDP', 'pop_den_rur', 'land_crop', 'va_agr', 'va_ind_manf'], dtype='object')
tab1.columns

# Display labels, in the same order as tab1.columns
nuevos = [
    "Tasa de variación del índice de vegetación",
    "Términos de intercambio",
    "Porcentaje de las exportaciones respecto al PBI",
    "Densidad poblacional rural",
    "Porcentaje de tierra cultivable en uso",
    "Valor agregado del sector agrícola respecto al PBI",
    "Valor agregado del sector manufacturero respecto al PBI",
]

# Pair each original column name with its display label
{old_name: new_name for old_name, new_name in zip(tab1.columns, nuevos)}
{'NDVI_g': 'Tasa de variación del índice de vegetación', 'tot_100': 'Términos de intercambio', 'trade_pGDP': 'Porcentaje de las exportaciones respecto al PBI', 'pop_den_rur': 'Densidad poblacional rural', 'land_crop': 'Porcentaje de tierra cultivable en uso', 'va_agr': 'Valor agregado del sector agrícola respecto al PBI', 'va_ind_manf': 'Valor agregado del sector manufacturero respecto al PBI'}
# Customise the summary table.
# Row labels: original variable name -> display label
index_nuevos = {old_name: new_name for old_name, new_name in zip(tab1.columns, nuevos)}

# Column labels: statistic name -> display label
columns_nuevos = {
    "mean": "Media",
    "std": "Desviación estándar",
    "count": "Observaciones",
}

# Rename rows and columns of the summary table in place
sum_tab.rename(index=index_nuevos, columns=columns_nuevos, inplace=True)

# Show the resulting table
sum_tab

# Number formatting: two decimals for the mean and the standard deviation,
# none for the observation count (it is a whole number)
styled_summary = (
    sum_tab.style
    .format(subset="Media", precision=2)
    .format(subset="Desviación estándar", precision=2)
    .format(subset="Observaciones", precision=0)
)
styled_summary

# Export the formatted table to LaTeX.
# column_format "lccc": first column left-aligned (l), the other three centred (c).
# The generated code can be pasted into a LaTeX document.
styled_summary.to_latex(
    "sum.tex",
    caption="Estadísticas Descriptivas",
    column_format="lccc",
)

Model OLS (1):

  • Variable endógena: any_prio

  • Variables explicativas: GPCP_g y GPCP_g_l

  • Sí efectos fijos (country)

  • Sí country-time trends

  • Errores estándar robustos (Huber-White robust)

  • Los términos de perturbación están clusterizados (agrupados) a nivel país

  • No se añaden variables de control

# Dependent variable
y1 = repdata["any_prio"]

# Explanatory variables plus a constant term
explanatory_1 = repdata[["GPCP_g", "GPCP_g_l"]]
X1 = sm.add_constant(explanatory_1)
X1

# Fit the model with the default (non-robust) covariance estimator
ols_model_1 = sm.OLS(y1, X1).fit()

# The summary is split into three tables (indexing starts at 0)
summary_1 = ols_model_1.summary()

# Table 0: general information - R-squared, adjusted R-squared,
# F-statistic and number of observations
summary_1.tables[0]

# Table 1: coefficients, standard errors, p-values and confidence intervals
summary_1.tables[1]

# Table 2: diagnostics such as Durbin-Watson, Jarque-Bera and kurtosis
summary_1.tables[2]

# Print all tables together; the layout is close to Stata's output
print(ols_model_1.summary())
OLS Regression Results ============================================================================== Dep. Variable: any_prio R-squared: 0.003 Model: OLS Adj. R-squared: 0.000 Method: Least Squares F-statistic: 1.008 Date: Sun, 11 Dec 2022 Prob (F-statistic): 0.366 Time: 04:33:14 Log-Likelihood: -448.04 No. Observations: 743 AIC: 902.1 Df Residuals: 740 BIC: 915.9 Df Model: 2 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ const 0.2697 0.016 16.449 0.000 0.238 0.302 GPCP_g -0.0288 0.085 -0.339 0.735 -0.196 0.138 GPCP_g_l -0.1204 0.086 -1.397 0.163 -0.290 0.049 ============================================================================== Omnibus: 189.379 Durbin-Watson: 0.530 Prob(Omnibus): 0.000 Jarque-Bera (JB): 159.939 Skew: 1.044 Prob(JB): 1.86e-35 Kurtosis: 2.104 Cond. No. 6.26 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Robust standard errors
# HC1: heteroskedasticity-robust covariance estimator
baseline_spec_1 = sm.OLS(y1, X1)
ols_model_rb = baseline_spec_1.fit(cov_type="HC1")
print(ols_model_rb.summary())
OLS Regression Results ============================================================================== Dep. Variable: any_prio R-squared: 0.003 Model: OLS Adj. R-squared: 0.000 Method: Least Squares F-statistic: 1.014 Date: Sun, 11 Dec 2022 Prob (F-statistic): 0.363 Time: 04:33:14 Log-Likelihood: -448.04 No. Observations: 743 AIC: 902.1 Df Residuals: 740 BIC: 915.9 Df Model: 2 Covariance Type: HC1 ============================================================================== coef std err z P>|z| [0.025 0.975] ------------------------------------------------------------------------------ const 0.2697 0.016 16.374 0.000 0.237 0.302 GPCP_g -0.0288 0.090 -0.321 0.748 -0.205 0.147 GPCP_g_l -0.1204 0.087 -1.391 0.164 -0.290 0.049 ============================================================================== Omnibus: 189.379 Durbin-Watson: 0.530 Prob(Omnibus): 0.000 Jarque-Bera (JB): 159.939 Skew: 1.044 Prob(JB): 1.86e-35 Kurtosis: 2.104 Cond. No. 6.26 ============================================================================== Notes: [1] Standard Errors are heteroscedasticity robust (HC1)
# Access the information of the results table
robust_summary = ols_model_rb.summary2()
robust_summary
robust_summary.tables[1]

# List of attributes and methods of the fitted results object
dir(ols_model_rb)
['HC0_se', 'HC1_se', 'HC2_se', 'HC3_se', '_HCCM', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_abat_diagonal', '_cache', '_data_attr', '_data_in_cache', '_get_robustcov_results', '_is_nested', '_use_t', '_wexog_singular_values', 'aic', 'bic', 'bse', 'centered_tss', 'compare_f_test', 'compare_lm_test', 'compare_lr_test', 'condition_number', 'conf_int', 'conf_int_el', 'cov_HC0', 'cov_HC1', 'cov_HC2', 'cov_HC3', 'cov_kwds', 'cov_params', 'cov_params_default', 'cov_type', 'df_model', 'df_resid', 'diagn', 'eigenvals', 'el_test', 'ess', 'f_pvalue', 'f_test', 'fittedvalues', 'fvalue', 'get_influence', 'get_prediction', 'get_robustcov_results', 'het_scale', 'info_criteria', 'initialize', 'k_constant', 'llf', 'load', 'model', 'mse_model', 'mse_resid', 'mse_total', 'nobs', 'normalized_cov_params', 'outlier_test', 'params', 'predict', 'pvalues', 'remove_data', 'resid', 'resid_pearson', 'rsquared', 'rsquared_adj', 'save', 'scale', 'ssr', 'summary', 'summary2', 't_test', 't_test_pairwise', 'tvalues', 'uncentered_tss', 'use_t', 'wald_test', 'wald_test_terms', 'wresid']
# Fitted values of y obtained with the predict method
refit_1 = sm.OLS(y1, X1).fit()
refit_1.predict()
array([0.28496978, 0.2748217 , 0.25195793, 0.27214113, 0.26302426, 0.26568972, 0.2466797 , 0.27791809, 0.25526007, 0.28224195, 0.26917588, 0.30081865, 0.28540232, 0.21866934, 0.2861791 , 0.27253843, 0.27999266, 0.27934855, 0.26917232, 0.28043463, 0.27237109, 0.26828123, 0.27929381, 0.25381756, 0.26769429, 0.27252959, 0.27744108, 0.24540483, 0.2622242 , 0.28374367, 0.22578955, 0.28753694, 0.26812988, 0.25210689, 0.2885355 , 0.26993623, 0.26967236, 0.25559903, 0.24386254, 0.2739153 , 0.30170927, 0.26866805, 0.28991521, 0.24865613, 0.26155857, 0.27105444, 0.17496034, 0.29626138, 0.29942372, 0.23988976, 0.30644488, 0.18201347, 0.27697215, 0.25379256, 0.24793872, 0.26968144, 0.29971203, 0.27894354, 0.27432929, 0.27281134, 0.28310697, 0.25005254, 0.24888163, 0.27129552, 0.27677362, 0.2524269 , 0.27155199, 0.28413026, 0.2312682 , 0.28199823, 0.27089332, 0.2498764 , 0.28802241, 0.26710212, 0.26587571, 0.26061178, 0.2816283 , 0.25395249, 0.24080911, 0.29886544, 0.26830424, 0.24838017, 0.28130712, 0.26336496, 0.25129281, 0.27310025, 0.28621181, 0.27746349, 0.28334718, 0.287771 , 0.26165149, 0.26820117, 0.26669159, 0.26500867, 0.28311481, 0.26802954, 0.27617081, 0.26463664, 0.2899249 , 0.25148575, 0.26197403, 0.2830811 , 0.27255562, 0.25724725, 0.27454112, 0.25363973, 0.27860369, 0.26622255, 0.26348373, 0.27386322, 0.2701804 , 0.26925542, 0.27306353, 0.27548579, 0.2667854 , 0.26456781, 0.26831091, 0.28676397, 0.27466626, 0.24652724, 0.27373615, 0.26641863, 0.25932942, 0.26635622, 0.26933033, 0.27183976, 0.27969916, 0.27150216, 0.26157324, 0.27072618, 0.27180415, 0.27647364, 0.25294516, 0.24895213, 0.2898935 , 0.28356814, 0.28855814, 0.28652282, 0.23272215, 0.28414438, 0.26690943, 0.21789304, 0.30029623, 0.29493282, 0.21974803, 0.26397938, 0.27688917, 0.19916378, 0.29395434, 0.27651376, 0.27886489, 0.24405499, 0.23491972, 0.26913932, 0.2614215 , 0.28823138, 0.23593892, 0.26834563, 0.28889719, 0.26150323, 0.25476589, 0.28021396, 0.2785566 , 0.28739284, 0.25638399, 0.2685143 , 
0.26601195, 0.266387 , 0.28047375, 0.2795782 , 0.24956602, 0.27618855, 0.21320236, 0.28918143, 0.28556657, 0.29926377, 0.23754775, 0.27906998, 0.26678421, 0.24210984, 0.26576587, 0.31459122, 0.27405136, 0.24264627, 0.277868 , 0.29356047, 0.28646617, 0.23867612, 0.27209826, 0.25694241, 0.2604217 , 0.27243465, 0.28738483, 0.27930561, 0.25147996, 0.24456337, 0.28121169, 0.26633678, 0.23019748, 0.29172668, 0.28749903, 0.24031677, 0.27051027, 0.26277372, 0.28804177, 0.2257001 , 0.27398469, 0.27986473, 0.27040816, 0.25399174, 0.28285273, 0.28265373, 0.28293827, 0.25610697, 0.26501434, 0.27097668, 0.26839272, 0.27354068, 0.27955355, 0.24731268, 0.28533431, 0.24160057, 0.28978968, 0.29282641, 0.24800285, 0.24949416, 0.23826094, 0.26310096, 0.25438468, 0.28716589, 0.29769171, 0.26526959, 0.26059674, 0.25591369, 0.25387336, 0.29156528, 0.26901609, 0.27783372, 0.26318861, 0.28377692, 0.28088419, 0.28861658, 0.2754023 , 0.21938379, 0.26384442, 0.27896696, 0.25244708, 0.28188856, 0.25745628, 0.28301216, 0.24279603, 0.30309932, 0.26144509, 0.27874158, 0.25212564, 0.27821526, 0.27051771, 0.2818894 , 0.27417885, 0.25867843, 0.29059362, 0.29452066, 0.26426166, 0.25535766, 0.24858263, 0.26895224, 0.26055379, 0.26625866, 0.27383444, 0.26121317, 0.27357787, 0.27429979, 0.25721523, 0.29231413, 0.27848952, 0.27543343, 0.26381051, 0.27067454, 0.24849388, 0.27312393, 0.29277768, 0.25732122, 0.26590736, 0.25722482, 0.24220079, 0.25835867, 0.27736391, 0.29110722, 0.25071988, 0.27249154, 0.24667352, 0.28453534, 0.26808795, 0.26916287, 0.27444919, 0.27196229, 0.28122464, 0.2752372 , 0.2750082 , 0.28815601, 0.22531638, 0.27212014, 0.27465749, 0.26100753, 0.27865461, 0.260037 , 0.28519386, 0.26132007, 0.2827029 , 0.2640916 , 0.26392098, 0.26866438, 0.28346345, 0.27598611, 0.26640891, 0.29957058, 0.22487442, 0.24185624, 0.3151507 , 0.26392006, 0.25288431, 0.2861609 , 0.28180334, 0.24162488, 0.26748908, 0.2813641 , 0.29542743, 0.26492747, 0.27644114, 0.24360548, 0.28417196, 0.26034035, 
0.17026035, 0.30999498, 0.27696997, 0.23869529, 0.29867055, 0.25692226, 0.27413664, 0.25267573, 0.25573401, 0.24826106, 0.26042945, 0.29312151, 0.29211235, 0.25152075, 0.29296429, 0.22099036, 0.28299408, 0.2356418 , 0.24959036, 0.28636198, 0.27331616, 0.28060294, 0.26493497, 0.28194248, 0.29095738, 0.24315764, 0.27541201, 0.25938504, 0.26466378, 0.26737449, 0.26595197, 0.29768923, 0.26383165, 0.25954319, 0.25987779, 0.28399343, 0.26380928, 0.28600594, 0.25770208, 0.28576176, 0.25631652, 0.254864 , 0.29963553, 0.25451147, 0.27546931, 0.2578158 , 0.26132511, 0.26856981, 0.27780048, 0.27176283, 0.27174748, 0.27706396, 0.29420754, 0.23583807, 0.28863299, 0.24272018, 0.26333879, 0.28775854, 0.29702876, 0.22687502, 0.24580235, 0.30930804, 0.25007445, 0.30066286, 0.22727061, 0.29096045, 0.25035398, 0.25630239, 0.26997338, 0.28689748, 0.26815232, 0.28173516, 0.29287411, 0.27978217, 0.26405809, 0.24444535, 0.26822766, 0.27884218, 0.22996273, 0.28585741, 0.28870403, 0.24169328, 0.27014081, 0.26660939, 0.22146699, 0.30172142, 0.27983771, 0.26684136, 0.23897902, 0.27601925, 0.26925608, 0.3101956 , 0.27663425, 0.25205277, 0.2547879 , 0.27424848, 0.28229888, 0.223476 , 0.2662009 , 0.32331318, 0.26671921, 0.2668772 , 0.2074825 , 0.22536602, 0.28593027, 0.30477405, 0.26808999, 0.24947935, 0.25745349, 0.27321761, 0.2748773 , 0.27776485, 0.23928001, 0.2635465 , 0.28749362, 0.29472081, 0.21862439, 0.2676335 , 0.30080166, 0.26966232, 0.29306065, 0.23688644, 0.28930969, 0.24584524, 0.24023323, 0.26620497, 0.28865302, 0.25440343, 0.27250326, 0.28728198, 0.07583003, 0.31090373, 0.29627702, 0.236097 , 0.21528979, 0.31659791, 0.26719918, 0.29998336, 0.27639208, 0.29233801, 0.27626678, 0.23871984, 0.29270098, 0.28971898, 0.19109602, 0.29560864, 0.29507539, 0.20492604, 0.2788876 , 0.25891261, 0.17444965, 0.31339199, 0.27884491, 0.26400569, 0.22305778, 0.27565457, 0.28403302, 0.27106399, 0.28610541, 0.25836077, 0.25912874, 0.27306704, 0.27233956, 0.24449237, 0.26971896, 0.27261432, 
0.26493679, 0.27394499, 0.26721449, 0.25684216, 0.27022947, 0.27542375, 0.26995902, 0.27579783, 0.28002371, 0.25709103, 0.24234488, 0.296826 , 0.26192363, 0.25365127, 0.2864295 , 0.24891512, 0.24938813, 0.28482997, 0.27828059, 0.28726476, 0.28035497, 0.27157612, 0.25801002, 0.26678263, 0.26853841, 0.27516955, 0.27874475, 0.28386285, 0.24604537, 0.28752855, 0.29526714, 0.24616023, 0.2486039 , 0.24670796, 0.26244128, 0.25179474, 0.28499766, 0.3037 , 0.26479621, 0.26168092, 0.25250814, 0.25671517, 0.28608606, 0.27361334, 0.27955091, 0.25924962, 0.28006212, 0.2549075 , 0.28531706, 0.2908116 , 0.26898266, 0.25820194, 0.24600473, 0.27005837, 0.27632638, 0.26294027, 0.27953282, 0.26117204, 0.26488158, 0.27616653, 0.27326821, 0.28086191, 0.28593079, 0.28560771, 0.26892224, 0.28822231, 0.16313024, 0.25497695, 0.32293383, 0.21249017, 0.27262115, 0.30609214, 0.16679669, 0.27783477, 0.23734724, 0.29606521, 0.26716866, 0.2454338 , 0.30372741, 0.27167846, 0.26917541, 0.24335986, 0.28579254, 0.25898906, 0.24126848, 0.29032779, 0.29051066, 0.24820739, 0.29564777, 0.22738325, 0.27717002, 0.22919359, 0.25948569, 0.28919155, 0.28430183, 0.26479902, 0.27528442, 0.2899312 , 0.28356204, 0.28107796, 0.21581487, 0.27031131, 0.25826531, 0.24656602, 0.28470031, 0.29621292, 0.25227005, 0.26035504, 0.26259964, 0.26482224, 0.26159492, 0.25209917, 0.3018435 , 0.25325603, 0.25525354, 0.30793087, 0.21737779, 0.2329495 , 0.31341325, 0.28866529, 0.23647127, 0.27112947, 0.27523367, 0.29539826, 0.28145113, 0.28730419, 0.18898682, 0.28578298, 0.22418157, 0.25214535, 0.28336495, 0.28970836, 0.29180466, 0.27580531, 0.2309655 , 0.28955376, 0.25599223, 0.27000073, 0.28290819, 0.28032227, 0.25336004, 0.25340982, 0.29365966, 0.27637294, 0.28032937, 0.28603889, 0.26034879, 0.26022068, 0.27697228, 0.24887283, 0.28978698, 0.28490459, 0.28200161, 0.28765797, 0.26949008, 0.22787498, 0.27018998, 0.28271041, 0.25357493, 0.26073737, 0.27463583, 0.2798359 , 0.24296043, 0.30306325, 0.25136765, 0.26761196, 0.2565544 , 
0.28044399, 0.26945989, 0.28381678, 0.27537043, 0.24773015, 0.24674011, 0.30349319, 0.27601743, 0.22736574, 0.28421549, 0.25956987, 0.23834513, 0.2883796 , 0.26600035, 0.27435681, 0.28586025, 0.28055657, 0.25998459, 0.27036974, 0.25652407, 0.27700439, 0.28883738, 0.26650265, 0.26988379, 0.2485796 , 0.29300512, 0.26148703, 0.25306436, 0.27810453, 0.25796663, 0.25084136, 0.26911237, 0.28340842, 0.2867332 , 0.28721706, 0.26366116, 0.27082614, 0.26701527, 0.27578898, 0.28386679, 0.28645862, 0.27412183, 0.26185508, 0.28062735, 0.2602632 , 0.26023209, 0.26803586, 0.29506981, 0.24185199, 0.2570696 , 0.29029694, 0.27443058, 0.29234101, 0.22388564, 0.30235436, 0.26654653, 0.25558241, 0.27048435, 0.28396046, 0.25304765, 0.25309798, 0.31575402, 0.26484374, 0.24554492, 0.22660271, 0.28882671, 0.29258781, 0.22467728, 0.28253382, 0.28212012, 0.29394026, 0.27481921, 0.21302369, 0.30612175, 0.24754037, 0.20656903, 0.26960102, 0.28885259])
# Predictions for the design matrix via the predict method
ols_model_1.predict(X1)

# Estimated parameters, then R-squared and adjusted R-squared
for reported_stat in (
    ols_model_1.params,
    ols_model_1.rsquared,
    ols_model_1.rsquared_adj,
):
    print(reported_stat)
const 0.269715 GPCP_g -0.028825 GPCP_g_l -0.120448 dtype: float64 0.0027161400455674167 2.0778261906828632e-05
# Same baseline regression, specified through the formula interface
control_formula = "any_prio ~ GPCP_g + GPCP_g_l"
formula_spec_1 = smf.ols(control_formula, data=repdata)
ols_model_1 = formula_spec_1.fit()
print(ols_model_1.summary())
OLS Regression Results ============================================================================== Dep. Variable: any_prio R-squared: 0.003 Model: OLS Adj. R-squared: 0.000 Method: Least Squares F-statistic: 1.008 Date: Sun, 11 Dec 2022 Prob (F-statistic): 0.366 Time: 04:33:15 Log-Likelihood: -448.04 No. Observations: 743 AIC: 902.1 Df Residuals: 740 BIC: 915.9 Df Model: 2 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 0.2697 0.016 16.449 0.000 0.238 0.302 GPCP_g -0.0288 0.085 -0.339 0.735 -0.196 0.138 GPCP_g_l -0.1204 0.086 -1.397 0.163 -0.290 0.049 ============================================================================== Omnibus: 189.379 Durbin-Watson: 0.530 Prob(Omnibus): 0.000 Jarque-Bera (JB): 159.939 Skew: 1.044 Prob(JB): 1.86e-35 Kurtosis: 2.104 Cond. No. 6.26 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Same regression with scikit-learn.
# X1 already carries the constant column added by sm.add_constant, so the
# estimator's own intercept is switched off. Otherwise the "const" entry of
# coef_ is a meaningless 0 and coef_ does not line up with the statsmodels
# parameters.
ols_model_skl_1 = linear_model.LinearRegression(fit_intercept=False).fit(X1, y1)

ols_model_skl_1.coef_  # coefficients, in the column order of X1 (constant first)
ols_model_skl_1.predict(X1)  # predictions as an array
ols_model_skl_1.score(X1, y1)  # R-squared
0.0027161400455674167
# List the attributes and methods available on the fitted scikit-learn estimator
dir(ols_model_skl_1)
['__abstractmethods__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_check_feature_names', '_check_n_features', '_decision_function', '_estimator_type', '_get_param_names', '_get_tags', '_more_tags', '_preprocess_data', '_repr_html_', '_repr_html_inner', '_repr_mimebundle_', '_residues', '_set_intercept', '_validate_data', 'coef_', 'copy_X', 'feature_names_in_', 'fit', 'fit_intercept', 'get_params', 'intercept_', 'n_features_in_', 'n_jobs', 'normalize', 'positive', 'predict', 'rank_', 'score', 'set_params', 'singular_']
# Root mean squared error: square root of the mean squared error of the fitted values
mse_baseline = mean_squared_error(y1, ols_model_1.predict())
mse_baseline ** 0.5
0.4422283305322858
# Country-specific time-trend columns: every column whose name ends in "_time"
ends_with_time = repdata.columns.str.contains('_time$')
index_columns = np.flatnonzero(ends_with_time)  # positions of those columns
country_trend = repdata.columns[index_columns]  # their names
country_trend
Index(['ccode_404_time', 'ccode_420_time', 'ccode_432_time', 'ccode_433_time', 'ccode_434_time', 'ccode_435_time', 'ccode_436_time', 'ccode_437_time', 'ccode_438_time', 'ccode_439_time', 'ccode_450_time', 'ccode_451_time', 'ccode_452_time', 'ccode_461_time', 'ccode_471_time', 'ccode_475_time', 'ccode_481_time', 'ccode_482_time', 'ccode_483_time', 'ccode_484_time', 'ccode_490_time', 'ccode_500_time', 'ccode_501_time', 'ccode_510_time', 'ccode_516_time', 'ccode_517_time', 'ccode_520_time', 'ccode_522_time', 'ccode_530_time', 'ccode_540_time', 'ccode_541_time', 'ccode_551_time', 'ccode_552_time', 'ccode_553_time', 'ccode_560_time', 'ccode_565_time', 'ccode_570_time', 'ccode_571_time', 'ccode_572_time', 'ccode_580_time', 'ccode_625_time'], dtype='object')
# Model (1): country fixed effects via C(ccode) plus one time trend per country
rhs_terms_1 = ["GPCP_g", "GPCP_g_l", "C(ccode)", *country_trend]
formula_model_1 = "any_prio ~ " + " + ".join(rhs_terms_1)

# Standard errors clustered by country code
cluster_options_1 = {'groups': repdata['ccode']}
ols_model_1 = smf.ols(formula_model_1, data=repdata).fit(
    cov_type='cluster',
    cov_kwds=cluster_options_1,
)
print(ols_model_1.summary())

# Root mean squared error of the fitted values, rounded to two decimals
rmse_ols_1 = round(mean_squared_error(y1, ols_model_1.predict()) ** 0.5, 2)
print(rmse_ols_1)
OLS Regression Results ============================================================================== Dep. Variable: any_prio R-squared: 0.708 Model: OLS Adj. R-squared: 0.672 Method: Least Squares F-statistic: 162.4 Date: Sun, 11 Dec 2022 Prob (F-statistic): 6.28e-20 Time: 04:33:16 Log-Likelihood: 8.6565 No. Observations: 743 AIC: 150.7 Df Residuals: 659 BIC: 538.0 Df Model: 83 Covariance Type: cluster ===================================================================================== coef std err z P>|z| [0.025 0.975] ------------------------------------------------------------------------------------- Intercept -0.2458 0.004 -66.965 0.000 -0.253 -0.239 C(ccode)[T.420.0] 0.4921 0.001 468.956 0.000 0.490 0.494 C(ccode)[T.432.0] 0.2600 0.008 32.651 0.000 0.244 0.276 C(ccode)[T.433.0] -0.0986 0.002 -59.007 0.000 -0.102 -0.095 C(ccode)[T.434.0] 0.2438 0.004 57.888 0.000 0.236 0.252 C(ccode)[T.435.0] 0.2377 0.009 26.715 0.000 0.220 0.255 C(ccode)[T.436.0] 0.1243 0.013 9.406 0.000 0.098 0.150 C(ccode)[T.437.0] 0.2450 0.004 61.386 0.000 0.237 0.253 C(ccode)[T.438.0] -0.0061 0.003 -2.184 0.029 -0.012 -0.001 C(ccode)[T.439.0] 0.4443 0.004 105.272 0.000 0.436 0.453 C(ccode)[T.450.0] -0.3559 0.004 -92.197 0.000 -0.363 -0.348 C(ccode)[T.451.0] -0.2257 0.002 -120.766 0.000 -0.229 -0.222 C(ccode)[T.452.0] 0.6882 0.004 159.022 0.000 0.680 0.697 C(ccode)[T.461.0] 0.4129 0.005 88.451 0.000 0.404 0.422 C(ccode)[T.471.0] 0.4257 0.004 106.398 0.000 0.418 0.433 C(ccode)[T.475.0] 0.2433 0.005 44.966 0.000 0.233 0.254 C(ccode)[T.481.0] 0.2542 0.002 162.114 0.000 0.251 0.257 C(ccode)[T.482.0] 0.2466 0.003 72.125 0.000 0.240 0.253 C(ccode)[T.483.0] 1.3664 0.008 162.199 0.000 1.350 1.383 C(ccode)[T.484.0] -0.0916 0.002 -58.163 0.000 -0.095 -0.088 C(ccode)[T.490.0] 1.3481 0.001 1581.192 0.000 1.346 1.350 C(ccode)[T.500.0] 1.2554 0.002 743.377 0.000 1.252 1.259 C(ccode)[T.501.0] 0.4613 0.005 92.925 0.000 0.452 0.471 C(ccode)[T.510.0] 0.2446 0.004 60.659 0.000 0.237 0.252 C(ccode)[T.516.0] 
-0.1279 0.001 -184.968 0.000 -0.129 -0.127 C(ccode)[T.517.0] -0.1171 0.001 -198.672 0.000 -0.118 -0.116 C(ccode)[T.520.0] 1.2725 0.021 59.568 0.000 1.231 1.314 C(ccode)[T.522.0] -0.0132 0.008 -1.660 0.097 -0.029 0.002 C(ccode)[T.530.0] 1.3320 0.002 620.380 0.000 1.328 1.336 C(ccode)[T.540.0] 1.2494 0.002 611.501 0.000 1.245 1.253 C(ccode)[T.541.0] 1.7636 0.004 405.215 0.000 1.755 1.772 C(ccode)[T.551.0] 0.2440 0.005 53.114 0.000 0.235 0.253 C(ccode)[T.552.0] -0.0049 0.003 -1.585 0.113 -0.011 0.001 C(ccode)[T.553.0] 0.2460 0.005 44.830 0.000 0.235 0.257 C(ccode)[T.560.0] 1.7528 0.002 796.854 0.000 1.749 1.757 C(ccode)[T.565.0] -1.4419 0.030 -47.517 0.000 -1.501 -1.382 C(ccode)[T.570.0] 0.1329 0.002 74.355 0.000 0.129 0.136 C(ccode)[T.571.0] 0.2493 0.002 110.308 0.000 0.245 0.254 C(ccode)[T.572.0] 0.2519 0.001 329.982 0.000 0.250 0.253 C(ccode)[T.580.0] 0.2488 0.002 147.237 0.000 0.245 0.252 C(ccode)[T.625.0] 0.7557 0.003 246.088 0.000 0.750 0.762 GPCP_g -0.0238 0.043 -0.550 0.582 -0.108 0.061 GPCP_g_l -0.1219 0.052 -2.352 0.019 -0.224 -0.020 ccode_404_time 0.0295 0.000 185.142 0.000 0.029 0.030 ccode_420_time -0.0160 0.000 -143.094 0.000 -0.016 -0.016 ccode_432_time 0.0078 0.001 15.618 0.000 0.007 0.009 ccode_433_time 0.0595 5.87e-05 1014.282 0.000 0.059 0.060 ccode_434_time 0.0004 0.000 2.272 0.023 4.97e-05 0.001 ccode_435_time 0.0009 0.001 1.469 0.142 -0.000 0.002 ccode_436_time 0.0369 0.001 34.562 0.000 0.035 0.039 ccode_437_time -1.254e-05 5.51e-06 -2.276 0.023 -2.33e-05 -1.74e-06 ccode_438_time 0.0297 0.000 270.544 0.000 0.030 0.030 ccode_439_time -0.0032 0.000 -19.303 0.000 -0.004 -0.003 ccode_450_time 0.1090 0.000 656.335 0.000 0.109 0.109 ccode_451_time 0.0786 0.000 259.266 0.000 0.078 0.079 ccode_452_time -0.0281 0.000 -211.081 0.000 -0.028 -0.028 ccode_461_time -0.0052 0.000 -42.688 0.000 -0.005 -0.005 ccode_471_time -0.0106 8.85e-05 -119.747 0.000 -0.011 -0.010 ccode_475_time 0.0002 0.000 1.308 0.191 -0.000 0.001 ccode_481_time -0.0005 0.000 -2.518 0.012 
-0.001 -0.000 ccode_482_time 4.412e-05 4.9e-05 0.901 0.368 -5.19e-05 0.000 ccode_483_time -0.0185 0.001 -31.860 0.000 -0.020 -0.017 ccode_484_time 0.0415 0.000 162.672 0.000 0.041 0.042 ccode_490_time -0.0379 0.000 -88.293 0.000 -0.039 -0.037 ccode_500_time -0.0095 0.000 -25.149 0.000 -0.010 -0.009 ccode_501_time -0.0133 0.000 -45.306 0.000 -0.014 -0.013 ccode_510_time -3.845e-05 2.79e-05 -1.376 0.169 -9.32e-05 1.63e-05 ccode_516_time 0.0662 0.000 249.253 0.000 0.066 0.067 ccode_517_time 0.0697 0.000 244.485 0.000 0.069 0.070 ccode_520_time -0.0015 0.002 -0.705 0.481 -0.006 0.003 ccode_522_time 0.0437 0.001 30.559 0.000 0.041 0.047 ccode_530_time -0.0246 6.17e-05 -398.471 0.000 -0.025 -0.024 ccode_540_time -0.0004 0.000 -2.421 0.015 -0.001 -6.67e-05 ccode_541_time -0.0737 0.000 -337.720 0.000 -0.074 -0.073 ccode_551_time 5.597e-05 5.52e-05 1.014 0.310 -5.22e-05 0.000 ccode_552_time 0.0301 0.000 159.587 0.000 0.030 0.030 ccode_553_time 7.894e-05 0.000 0.257 0.797 -0.001 0.001 ccode_560_time -0.0684 5.73e-05 -1192.513 0.000 -0.068 -0.068 ccode_565_time 0.1134 0.001 80.950 0.000 0.111 0.116 ccode_570_time 0.0142 0.000 100.113 0.000 0.014 0.014 ccode_571_time 0.0002 0.000 1.506 0.132 -6.68e-05 0.001 ccode_572_time -0.0001 5.78e-05 -2.521 0.012 -0.000 -3.24e-05 ccode_580_time -0.0002 0.000 -1.087 0.277 -0.001 0.000 ccode_625_time 0.0331 3.67e-05 901.316 0.000 0.033 0.033 ============================================================================== Omnibus: 91.156 Durbin-Watson: 1.478 Prob(Omnibus): 0.000 Jarque-Bera (JB): 421.027 Skew: 0.452 Prob(JB): 3.76e-92 Kurtosis: 6.575 Cond. No. 226. ============================================================================== Notes: [1] Standard Errors are robust to cluster correlation (cluster) 0.24

Model OLS (2):

  • Variable endógena: war_prio

  • Variables explicativas: GPCP_g y GPCP_g_l

  • Sí efectos fijos (country)

  • Sí country-time trends

  • Errores estándar robustos (Huber-White robust)

  • Los términos de perturbación están clusterizados (agrupados) a nivel país

  • No se añaden variables de control

# Dependent variable
y2 = repdata["war_prio"]

# Explanatory variables plus a constant term
explanatory_2 = repdata[["GPCP_g", "GPCP_g_l"]]
X2 = sm.add_constant(explanatory_2)
X2

# Fit the model with the default (non-robust) covariance estimator
baseline_spec_2 = sm.OLS(y2, X2)
ols_model_2 = baseline_spec_2.fit()

# Print all tables together; the layout is close to Stata's output
print(ols_model_2.summary())
OLS Regression Results ============================================================================== Dep. Variable: war_prio R-squared: 0.003 Model: OLS Adj. R-squared: 0.001 Method: Least Squares F-statistic: 1.200 Date: Sun, 11 Dec 2022 Prob (F-statistic): 0.302 Time: 04:33:17 Log-Likelihood: -320.10 No. Observations: 743 AIC: 646.2 Df Residuals: 740 BIC: 660.0 Df Model: 2 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ const 0.1697 0.014 12.292 0.000 0.143 0.197 GPCP_g -0.0977 0.072 -1.363 0.173 -0.238 0.043 GPCP_g_l -0.0891 0.073 -1.228 0.220 -0.232 0.053 ============================================================================== Omnibus: 216.896 Durbin-Watson: 0.482 Prob(Omnibus): 0.000 Jarque-Bera (JB): 434.329 Skew: 1.777 Prob(JB): 4.86e-95 Kurtosis: 4.181 Cond. No. 6.26 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Same regression with robust standard errors.
# HC1: heteroskedasticity-robust covariance estimator.
ols_spec_rb2 = sm.OLS(y2, X2)
ols_model_rb2 = ols_spec_rb2.fit(cov_type="HC1")
print(ols_model_rb2.summary())
OLS Regression Results ============================================================================== Dep. Variable: war_prio R-squared: 0.003 Model: OLS Adj. R-squared: 0.001 Method: Least Squares F-statistic: 1.507 Date: Sun, 11 Dec 2022 Prob (F-statistic): 0.222 Time: 04:33:17 Log-Likelihood: -320.10 No. Observations: 743 AIC: 646.2 Df Residuals: 740 BIC: 660.0 Df Model: 2 Covariance Type: HC1 ============================================================================== coef std err z P>|z| [0.025 0.975] ------------------------------------------------------------------------------ const 0.1697 0.014 12.142 0.000 0.142 0.197 GPCP_g -0.0977 0.066 -1.474 0.140 -0.228 0.032 GPCP_g_l -0.0891 0.063 -1.412 0.158 -0.213 0.035 ============================================================================== Omnibus: 216.896 Durbin-Watson: 0.482 Prob(Omnibus): 0.000 Jarque-Bera (JB): 434.329 Skew: 1.777 Prob(JB): 4.86e-95 Kurtosis: 4.181 Cond. No. 6.26 ============================================================================== Notes: [1] Standard Errors are heteroscedasticity robust (HC1)
# Attributes and methods exposed by the fitted results object.
dir(ols_model_rb2)

# Fitted values of y obtained from a fresh fit through the predict method.
refit_2 = sm.OLS(y2, X2).fit()
refit_2.predict()
array([0.18704291, 0.16156761, 0.1591636 , 0.16689364, 0.16611632, 0.15020309, 0.16052623, 0.16445212, 0.1678748 , 0.17493067, 0.18461317, 0.21096717, 0.14646171, 0.14220873, 0.18236471, 0.17711868, 0.18211489, 0.18210699, 0.14566715, 0.17998848, 0.16871667, 0.17702256, 0.16698367, 0.15666318, 0.1680248 , 0.17984615, 0.16204504, 0.1430644 , 0.1802974 , 0.14964679, 0.14789666, 0.18521695, 0.15446167, 0.16866698, 0.18319467, 0.17212033, 0.15998443, 0.16219266, 0.14809908, 0.19410448, 0.18898141, 0.18438161, 0.1734193 , 0.14512834, 0.17947203, 0.1080412 , 0.11027748, 0.21478754, 0.16391527, 0.1845413 , 0.13976727, 0.11104919, 0.16801464, 0.14509102, 0.14936252, 0.18708431, 0.19845518, 0.17957136, 0.17248373, 0.18268706, 0.1702437 , 0.14207546, 0.15359244, 0.1777945 , 0.16454007, 0.15436827, 0.186385 , 0.15436469, 0.14794209, 0.18320195, 0.15511713, 0.166916 , 0.18187508, 0.16644452, 0.16207225, 0.15871311, 0.17390356, 0.13542463, 0.16609041, 0.1940256 , 0.15327744, 0.16152694, 0.17701248, 0.15333377, 0.15588651, 0.18178009, 0.18544734, 0.18102702, 0.19238042, 0.17812424, 0.16287117, 0.16802763, 0.16162038, 0.17802261, 0.16563186, 0.17415717, 0.16759646, 0.18110427, 0.17467989, 0.14947877, 0.1714517 , 0.18359009, 0.16247644, 0.16628072, 0.16164332, 0.1636782 , 0.17512774, 0.16253175, 0.16759689, 0.17313232, 0.16962104, 0.16977008, 0.17919028, 0.15981144, 0.16498076, 0.16277749, 0.17780929, 0.18909981, 0.15799448, 0.15521208, 0.17201592, 0.16114667, 0.15991444, 0.16695022, 0.16935811, 0.17699674, 0.17946938, 0.16566712, 0.16434882, 0.17016698, 0.17781272, 0.16502038, 0.15318973, 0.16544964, 0.19148407, 0.18787251, 0.20031107, 0.15687086, 0.14979529, 0.18722649, 0.13143828, 0.14516687, 0.21543774, 0.15845881, 0.12520563, 0.18152704, 0.12671072, 0.13232035, 0.18971026, 0.18393917, 0.16208728, 0.14271711, 0.14576751, 0.16000941, 0.18016869, 0.16290845, 0.14075029, 0.18143644, 0.18127924, 0.1526682 , 0.1646786 , 0.1798863 , 0.18942011, 0.17439827, 0.15942248, 0.1672099 , 
0.16370274, 0.17189004, 0.18657038, 0.16575548, 0.14826875, 0.13594103, 0.13915283, 0.18840088, 0.20532517, 0.16983948, 0.15135654, 0.17843133, 0.1520727 , 0.14061413, 0.19234703, 0.21477279, 0.15514228, 0.15207831, 0.18710926, 0.20282314, 0.16165132, 0.14981917, 0.16472176, 0.15445135, 0.16228788, 0.18100661, 0.19059567, 0.16938383, 0.13874985, 0.15727079, 0.18267049, 0.13946622, 0.15154327, 0.19780696, 0.18044045, 0.15052487, 0.1614658 , 0.18290449, 0.15501597, 0.13844977, 0.17849413, 0.18040646, 0.15855663, 0.16498806, 0.18514041, 0.18950848, 0.17160514, 0.15637973, 0.16720823, 0.16970888, 0.16894229, 0.18159817, 0.16500872, 0.14367771, 0.16136921, 0.15740701, 0.20200474, 0.17496123, 0.14547437, 0.13512712, 0.14491251, 0.15345414, 0.16504363, 0.20073796, 0.18862624, 0.1619266 , 0.15739639, 0.14600772, 0.17220029, 0.18415935, 0.17434085, 0.17551435, 0.14825508, 0.18491573, 0.18730939, 0.1945623 , 0.14339232, 0.12653114, 0.17440473, 0.16309447, 0.16725978, 0.16760426, 0.17442337, 0.15696866, 0.17266447, 0.18704612, 0.17229268, 0.16373636, 0.1627915 , 0.17283451, 0.18546656, 0.14741111, 0.16374303, 0.1708573 , 0.20133781, 0.18597484, 0.15978412, 0.14547983, 0.15475955, 0.16405312, 0.15975333, 0.1710791 , 0.1670655 , 0.16449794, 0.17809304, 0.1619682 , 0.17368958, 0.19080806, 0.18097444, 0.16895817, 0.17033488, 0.15735606, 0.1521995 , 0.18861119, 0.17922437, 0.15908425, 0.16268817, 0.144761 , 0.14191308, 0.16210217, 0.19210516, 0.17207626, 0.16141921, 0.15482731, 0.16221672, 0.17979104, 0.16770973, 0.17097229, 0.17857576, 0.15472526, 0.18194385, 0.17270062, 0.1920322 , 0.15509649, 0.13716635, 0.17635244, 0.16594548, 0.17102634, 0.16738002, 0.17410421, 0.17367534, 0.17236854, 0.17645524, 0.16244065, 0.16286941, 0.17662267, 0.18396776, 0.17370455, 0.16101548, 0.16923238, 0.11190001, 0.17801593, 0.20261344, 0.15289799, 0.16481666, 0.19364133, 0.16133588, 0.1467095 , 0.17121662, 0.19580919, 0.18360753, 0.17473608, 0.15650069, 0.1570425 , 0.19069279, 0.09478098, 
0.11662815, 0.22025171, 0.15045632, 0.1672967 , 0.18178798, 0.16524338, 0.16360239, 0.15097496, 0.14773611, 0.14540589, 0.17326466, 0.20525889, 0.16924749, 0.17906121, 0.15295605, 0.14632107, 0.16156667, 0.12915442, 0.1654211 , 0.18161083, 0.18346956, 0.1738644 , 0.16963028, 0.1964992 , 0.16736664, 0.15501128, 0.16811332, 0.15880366, 0.16602904, 0.1613465 , 0.1844751 , 0.19097083, 0.1609754 , 0.1533545 , 0.17307076, 0.17343374, 0.17810026, 0.17139475, 0.17215015, 0.17654417, 0.14518327, 0.18024761, 0.180942 , 0.16359373, 0.167556 , 0.15599243, 0.16161041, 0.17353556, 0.17738658, 0.16938193, 0.18410675, 0.19667618, 0.16255364, 0.16037175, 0.1678773 , 0.14420727, 0.17099655, 0.20556127, 0.16805829, 0.11572445, 0.18159303, 0.1800722 , 0.18210499, 0.16185073, 0.15424136, 0.17488568, 0.14798629, 0.15497048, 0.19050428, 0.14164697, 0.17298674, 0.19172098, 0.19318605, 0.17714965, 0.15039347, 0.14711426, 0.18074995, 0.14971273, 0.14660418, 0.1978318 , 0.16626958, 0.1476653 , 0.17641938, 0.13227487, 0.15285324, 0.19920126, 0.1795625 , 0.14991386, 0.13920319, 0.16828129, 0.19342648, 0.20616132, 0.16583065, 0.14727692, 0.15805624, 0.18741266, 0.15220377, 0.12490523, 0.20183799, 0.20601325, 0.17341799, 0.13514278, 0.09431298, 0.14184593, 0.20374632, 0.1962324 , 0.16160206, 0.12980427, 0.16257447, 0.17328315, 0.18290921, 0.15760305, 0.14187327, 0.17081467, 0.20606595, 0.15731004, 0.12554287, 0.18902469, 0.18770634, 0.19027183, 0.1625395 , 0.16043646, 0.17314178, 0.13489106, 0.14129788, 0.18517464, 0.15821201, 0.15011158, 0.21350717, 0.05465512, 0.0476425 , 0.21957367, 0.17853394, 0.10087474, 0.16877245, 0.16398743, 0.18667498, 0.19340881, 0.18671646, 0.19575813, 0.15283282, 0.15527513, 0.21186909, 0.13221655, 0.12165224, 0.21494396, 0.14650931, 0.1253552 , 0.18558154, 0.0949594 , 0.12566243, 0.20696712, 0.17983861, 0.13608168, 0.13428499, 0.1836093 , 0.17832032, 0.18238577, 0.17625905, 0.15436404, 0.16265776, 0.1776055 , 0.15578372, 0.1503598 , 0.17238686, 0.16816736, 
0.16873778, 0.1731356 , 0.15981054, 0.15953081, 0.17398574, 0.17258451, 0.17598569, 0.16465887, 0.17446053, 0.13876987, 0.16702164, 0.18797323, 0.15064069, 0.17067385, 0.17244879, 0.13928198, 0.16346391, 0.18397256, 0.18568486, 0.18871859, 0.1804116 , 0.16400864, 0.15943395, 0.16632276, 0.17061527, 0.18057733, 0.17157088, 0.1635388 , 0.1588517 , 0.20205154, 0.17604352, 0.14225616, 0.13985979, 0.15100566, 0.15188 , 0.16080549, 0.20309118, 0.19244664, 0.16293896, 0.15537597, 0.14626345, 0.17022956, 0.18261636, 0.17942167, 0.17431904, 0.14521316, 0.1663386 , 0.16542766, 0.19448551, 0.18572249, 0.1653653 , 0.14639544, 0.15099438, 0.17556209, 0.16852706, 0.1719489 , 0.17247756, 0.15936575, 0.16994439, 0.17545043, 0.17752014, 0.18563946, 0.19237074, 0.17882417, 0.1778929 , 0.12045619, 0.07140599, 0.20074209, 0.17459829, 0.11976406, 0.21110232, 0.12880444, 0.10420644, 0.15247391, 0.15697276, 0.21193017, 0.14725273, 0.17320624, 0.19506753, 0.17539197, 0.15001762, 0.16083143, 0.17959224, 0.1415976 , 0.15742811, 0.20271294, 0.16581939, 0.17720555, 0.15955643, 0.1487012 , 0.15165633, 0.13070567, 0.172528 , 0.19210238, 0.18560423, 0.16678886, 0.18539161, 0.18971351, 0.19536676, 0.14363159, 0.13098983, 0.16664983, 0.14535142, 0.15745466, 0.19991597, 0.17942568, 0.15175086, 0.15889733, 0.16164316, 0.16464185, 0.1480495 , 0.17522556, 0.20085826, 0.14102674, 0.18947149, 0.17171226, 0.10198957, 0.16606492, 0.21912074, 0.16239543, 0.14603957, 0.17041403, 0.18964808, 0.1902954 , 0.20268902, 0.12760449, 0.12630954, 0.15566774, 0.12359586, 0.16186551, 0.1942139 , 0.17708252, 0.19659289, 0.14613178, 0.15557385, 0.17597748, 0.15817872, 0.17623212, 0.18785079, 0.17045474, 0.14378938, 0.17200213, 0.19065048, 0.17862203, 0.18883183, 0.17775083, 0.15466109, 0.17126419, 0.1582525 , 0.1692128 , 0.17515346, 0.18632771, 0.18868285, 0.18903063, 0.14350925, 0.13653112, 0.18037886, 0.1704386 , 0.15197 , 0.16331233, 0.18516054, 0.15446659, 0.17385424, 0.1824608 , 0.15719475, 0.15794858, 0.1675651 , 
0.17413515, 0.18333956, 0.15955835, 0.16448492, 0.13434514, 0.1714557 , 0.20562819, 0.14524184, 0.14775105, 0.17944519, 0.13930323, 0.1589058 , 0.18089702, 0.16790272, 0.18130923, 0.19010938, 0.17093998, 0.16493286, 0.1616061 , 0.16070747, 0.19104145, 0.1677181 , 0.17148525, 0.15273654, 0.1693474 , 0.18458853, 0.1514887 , 0.16379162, 0.1711832 , 0.1494963 , 0.15369807, 0.176016 , 0.18770166, 0.19434745, 0.17849891, 0.16640218, 0.16838233, 0.16935242, 0.18324291, 0.17970368, 0.18651227, 0.1660176 , 0.17194037, 0.17286607, 0.15804757, 0.15688771, 0.18808021, 0.17347543, 0.13777878, 0.1739309 , 0.18251431, 0.19563886, 0.15242018, 0.1564431 , 0.19382495, 0.15894914, 0.15679604, 0.18238104, 0.16721841, 0.13976083, 0.18681281, 0.2026636 , 0.1576839 , 0.12280629, 0.14468015, 0.20554201, 0.15639431, 0.14328271, 0.18410435, 0.19104338, 0.2008204 , 0.1317299 , 0.15190923, 0.19194582, 0.11405117, 0.11955249, 0.1834425 , 0.17662792])
# Fitted values computed by passing the design matrix to predict.
ols_model_2.predict(X2)

# Print, in order: the estimated parameters, R-squared and adjusted R-squared.
for statistic in (
    ols_model_2.params,
    ols_model_2.rsquared,
    ols_model_2.rsquared_adj,
):
    print(statistic)
const 0.169673 GPCP_g -0.097672 GPCP_g_l -0.089139 dtype: float64 0.003232131465761179 0.0005381642535065012
# Same specification written with the formula interface
# (the intercept is added by the formula API).
control_formula_2 = "war_prio ~ GPCP_g + GPCP_g_l"
formula_spec_2 = smf.ols(control_formula_2, data=repdata)
ols_model_2 = formula_spec_2.fit()
print(ols_model_2.summary())
OLS Regression Results ============================================================================== Dep. Variable: war_prio R-squared: 0.003 Model: OLS Adj. R-squared: 0.001 Method: Least Squares F-statistic: 1.200 Date: Sun, 11 Dec 2022 Prob (F-statistic): 0.302 Time: 04:33:17 Log-Likelihood: -320.10 No. Observations: 743 AIC: 646.2 Df Residuals: 740 BIC: 660.0 Df Model: 2 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 0.1697 0.014 12.292 0.000 0.143 0.197 GPCP_g -0.0977 0.072 -1.363 0.173 -0.238 0.043 GPCP_g_l -0.0891 0.073 -1.228 0.220 -0.232 0.053 ============================================================================== Omnibus: 216.896 Durbin-Watson: 0.482 Prob(Omnibus): 0.000 Jarque-Bera (JB): 434.329 Skew: 1.777 Prob(JB): 4.86e-95 Kurtosis: 4.181 Cond. No. 6.26 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Same regression estimated with scikit-learn; fit() returns the estimator.
sk_regressor = linear_model.LinearRegression()
ols_model_skl_2 = sk_regressor.fit(X2, y2)

ols_model_skl_2.coef_         # estimated coefficients
ols_model_skl_2.predict(X2)   # fitted values as an array
ols_model_skl_2.score(X2, y2) # R-squared
0.003232131465761179
# List the attributes and methods available on the fitted scikit-learn model.
dir(ols_model_skl_2)
['__abstractmethods__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_check_feature_names', '_check_n_features', '_decision_function', '_estimator_type', '_get_param_names', '_get_tags', '_more_tags', '_preprocess_data', '_repr_html_', '_repr_html_inner', '_repr_mimebundle_', '_residues', '_set_intercept', '_validate_data', 'coef_', 'copy_X', 'feature_names_in_', 'fit', 'fit_intercept', 'get_params', 'intercept_', 'n_features_in_', 'n_jobs', 'normalize', 'positive', 'predict', 'rank_', 'score', 'set_params', 'singular_']
# Root of the mean squared error between observed and fitted values.
mse_2 = mean_squared_error(y2, ols_model_2.predict())
mse_2 ** 0.5
0.3722754239552763
# Model 2 specification: rainfall growth (current and lagged), country
# dummies via C(ccode), and every term listed in country_trend
# (presumably the country-specific time-trend columns -- defined earlier).
trend_terms = ' + '.join(country_trend)
formula_model_2 = f"war_prio ~ GPCP_g + GPCP_g_l + C(ccode) + {trend_terms}"

# Standard errors clustered on the country code.
cluster_options = {'groups': repdata['ccode']}
ols_model_2 = smf.ols(formula_model_2, data=repdata).fit(
    cov_type='cluster',
    cov_kwds=cluster_options,
)
print(ols_model_2.summary())
OLS Regression Results ============================================================================== Dep. Variable: war_prio R-squared: 0.699 Model: OLS Adj. R-squared: 0.661 Method: Least Squares F-statistic: 335.5 Date: Sun, 11 Dec 2022 Prob (F-statistic): 1.01e-25 Time: 04:33:18 Log-Likelihood: 124.86 No. Observations: 743 AIC: -81.72 Df Residuals: 659 BIC: 305.6 Df Model: 83 Covariance Type: cluster ===================================================================================== coef std err z P>|z| [0.025 0.975] ------------------------------------------------------------------------------------- Intercept -0.1101 0.002 -45.002 0.000 -0.115 -0.105 C(ccode)[T.420.0] 0.1149 0.001 199.754 0.000 0.114 0.116 C(ccode)[T.432.0] 0.1033 0.005 19.235 0.000 0.093 0.114 C(ccode)[T.433.0] -0.0023 0.001 -2.213 0.027 -0.004 -0.000 C(ccode)[T.434.0] 0.1096 0.003 40.431 0.000 0.104 0.115 C(ccode)[T.435.0] 0.1019 0.006 16.920 0.000 0.090 0.114 C(ccode)[T.436.0] 0.0952 0.009 10.702 0.000 0.078 0.113 C(ccode)[T.437.0] 0.1097 0.003 41.518 0.000 0.104 0.115 C(ccode)[T.438.0] -0.0040 0.002 -2.267 0.023 -0.008 -0.001 C(ccode)[T.439.0] 0.1620 0.003 58.445 0.000 0.157 0.167 C(ccode)[T.450.0] -0.0897 0.002 -36.815 0.000 -0.094 -0.085 C(ccode)[T.451.0] -0.1389 0.001 -129.586 0.000 -0.141 -0.137 C(ccode)[T.452.0] 0.1090 0.003 36.791 0.000 0.103 0.115 C(ccode)[T.461.0] 0.1085 0.003 34.498 0.000 0.102 0.115 C(ccode)[T.471.0] 0.1095 0.003 39.909 0.000 0.104 0.115 C(ccode)[T.475.0] 0.1073 0.004 29.411 0.000 0.100 0.114 C(ccode)[T.481.0] 0.1145 0.001 117.993 0.000 0.113 0.116 C(ccode)[T.482.0] 0.1105 0.002 48.065 0.000 0.106 0.115 C(ccode)[T.483.0] 1.2499 0.006 216.523 0.000 1.239 1.261 C(ccode)[T.484.0] -0.2316 0.001 -272.071 0.000 -0.233 -0.230 C(ccode)[T.490.0] 1.3097 0.001 2311.875 0.000 1.309 1.311 C(ccode)[T.500.0] 1.3184 0.001 1150.994 0.000 1.316 1.321 C(ccode)[T.501.0] 0.1104 0.003 38.947 0.000 0.105 0.116 C(ccode)[T.510.0] 0.1097 0.003 41.535 0.000 0.105 0.115 C(ccode)[T.516.0] 
-0.0009 0.000 -2.207 0.027 -0.002 -0.000 C(ccode)[T.517.0] -0.1895 0.000 -603.755 0.000 -0.190 -0.189 C(ccode)[T.520.0] -0.4516 0.015 -30.400 0.000 -0.481 -0.423 C(ccode)[T.522.0] 0.1279 0.006 23.040 0.000 0.117 0.139 C(ccode)[T.530.0] 1.6181 0.001 1209.520 0.000 1.615 1.621 C(ccode)[T.540.0] 1.2389 0.001 890.450 0.000 1.236 1.242 C(ccode)[T.541.0] 1.6248 0.003 537.619 0.000 1.619 1.631 C(ccode)[T.551.0] 0.1087 0.003 35.587 0.000 0.103 0.115 C(ccode)[T.552.0] -0.1417 0.002 -67.737 0.000 -0.146 -0.138 C(ccode)[T.553.0] 0.1072 0.004 28.201 0.000 0.100 0.115 C(ccode)[T.560.0] 1.6180 0.001 1190.509 0.000 1.615 1.621 C(ccode)[T.565.0] -1.6018 0.019 -83.329 0.000 -1.639 -1.564 C(ccode)[T.570.0] 0.1155 0.001 121.781 0.000 0.114 0.117 C(ccode)[T.571.0] 0.1122 0.002 72.342 0.000 0.109 0.115 C(ccode)[T.572.0] 0.1146 0.001 215.878 0.000 0.114 0.116 C(ccode)[T.580.0] 0.1135 0.001 107.970 0.000 0.111 0.116 C(ccode)[T.625.0] 0.6990 0.002 346.178 0.000 0.695 0.703 GPCP_g -0.0625 0.030 -2.088 0.037 -0.121 -0.004 GPCP_g_l -0.0687 0.032 -2.174 0.030 -0.131 -0.007 ccode_404_time 0.0138 0.000 136.685 0.000 0.014 0.014 ccode_420_time -0.0002 7.58e-05 -2.310 0.021 -0.000 -2.66e-05 ccode_432_time 0.0008 0.000 2.309 0.021 0.000 0.001 ccode_433_time 0.0139 3.9e-05 357.734 0.000 0.014 0.014 ccode_434_time 0.0002 9.92e-05 2.229 0.026 2.67e-05 0.000 ccode_435_time 0.0009 0.000 2.294 0.022 0.000 0.002 ccode_436_time 0.0017 0.001 2.316 0.021 0.000 0.003 ccode_437_time -7.637e-06 3.43e-06 -2.227 0.026 -1.44e-05 -9.15e-07 ccode_438_time 0.0139 7.64e-05 181.539 0.000 0.014 0.014 ccode_439_time 0.0003 0.000 2.314 0.021 3.88e-05 0.000 ccode_450_time 0.0361 0.000 315.758 0.000 0.036 0.036 ccode_451_time 0.0294 0.000 139.810 0.000 0.029 0.030 ccode_452_time 0.0001 8.65e-05 1.619 0.105 -2.95e-05 0.000 ccode_461_time 0.0002 8.42e-05 2.132 0.033 1.45e-05 0.000 ccode_471_time 8.159e-05 5.6e-05 1.457 0.145 -2.82e-05 0.000 ccode_475_time 0.0003 0.000 2.270 0.023 3.68e-05 0.001 ccode_481_time -0.0002 0.000 
-1.828 0.068 -0.000 1.41e-05 ccode_482_time 7.448e-05 3.41e-05 2.186 0.029 7.7e-06 0.000 ccode_483_time -0.0465 0.000 -116.348 0.000 -0.047 -0.046 ccode_484_time 0.0418 0.000 275.958 0.000 0.041 0.042 ccode_490_time -0.0512 0.000 -178.531 0.000 -0.052 -0.051 ccode_500_time -0.0479 0.000 -192.114 0.000 -0.048 -0.047 ccode_501_time 0.0002 0.000 1.443 0.149 -8.18e-05 0.001 ccode_510_time -4.393e-05 1.93e-05 -2.281 0.023 -8.17e-05 -6.18e-06 ccode_516_time 0.0136 0.000 77.533 0.000 0.013 0.014 ccode_517_time 0.0469 0.000 242.507 0.000 0.047 0.047 ccode_520_time 0.1060 0.001 73.516 0.000 0.103 0.109 ccode_522_time -0.0022 0.001 -2.253 0.024 -0.004 -0.000 ccode_530_time -0.0773 4.27e-05 -1808.402 0.000 -0.077 -0.077 ccode_540_time -0.0195 8.62e-05 -225.947 0.000 -0.020 -0.019 ccode_541_time -0.0734 0.000 -497.796 0.000 -0.074 -0.073 ccode_551_time 8.487e-05 3.84e-05 2.213 0.027 9.7e-06 0.000 ccode_552_time 0.0301 0.000 231.268 0.000 0.030 0.030 ccode_553_time 0.0004 0.000 1.989 0.047 6.07e-06 0.001 ccode_560_time -0.0685 3.55e-05 -1928.554 0.000 -0.069 -0.068 ccode_565_time 0.1148 0.001 134.450 0.000 0.113 0.116 ccode_570_time -0.0001 8.9e-05 -1.419 0.156 -0.000 4.81e-05 ccode_571_time 0.0002 0.000 2.298 0.022 3.41e-05 0.000 ccode_572_time -4.777e-05 3.14e-05 -1.524 0.128 -0.000 1.37e-05 ccode_580_time -0.0003 0.000 -2.229 0.026 -0.000 -3.09e-05 ccode_625_time 0.0165 1.97e-05 840.404 0.000 0.016 0.017 ============================================================================== Omnibus: 138.515 Durbin-Watson: 1.311 Prob(Omnibus): 0.000 Jarque-Bera (JB): 1043.072 Skew: 0.609 Prob(JB): 3.16e-227 Kurtosis: 8.675 Cond. No. 226. ============================================================================== Notes: [1] Standard Errors are robust to cluster correlation (cluster)
# Root mean squared error of model 2, rounded to two decimals for the table.
# NOTE(review): mean_squared_error averages over the number of observations;
# Stata's "Root MSE" divides by the residual degrees of freedom instead --
# confirm which definition the replicated table is meant to report.
fitted_2 = ols_model_2.predict()
rmse_ols_2 = round(mean_squared_error(y2, fitted_2) ** 0.5, 2)
print(rmse_ols_2)
0.2

TABLE 3: Rainfall and Civil Conflict (Reduced-Form)

# Regressors that will be reported in the table.
explicativas = ['GPCP_g', 'GPCP_g_l']

# Display labels; only the first two are paired with a regressor below,
# because zip stops at the shorter sequence.
etiquetas = [
    'Growth in rainfall, t',
    'Growth in rainfall, t-1',
    'Country fixed effects',
    'Country-specific time trends',
    'R^2',
    'Root mean square error',
    'Observations',
]

# Map each regressor name to its display label.
labels = {variable: etiqueta for variable, etiqueta in zip(explicativas, etiquetas)}
labels
{'GPCP_g': 'Growth in rainfall, t', 'GPCP_g_l': 'Growth in rainfall, t-1'}
# Export both reduced-form regressions to a LaTeX table written to
# regression_table.tex.
pystout(
    models=[ols_model_1, ols_model_2],
    file='regression_table.tex',
    digits=3,
    # endog_names: column headers; here just the model numbers.
    endog_names=['(1)', '(2)'],
    # exogvars: restrict the table to the selected regressors.
    exogvars=explicativas,
    # varlabels: display labels for those regressors.
    varlabels=labels,
    # mgroups: group header with [first, last] model column. Only two models
    # are exported, so the group must span columns 1-2 (it was 1-5).
    mgroups={'Ordinary Least Squares': [1, 2]},
    # Model statistics. Raw string keeps the LaTeX backslash without relying
    # on an invalid "\s" escape sequence.
    modstat={'nobs': 'Observations', 'rsquared': r'R\sym{2}'},
    # Extra rows appended to the table.
    addrows={
        'Country fixed effects': ['yes', 'yes'],
        'Country-specific time trends': ['yes', 'yes'],
        'Root mean square error': [rmse_ols_1, rmse_ols_2],
    },
    addnotes=[
        'Note.—Huber robust standard errors are in parentheses.',
        'Regression disturbance terms are clustered at the country level.',
        'A country-specific year time trend is included in all specifications (coefficient estimates not reported).',
        '* Significantly different from zero at 90 percent confidence.',
        '** Significantly different from zero at 95 percent confidence.',
        '*** Significantly different from zero at 99 percent confidence.',
    ],
    title='TABLE 3 - Rainfall and Civil Conflict (Reduced-Form)',
    stars={.1: '*', .05: '**', .01: '***'},
)

Gráfico de coeficientes (coef plot)

# Re-estimate model 1 with HC1 standard errors and keep the coefficient
# table (second table of summary2) as a DataFrame.
# NOTE(review): the regression tables use country-clustered errors while this
# plot input uses HC1 -- confirm that the difference is intended.
model1 = smf.ols(formula_model_1, data=repdata).fit(cov_type='HC1').summary2().tables[1]

# Select the GPCP_g row by label rather than by position: a positional index
# silently points at another regressor if the number of country dummies changes.
model1_gpcp_g = model1.loc['GPCP_g']

# Point estimate and standard error of GPCP_g.
model1_coef = model1_gpcp_g['Coef.']
model1_coef_se = model1_gpcp_g['Std.Err.']

# 95% confidence interval bounds.
model1_lower = model1_gpcp_g['[0.025']
model1_upper = model1_gpcp_g['0.975]']
# Model 2 (war_prio as dependent variable): re-estimate with HC1 standard
# errors and keep the coefficient table (second table of summary2).
# NOTE(review): the regression tables use country-clustered errors while this
# plot input uses HC1 -- confirm that the difference is intended.
model2 = smf.ols(formula_model_2, data=repdata).fit(cov_type='HC1').summary2().tables[1]

# Select the GPCP_g row by label rather than by position: a positional index
# silently points at another regressor if the number of country dummies changes.
model2_gpcp_g = model2.loc['GPCP_g']

# Point estimate and standard error of GPCP_g.
model2_coef = model2_gpcp_g['Coef.']
model2_coef_se = model2_gpcp_g['Std.Err.']

# 95% confidence interval bounds.
model2_lower = model2_gpcp_g['[0.025']
model2_upper = model2_gpcp_g['0.975]']
# Collect the extracted statistics in a 2 x 4 array:
# one row per model; columns are estimate, std. error, lower and upper bound.
table = np.array(
    [
        [model1_coef, model1_coef_se, model1_lower, model1_upper],
        [model2_coef, model2_coef_se, model2_lower, model2_upper],
    ],
    dtype=float,
)

# Wrap the array in a DataFrame with named columns and put the model name
# in a leading "Model" column.
table_pandas = pd.DataFrame(
    table,
    columns=["Estimate", "Std. Error", "Lower_bound", "Upper_bound"],
)
table_pandas.insert(0, "Model", ["OLS 1", "OLS 2"])

# Display the resulting table.
table_pandas.round(8)
# Coefficient plot: point estimate of GPCP_g for each model with its 95% CI.
fig, ax = plt.subplots(figsize=(6, 5))  # figure size

# Red dots mark the point estimates (s controls the marker size).
ax.scatter(x=table_pandas['Model'],
           y=table_pandas['Estimate'],
           marker='o', s=40, color="red")

# Error bars must reach exactly the interval bounds, so pass the distance
# from the estimate down to the lower bound and up to the upper bound.
# (The previous 0.4 * (upper - lower) drew bars narrower than the 95% CI.)
ci_distances = [
    table_pandas['Estimate'] - table_pandas['Lower_bound'],
    table_pandas['Upper_bound'] - table_pandas['Estimate'],
]
eb1 = ax.errorbar(x=table_pandas['Model'],
                  y=table_pandas['Estimate'],
                  yerr=ci_distances,
                  color='blue',
                  ls='',        # do not join the points with a line
                  capsize=5)    # length of the caps at the ends of each bar

# Dashed horizontal reference line at zero.
ax.axhline(y=0, color='black').set_linestyle('--')

ax.set_title('Tasa de variación de la lluvia en el período t (95% CI)', fontsize=12)

# Save the figure so it can be pasted into the report.
plt.savefig("Imagen_1.jpg", bbox_inches='tight')
Image in a Jupyter notebook